From b464d1270a8016edcf1fd20d77cefdecf9b0b73e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 May 2016 14:32:04 +0200 Subject: locking/barriers, tile: Provide TILE specific smp_acquire__after_ctrl_dep() Since TILE doesn't do read speculation, its control dependencies also guarantee LOAD->LOAD order and we don't need the additional RMB otherwise required to provide ACQUIRE semantics. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/tile/include/asm/barrier.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/tile/include') diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h index d55222806c2f..4c419ab95ab7 100644 --- a/arch/tile/include/asm/barrier.h +++ b/arch/tile/include/asm/barrier.h @@ -87,6 +87,13 @@ mb_incoherent(void) #define __smp_mb__after_atomic() __smp_mb() #endif +/* + * The TILE architecture does not do speculative reads; this ensures + * that a control dependency also orders against loads and already provides + * a LOAD->{LOAD,STORE} order and can forgo the additional RMB. + */ +#define smp_acquire__after_ctrl_dep() barrier() + #include #endif /* !__ASSEMBLY__ */ -- cgit v1.2.3 From 1af5de9af138941fb8638cf126293b16f3387de4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 01:16:03 +0200 Subject: locking/atomic, arch/tile: Implement atomic{,64}_fetch_{add,sub,and,or,xor}() Implement FETCH-OP atomic primitives, these are very similar to the existing OP-RETURN primitives we already have, except they return the value of the atomic variable _before_ modification. This is especially useful for irreversible operations -- such as bitops (because it becomes impossible to reconstruct the state prior to modification). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/tile/include/asm/atomic.h | 4 ++ arch/tile/include/asm/atomic_32.h | 60 ++++++++++++++------ arch/tile/include/asm/atomic_64.h | 115 +++++++++++++++++++++++++------------- arch/tile/include/asm/bitops_32.h | 18 +++--- 4 files changed, 131 insertions(+), 66 deletions(-) (limited to 'arch/tile/include') diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 9fc0107a9c5e..9807030557c4 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -46,6 +46,10 @@ static inline int atomic_read(const atomic_t *v) */ #define atomic_sub_return(i, v) atomic_add_return((int)(-(i)), (v)) +#define atomic_fetch_sub(i, v) atomic_fetch_add(-(int)(i), (v)) + +#define atomic_fetch_or atomic_fetch_or + /** * atomic_sub - subtract integer from atomic variable * @i: integer value to subtract diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index d320ce253d86..da8eb4ed3752 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -34,18 +34,29 @@ static inline void atomic_add(int i, atomic_t *v) _atomic_xchg_add(&v->counter, i); } -#define ATOMIC_OP(op) \ -unsigned long _atomic_##op(volatile unsigned long *p, unsigned long mask); \ +#define ATOMIC_OPS(op) \ +unsigned long _atomic_fetch_##op(volatile unsigned long *p, unsigned long mask); \ static inline void atomic_##op(int i, atomic_t *v) \ { \ - _atomic_##op((unsigned long *)&v->counter, i); \ + _atomic_fetch_##op((unsigned long *)&v->counter, i); \ +} \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + smp_mb(); \ + return _atomic_fetch_##op((unsigned long *)&v->counter, i); \ } -ATOMIC_OP(and) -ATOMIC_OP(or) -ATOMIC_OP(xor) +ATOMIC_OPS(and) +ATOMIC_OPS(or) +ATOMIC_OPS(xor) + +#undef ATOMIC_OPS -#undef ATOMIC_OP +static inline int atomic_fetch_add(int i, atomic_t *v) +{ + smp_mb(); + return _atomic_xchg_add(&v->counter, i); +} /** * atomic_add_return - add integer and return @@ -126,17 +137,30 @@ static inline void atomic64_add(long long i, atomic64_t *v) _atomic64_xchg_add(&v->counter, i); } -#define ATOMIC64_OP(op) \ -long long _atomic64_##op(long long *v, long long n); \ +#define ATOMIC64_OPS(op) \ +long long _atomic64_fetch_##op(long long *v, long long n); \ +static inline void atomic64_##op(long long i, atomic64_t *v) \ +{ \ + _atomic64_fetch_##op(&v->counter, i); \ +} \ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ - _atomic64_##op(&v->counter, i); \ + smp_mb(); \ + return _atomic64_fetch_##op(&v->counter, i); \ } ATOMIC64_OP(and) ATOMIC64_OP(or) ATOMIC64_OP(xor) +#undef ATOMIC64_OPS + +static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +{ + smp_mb(); + return _atomic64_xchg_add(&v->counter, i); +} + /** * atomic64_add_return - add integer and return * @v: pointer of type atomic64_t @@ -186,6 +210,7 @@ static inline void atomic64_set(atomic64_t *v, long long n) #define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) #define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) #define atomic64_sub(i, v) atomic64_add(-(i), (v)) #define atomic64_dec(v) atomic64_sub(1LL, (v)) @@ -193,7 +218,6 @@ static inline void atomic64_set(atomic64_t *v, long long n) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) - #endif /* !__ASSEMBLY__ */ /* @@ -248,10 +272,10 @@ extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n); extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n); extern struct __get_user __atomic_xchg_add_unless(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_or(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_and(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_or(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_and(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_andn(volatile int *p, int *lock, int n); +extern struct __get_user __atomic_fetch_xor(volatile int *p, int *lock, int n); extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, long long o, long long n); extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n); @@ -259,9 +283,9 @@ extern long long __atomic64_xchg_add(volatile long long *p, int *lock, long long n); extern long long __atomic64_xchg_add_unless(volatile long long *p, int *lock, long long o, long long n); -extern long long __atomic64_and(volatile long long *p, int *lock, long long n); -extern long long __atomic64_or(volatile long long *p, int *lock, long long n); -extern long long __atomic64_xor(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_and(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_or(volatile long long *p, int *lock, long long n); +extern long long __atomic64_fetch_xor(volatile long long *p, int *lock, long long n); /* Return failure from the atomic wrappers. */ struct __get_user __atomic_bad_address(int __user *addr); diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index b0531a623653..4cefa0c9fd81 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -32,11 +32,6 @@ * on any routine which updates memory and returns a value. */ -static inline void atomic_add(int i, atomic_t *v) -{ - __insn_fetchadd4((void *)&v->counter, i); -} - /* * Note a subtlety of the locking here. We are required to provide a * full memory barrier before and after the operation. However, we @@ -59,28 +54,39 @@ static inline int atomic_add_return(int i, atomic_t *v) return val; } -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +#define ATOMIC_OPS(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int val; \ + smp_mb(); \ + val = __insn_fetch##op##4((void *)&v->counter, i); \ + smp_mb(); \ + return val; \ +} \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + __insn_fetch##op##4((void *)&v->counter, i); \ +} + +ATOMIC_OPS(add) +ATOMIC_OPS(and) +ATOMIC_OPS(or) + +#undef ATOMIC_OPS + +static inline int atomic_fetch_xor(int i, atomic_t *v) { int guess, oldval = v->counter; + smp_mb(); do { - if (oldval == u) - break; guess = oldval; - oldval = cmpxchg(&v->counter, guess, guess + a); + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch4(&v->counter, guess ^ i); } while (guess != oldval); + smp_mb(); return oldval; } -static inline void atomic_and(int i, atomic_t *v) -{ - __insn_fetchand4((void *)&v->counter, i); -} - -static inline void atomic_or(int i, atomic_t *v) -{ - __insn_fetchor4((void *)&v->counter, i); -} - static inline void atomic_xor(int i, atomic_t *v) { int guess, oldval = v->counter; @@ -91,6 +97,18 @@ static inline void atomic_xor(int i, atomic_t *v) } while (guess != oldval); } +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval; +} + /* Now the true 64-bit operations. */ #define ATOMIC64_INIT(i) { (i) } @@ -98,11 +116,6 @@ static inline void atomic_xor(int i, atomic_t *v) #define atomic64_read(v) READ_ONCE((v)->counter) #define atomic64_set(v, i) WRITE_ONCE((v)->counter, (i)) -static inline void atomic64_add(long i, atomic64_t *v) -{ - __insn_fetchadd((void *)&v->counter, i); -} - static inline long atomic64_add_return(long i, atomic64_t *v) { int val; @@ -112,26 +125,37 @@ static inline long atomic64_add_return(long i, atomic64_t *v) return val; } -static inline long atomic64_add_unless(atomic64_t *v, long a, long u) +#define ATOMIC64_OPS(op) \ +static inline long atomic64_fetch_##op(long i, atomic64_t *v) \ +{ \ + long val; \ + smp_mb(); \ + val = __insn_fetch##op((void *)&v->counter, i); \ + smp_mb(); \ + return val; \ +} \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + __insn_fetch##op((void *)&v->counter, i); \ +} + +ATOMIC64_OPS(add) +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) + +#undef ATOMIC64_OPS + +static inline long atomic64_fetch_xor(long i, atomic64_t *v) { long guess, oldval = v->counter; + smp_mb(); do { - if (oldval == u) - break; guess = oldval; - oldval = cmpxchg(&v->counter, guess, guess + a); + __insn_mtspr(SPR_CMPEXCH_VALUE, guess); + oldval = __insn_cmpexch(&v->counter, guess ^ i); } while (guess != oldval); - return oldval != u; -} - -static inline void atomic64_and(long i, atomic64_t *v) -{ - __insn_fetchand((void *)&v->counter, i); -} - -static inline void atomic64_or(long i, atomic64_t *v) -{ - __insn_fetchor((void *)&v->counter, i); + smp_mb(); + return oldval; } static inline void atomic64_xor(long i, atomic64_t *v) @@ -144,7 +168,20 @@ static inline void atomic64_xor(long i, atomic64_t *v) } while (guess != oldval); } +static inline long atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long guess, oldval = v->counter; + do { + if (oldval == u) + break; + guess = oldval; + oldval = cmpxchg(&v->counter, guess, guess + a); + } while (guess != oldval); + return oldval != u; +} + #define atomic64_sub_return(i, v) atomic64_add_return(-(i), (v)) +#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) #define atomic64_sub(i, v) atomic64_add(-(i), (v)) #define atomic64_inc_return(v) atomic64_add_return(1, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1, (v)) diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h index bbf7b666f21d..d1406a95f6b7 100644 --- a/arch/tile/include/asm/bitops_32.h +++ b/arch/tile/include/asm/bitops_32.h @@ -19,9 +19,9 @@ #include /* Tile-specific routines to support . */ -unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask); -unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask); -unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_or(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_andn(volatile unsigned long *p, unsigned long mask); +unsigned long _atomic_fetch_xor(volatile unsigned long *p, unsigned long mask); /** * set_bit - Atomically set a bit in memory @@ -35,7 +35,7 @@ unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask); */ static inline void set_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_or(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_or(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -54,7 +54,7 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr) */ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_andn(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -69,7 +69,7 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr) */ static inline void change_bit(unsigned nr, volatile unsigned long *addr) { - _atomic_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); + _atomic_fetch_xor(addr + BIT_WORD(nr), BIT_MASK(nr)); } /** @@ -85,7 +85,7 @@ static inline int test_and_set_bit(unsigned nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_or(addr, mask) & mask) != 0; + return (_atomic_fetch_or(addr, mask) & mask) != 0; } /** @@ -101,7 +101,7 @@ static inline int test_and_clear_bit(unsigned nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_andn(addr, mask) & mask) != 0; + return (_atomic_fetch_andn(addr, mask) & mask) != 0; } /** @@ -118,7 +118,7 @@ static inline int test_and_change_bit(unsigned nr, unsigned long mask = BIT_MASK(nr); addr += BIT_WORD(nr); smp_mb(); /* barrier for proper semantics */ - return (_atomic_xor(addr, mask) & mask) != 0; + return (_atomic_fetch_xor(addr, mask) & mask) != 0; } #include -- cgit v1.2.3 From b53d6bedbe781974097fd8c38263f6cc78ff9ea7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Apr 2016 00:58:25 +0200 Subject: locking/atomic: Remove linux/atomic.h:atomic_fetch_or() Since all architectures have this implemented now natively, remove this dead code. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/tile/include/asm/atomic.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/tile/include') diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 9807030557c4..8dda3c8ff5ab 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -48,8 +48,6 @@ static inline int atomic_read(const atomic_t *v) #define atomic_fetch_sub(i, v) atomic_fetch_add(-(int)(i), (v)) -#define atomic_fetch_or atomic_fetch_or - /** * atomic_sub - subtract integer from atomic variable * @i: integer value to subtract -- cgit v1.2.3 From b7271b9f3e18181559b96a610f4e42bdb04b07f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Jun 2016 11:16:49 +0200 Subject: locking/atomic, arch/tile: Fix tilepro build The tilepro change wasn't ever compiled it seems (the 0day built bot also doesn't have a toolchain for it). Make it work. The thing that makes the patch bigger than desired is namespace collision with the C11 __atomic builtin functions. So rename the tilepro functions to __atomic32. Reported-by: Sudip Mukherjee Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Thomas Gleixner Fixes: 1af5de9af138 ("locking/atomic, arch/tile: Implement atomic{,64}_fetch_{add,sub,and,or,xor}()") Link: http://lkml.kernel.org/r/20160622091649.GB30154@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/tile/include/asm/atomic_32.h | 24 ++++++++++++------------ arch/tile/include/asm/futex.h | 14 +++++++------- 2 files changed, 19 insertions(+), 19 deletions(-) (limited to 'arch/tile/include') diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index da8eb4ed3752..a93774255136 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -143,15 +143,15 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ { \ _atomic64_fetch_##op(&v->counter, i); \ } \ -static inline void atomic64_##op(long long i, atomic64_t *v) \ +static inline long long atomic64_fetch_##op(long long i, atomic64_t *v) \ { \ smp_mb(); \ return _atomic64_fetch_##op(&v->counter, i); \ } -ATOMIC64_OP(and) -ATOMIC64_OP(or) -ATOMIC64_OP(xor) +ATOMIC64_OPS(and) +ATOMIC64_OPS(or) +ATOMIC64_OPS(xor) #undef ATOMIC64_OPS @@ -266,16 +266,16 @@ struct __get_user { unsigned long val; int err; }; -extern struct __get_user __atomic_cmpxchg(volatile int *p, +extern struct __get_user __atomic32_cmpxchg(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_xchg_add_unless(volatile int *p, +extern struct __get_user __atomic32_xchg(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_xchg_add(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_xchg_add_unless(volatile int *p, int *lock, int o, int n); -extern struct __get_user __atomic_fetch_or(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_fetch_and(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_fetch_andn(volatile int *p, int *lock, int n); -extern struct __get_user __atomic_fetch_xor(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_or(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_and(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_andn(volatile int *p, int *lock, int n); +extern struct __get_user __atomic32_fetch_xor(volatile int *p, int *lock, int n); extern long long __atomic64_cmpxchg(volatile long long *p, int *lock, long long o, long long n); extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n); diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h index 1a6ef1b69cb1..e64a1b75fc38 100644 --- a/arch/tile/include/asm/futex.h +++ b/arch/tile/include/asm/futex.h @@ -80,16 +80,16 @@ ret = gu.err; \ } -#define __futex_set() __futex_call(__atomic_xchg) -#define __futex_add() __futex_call(__atomic_xchg_add) -#define __futex_or() __futex_call(__atomic_or) -#define __futex_andn() __futex_call(__atomic_andn) -#define __futex_xor() __futex_call(__atomic_xor) +#define __futex_set() __futex_call(__atomic32_xchg) +#define __futex_add() __futex_call(__atomic32_xchg_add) +#define __futex_or() __futex_call(__atomic32_fetch_or) +#define __futex_andn() __futex_call(__atomic32_fetch_andn) +#define __futex_xor() __futex_call(__atomic32_fetch_xor) #define __futex_cmpxchg() \ { \ - struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \ - lock, oldval, oparg); \ + struct __get_user gu = __atomic32_cmpxchg((u32 __force *)uaddr, \ + lock, oldval, oparg); \ val = gu.val; \ ret = gu.err; \ } -- cgit v1.2.3