From 8bd9cb51daac89337295b6f037b0486911e1b408 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 19 Jun 2018 13:53:08 +0100 Subject: locking/atomics, asm-generic: Move some macros from to a new file In preparation for implementing the asm-generic atomic bitops in terms of atomic_long_*(), we need to prevent implementations from pulling in . A common reason for this include is for the BITS_PER_BYTE definition, so move this and some other BIT() and masking macros into a new header file, . Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Cc: yamada.masahiro@socionext.com Link: https://lore.kernel.org/lkml/1529412794-17720-4-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/linux/bitops.h | 22 +--------------------- include/linux/bits.h | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 21 deletions(-) create mode 100644 include/linux/bits.h (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 4cac4e1a72ff..af419012d77d 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -2,29 +2,9 @@ #ifndef _LINUX_BITOPS_H #define _LINUX_BITOPS_H #include +#include -#ifdef __KERNEL__ -#define BIT(nr) (1UL << (nr)) -#define BIT_ULL(nr) (1ULL << (nr)) -#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) -#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) -#define BIT_ULL_MASK(nr) (1ULL << ((nr) % BITS_PER_LONG_LONG)) -#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) -#define BITS_PER_BYTE 8 #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) -#endif - -/* - * Create a contiguous bitmask starting at bit position @l and ending at - * position @h. For example - * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. - */ -#define GENMASK(h, l) \ - (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) - -#define GENMASK_ULL(h, l) \ - (((~0ULL) - (1ULL << (l)) + 1) & \ - (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); diff --git a/include/linux/bits.h b/include/linux/bits.h new file mode 100644 index 000000000000..2b7b532c1d51 --- /dev/null +++ b/include/linux/bits.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_BITS_H +#define __LINUX_BITS_H +#include + +#define BIT(nr) (1UL << (nr)) +#define BIT_ULL(nr) (1ULL << (nr)) +#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BIT_ULL_MASK(nr) (1ULL << ((nr) % BITS_PER_LONG_LONG)) +#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) +#define BITS_PER_BYTE 8 + +/* + * Create a contiguous bitmask starting at bit position @l and ending at + * position @h. For example + * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. + */ +#define GENMASK(h, l) \ + (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + +#define GENMASK_ULL(h, l) \ + (((~0ULL) - (1ULL << (l)) + 1) & \ + (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h)))) + +#endif /* __LINUX_BITS_H */ -- cgit v1.2.3 From e986a0d6cb36b54838b2f5f8ac2bb5f9baadd977 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 19 Jun 2018 13:53:11 +0100 Subject: locking/atomics, asm-generic/bitops/atomic.h: Rewrite using atomic_*() APIs The atomic bitops can actually be implemented pretty efficiently using the atomic_*() ops, rather than explicit use of spinlocks. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Cc: yamada.masahiro@socionext.com Link: https://lore.kernel.org/lkml/1529412794-17720-7-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/bitops/atomic.h | 188 +++++++----------------------------- 1 file changed, 33 insertions(+), 155 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index 04deffaf5f7d..dd90c9792909 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -2,189 +2,67 @@ #ifndef _ASM_GENERIC_BITOPS_ATOMIC_H_ #define _ASM_GENERIC_BITOPS_ATOMIC_H_ -#include -#include - -#ifdef CONFIG_SMP -#include -#include /* we use L1_CACHE_BYTES */ - -/* Use an array of spinlocks for our atomic_ts. - * Hash function to index into a different SPINLOCK. - * Since "a" is usually an address, use one spinlock per cacheline. - */ -# define ATOMIC_HASH_SIZE 4 -# define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) a)/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ])) - -extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; - -/* Can't use raw_spin_lock_irq because of #include problems, so - * this is the substitute */ -#define _atomic_spin_lock_irqsave(l,f) do { \ - arch_spinlock_t *s = ATOMIC_HASH(l); \ - local_irq_save(f); \ - arch_spin_lock(s); \ -} while(0) - -#define _atomic_spin_unlock_irqrestore(l,f) do { \ - arch_spinlock_t *s = ATOMIC_HASH(l); \ - arch_spin_unlock(s); \ - local_irq_restore(f); \ -} while(0) - - -#else -# define _atomic_spin_lock_irqsave(l,f) do { local_irq_save(f); } while (0) -# define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore(f); } while (0) -#endif +#include +#include +#include /* - * NMI events can occur at any time, including when interrupts have been - * disabled by *_irqsave(). So you can get NMI events occurring while a - * *_bit function is holding a spin lock. If the NMI handler also wants - * to do bit manipulation (and they do) then you can get a deadlock - * between the original caller of *_bit() and the NMI handler. - * - * by Keith Owens + * Implementation of atomic bitops using atomic-fetch ops. + * See Documentation/atomic_bitops.txt for details. */ -/** - * set_bit - Atomically set a bit in memory - * @nr: the bit to set - * @addr: the address to start counting from - * - * This function is atomic and may not be reordered. See __set_bit() - * if you do not require the atomic guarantees. - * - * Note: there are no guarantees that this function will not be reordered - * on non x86 architectures, so if you are writing portable code, - * make sure not to rely on its reordering guarantees. - * - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ -static inline void set_bit(int nr, volatile unsigned long *addr) +static inline void set_bit(unsigned int nr, volatile unsigned long *p) { - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long flags; - - _atomic_spin_lock_irqsave(p, flags); - *p |= mask; - _atomic_spin_unlock_irqrestore(p, flags); + p += BIT_WORD(nr); + atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p); } -/** - * clear_bit - Clears a bit in memory - * @nr: Bit to clear - * @addr: Address to start counting from - * - * clear_bit() is atomic and may not be reordered. However, it does - * not contain a memory barrier, so if it is used for locking purposes, - * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic() - * in order to ensure changes are visible on other processors. - */ -static inline void clear_bit(int nr, volatile unsigned long *addr) +static inline void clear_bit(unsigned int nr, volatile unsigned long *p) { - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long flags; - - _atomic_spin_lock_irqsave(p, flags); - *p &= ~mask; - _atomic_spin_unlock_irqrestore(p, flags); + p += BIT_WORD(nr); + atomic_long_andnot(BIT_MASK(nr), (atomic_long_t *)p); } -/** - * change_bit - Toggle a bit in memory - * @nr: Bit to change - * @addr: Address to start counting from - * - * change_bit() is atomic and may not be reordered. It may be - * reordered on other architectures than x86. - * Note that @nr may be almost arbitrarily large; this function is not - * restricted to acting on a single-word quantity. - */ -static inline void change_bit(int nr, volatile unsigned long *addr) +static inline void change_bit(unsigned int nr, volatile unsigned long *p) { - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long flags; - - _atomic_spin_lock_irqsave(p, flags); - *p ^= mask; - _atomic_spin_unlock_irqrestore(p, flags); + p += BIT_WORD(nr); + atomic_long_xor(BIT_MASK(nr), (atomic_long_t *)p); } -/** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It may be reordered on other architectures than x86. - * It also implies a memory barrier. - */ -static inline int test_and_set_bit(int nr, volatile unsigned long *addr) +static inline int test_and_set_bit(unsigned int nr, volatile unsigned long *p) { + long old; unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long old; - unsigned long flags; - _atomic_spin_lock_irqsave(p, flags); - old = *p; - *p = old | mask; - _atomic_spin_unlock_irqrestore(p, flags); + p += BIT_WORD(nr); + if (READ_ONCE(*p) & mask) + return 1; - return (old & mask) != 0; + old = atomic_long_fetch_or(mask, (atomic_long_t *)p); + return !!(old & mask); } -/** - * test_and_clear_bit - Clear a bit and return its old value - * @nr: Bit to clear - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It can be reorderdered on other architectures other than x86. - * It also implies a memory barrier. - */ -static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) +static inline int test_and_clear_bit(unsigned int nr, volatile unsigned long *p) { + long old; unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long old; - unsigned long flags; - _atomic_spin_lock_irqsave(p, flags); - old = *p; - *p = old & ~mask; - _atomic_spin_unlock_irqrestore(p, flags); + p += BIT_WORD(nr); + if (!(READ_ONCE(*p) & mask)) + return 0; - return (old & mask) != 0; + old = atomic_long_fetch_andnot(mask, (atomic_long_t *)p); + return !!(old & mask); } -/** - * test_and_change_bit - Change a bit and return its old value - * @nr: Bit to change - * @addr: Address to count from - * - * This operation is atomic and cannot be reordered. - * It also implies a memory barrier. - */ -static inline int test_and_change_bit(int nr, volatile unsigned long *addr) +static inline int test_and_change_bit(unsigned int nr, volatile unsigned long *p) { + long old; unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long old; - unsigned long flags; - - _atomic_spin_lock_irqsave(p, flags); - old = *p; - *p = old ^ mask; - _atomic_spin_unlock_irqrestore(p, flags); - return (old & mask) != 0; + p += BIT_WORD(nr); + old = atomic_long_fetch_xor(mask, (atomic_long_t *)p); + return !!(old & mask); } #endif /* _ASM_GENERIC_BITOPS_ATOMIC_H */ -- cgit v1.2.3 From 84c6591103dbeaf393a092a3fc7b09510825f6b9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 19 Jun 2018 13:53:12 +0100 Subject: locking/atomics, asm-generic/bitops/lock.h: Rewrite using atomic_fetch_*() The lock bitops can be implemented more efficiently using the atomic_fetch_*() ops, which provide finer-grained control over the memory ordering semantics than the bitops. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-kernel@lists.infradead.org Cc: yamada.masahiro@socionext.com Link: https://lore.kernel.org/lkml/1529412794-17720-8-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/bitops/lock.h | 68 ++++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h index 67ab280ad134..3ae021368f48 100644 --- a/include/asm-generic/bitops/lock.h +++ b/include/asm-generic/bitops/lock.h @@ -2,6 +2,10 @@ #ifndef _ASM_GENERIC_BITOPS_LOCK_H_ #define _ASM_GENERIC_BITOPS_LOCK_H_ +#include +#include +#include + /** * test_and_set_bit_lock - Set a bit and return its old value, for lock * @nr: Bit to set @@ -11,7 +15,20 @@ * the returned value is 0. * It can be used to implement bit locks. */ -#define test_and_set_bit_lock(nr, addr) test_and_set_bit(nr, addr) +static inline int test_and_set_bit_lock(unsigned int nr, + volatile unsigned long *p) +{ + long old; + unsigned long mask = BIT_MASK(nr); + + p += BIT_WORD(nr); + if (READ_ONCE(*p) & mask) + return 1; + + old = atomic_long_fetch_or_acquire(mask, (atomic_long_t *)p); + return !!(old & mask); +} + /** * clear_bit_unlock - Clear a bit in memory, for unlock @@ -20,11 +37,11 @@ * * This operation is atomic and provides release barrier semantics. */ -#define clear_bit_unlock(nr, addr) \ -do { \ - smp_mb__before_atomic(); \ - clear_bit(nr, addr); \ -} while (0) +static inline void clear_bit_unlock(unsigned int nr, volatile unsigned long *p) +{ + p += BIT_WORD(nr); + atomic_long_fetch_andnot_release(BIT_MASK(nr), (atomic_long_t *)p); +} /** * __clear_bit_unlock - Clear a bit in memory, for unlock @@ -37,11 +54,38 @@ do { \ * * See for example x86's implementation. */ -#define __clear_bit_unlock(nr, addr) \ -do { \ - smp_mb__before_atomic(); \ - clear_bit(nr, addr); \ -} while (0) +static inline void __clear_bit_unlock(unsigned int nr, + volatile unsigned long *p) +{ + unsigned long old; -#endif /* _ASM_GENERIC_BITOPS_LOCK_H_ */ + p += BIT_WORD(nr); + old = READ_ONCE(*p); + old &= ~BIT_MASK(nr); + atomic_long_set_release((atomic_long_t *)p, old); +} + +/** + * clear_bit_unlock_is_negative_byte - Clear a bit in memory and test if bottom + * byte is negative, for unlock. + * @nr: the bit to clear + * @addr: the address to start counting from + * + * This is a bit of a one-trick-pony for the filemap code, which clears + * PG_locked and tests PG_waiters, + */ +#ifndef clear_bit_unlock_is_negative_byte +static inline bool clear_bit_unlock_is_negative_byte(unsigned int nr, + volatile unsigned long *p) +{ + long old; + unsigned long mask = BIT_MASK(nr); + + p += BIT_WORD(nr); + old = atomic_long_fetch_andnot_release(mask, (atomic_long_t *)p); + return !!(old & BIT(7)); +} +#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte +#endif +#endif /* _ASM_GENERIC_BITOPS_LOCK_H_ */ -- cgit v1.2.3 From bfc18e389c7a09fbbbed6bf4032396685b14246e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:04 +0100 Subject: atomics/treewide: Rename __atomic_add_unless() => atomic_fetch_add_unless() While __atomic_add_unless() was originally intended as a building-block for atomic_add_unless(), it's now used in a number of places around the kernel. It's the only common atomic operation named __atomic*(), rather than atomic_*(), and for consistency it would be better named atomic_fetch_add_unless(). This lack of consistency is slightly confusing, and gets in the way of scripting atomics. Given that, let's clean things up and promote it to an official part of the atomics API, in the form of atomic_fetch_add_unless(). This patch converts definitions and invocations over to the new name, including the instrumented version, using the following script: ---- git grep -w __atomic_add_unless | while read line; do sed -i '{s/\<__atomic_add_unless\>/atomic_fetch_add_unless/}' "${line%%:*}"; done git grep -w __arch_atomic_add_unless | while read line; do sed -i '{s/\<__arch_atomic_add_unless\>/arch_atomic_fetch_add_unless/}' "${line%%:*}"; done ---- Note that we do not have atomic{64,_long}_fetch_add_unless(), which will be introduced by later patches. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Geert Uytterhoeven Acked-by: Peter Zijlstra (Intel) Acked-by: Palmer Dabbelt Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-2-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-instrumented.h | 4 ++-- include/asm-generic/atomic.h | 4 ++-- include/linux/atomic.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h index ec07f23678ea..b8b14cc2df6c 100644 --- a/include/asm-generic/atomic-instrumented.h +++ b/include/asm-generic/atomic-instrumented.h @@ -84,10 +84,10 @@ static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 ne } #endif -static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) +static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) { kasan_check_write(v, sizeof(*v)); - return __arch_atomic_add_unless(v, a, u); + return arch_atomic_fetch_add_unless(v, a, u); } diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index abe6dd9ca2a8..10051ed6d088 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -221,8 +221,8 @@ static inline void atomic_dec(atomic_t *v) #define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) -#ifndef __atomic_add_unless -static inline int __atomic_add_unless(atomic_t *v, int a, int u) +#ifndef atomic_fetch_add_unless +static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) { int c, old; c = atomic_read(v); diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 01ce3997cb42..9cc982936675 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -530,7 +530,7 @@ */ static inline int atomic_add_unless(atomic_t *v, int a, int u) { - return __atomic_add_unless(v, a, u) != u; + return atomic_fetch_add_unless(v, a, u) != u; } /** -- cgit v1.2.3 From f74445b6dd6b8f33ed34e005d19ecbb49171dabf Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:06 +0100 Subject: atomics/treewide: Remove atomic_inc_not_zero_hint() While documentation suggests atomic_inc_not_zero_hint() will perform better than atomic_inc_not_zero(), this is unlikely to be the case. No architectures implement atomic_inc_not_zero_hint() directly, and thus it either falls back to atomic_inc_not_zero(), or a loop using atomic_cmpxchg(). Whenever the hint does not match the value in memory, the repeated use of atomic_cmpxchg() will be more expensive than the read that atomic_inc_not_zero_hint() attempts to avoid. For architectures with LL/SC atomics, a read cannot be avoided, and it would always be better to use atomic_inc_not_zero() directly. For other architectures, their own atomic_inc_not_zero() is likely to be more optimal than an atomic_cmpxchg() loop regardless. Generally, atomic_inc_not_zero_hint() is liable to perform worse than atomic_inc_not_zero(). Further, atomic_inc_not_zero_hint() only exists for atomic_t, and not atomic64_t or atomic_long_t, and there is only one user in the kernel tree. Given all this, let's remove atomic_inc_not_zero_hint(), and migrate the existing user over to atomic_inc_not_zero(). There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-4-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 9cc982936675..5c5620ae5a35 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -571,38 +571,6 @@ static inline int atomic_fetch_andnot_release(int i, atomic_t *v) } #endif -/** - * atomic_inc_not_zero_hint - increment if not null - * @v: pointer of type atomic_t - * @hint: probable value of the atomic before the increment - * - * This version of atomic_inc_not_zero() gives a hint of probable - * value of the atomic. This helps processor to not read the memory - * before doing the atomic read/modify/write cycle, lowering - * number of bus transactions on some arches. - * - * Returns: 0 if increment was not done, 1 otherwise. - */ -#ifndef atomic_inc_not_zero_hint -static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint) -{ - int val, c = hint; - - /* sanity test, should be removed by compiler if hint is a constant */ - if (!hint) - return atomic_inc_not_zero(v); - - do { - val = atomic_cmpxchg(v, c, c + 1); - if (val == c) - return 1; - c = val; - } while (c); - - return 0; -} -#endif - #ifndef atomic_inc_unless_negative static inline int atomic_inc_unless_negative(atomic_t *p) { -- cgit v1.2.3 From ade5ef9280c33993099199c51e2e27c2c4013afd Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:07 +0100 Subject: atomics: Make conditional ops return 'bool' Some of the atomics return a status value, which is a boolean value describing whether the operation was performed. To make it clear that this is a boolean value, let's update the common fallbacks to return bool, fixing up the return values and comments likewise. At the same time, let's simplify the description of the operations in their respective comments. The instrumented atomics and generic atomic64 implementation are updated accordingly. Note that atomic64_dec_if_positive() doesn't follow the usual test op pattern, and returns the would-be decremented value. This is not changed. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Boqun Feng Cc: Linus Torvalds Cc: Michael Ellerman Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-5-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-instrumented.h | 2 +- include/asm-generic/atomic64.h | 3 ++- include/linux/atomic.h | 24 +++++++++++++----------- 3 files changed, 16 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h index b8b14cc2df6c..497faa4a05e3 100644 --- a/include/asm-generic/atomic-instrumented.h +++ b/include/asm-generic/atomic-instrumented.h @@ -205,7 +205,7 @@ static __always_inline s64 atomic64_dec_return(atomic64_t *v) return arch_atomic64_dec_return(v); } -static __always_inline s64 atomic64_inc_not_zero(atomic64_t *v) +static __always_inline bool atomic64_inc_not_zero(atomic64_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic64_inc_not_zero(v); diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index 8d28eb010d0d..a951a721e1bb 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -11,6 +11,7 @@ */ #ifndef _ASM_GENERIC_ATOMIC64_H #define _ASM_GENERIC_ATOMIC64_H +#include typedef struct { long long counter; @@ -52,7 +53,7 @@ ATOMIC64_OPS(xor) extern long long atomic64_dec_if_positive(atomic64_t *v); extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n); extern long long atomic64_xchg(atomic64_t *v, long long new); -extern int atomic64_add_unless(atomic64_t *v, long long a, long long u); +extern bool atomic64_add_unless(atomic64_t *v, long long a, long long u); #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) #define atomic64_inc(v) atomic64_add(1LL, (v)) diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 5c5620ae5a35..307a7f6d619a 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -2,6 +2,8 @@ /* Atomic operations usable in machine independent code */ #ifndef _LINUX_ATOMIC_H #define _LINUX_ATOMIC_H +#include + #include #include @@ -525,10 +527,10 @@ * @a: the amount to add to v... * @u: ...unless v is equal to u. * - * Atomically adds @a to @v, so long as @v was not already @u. - * Returns non-zero if @v was not @u, and zero otherwise. + * Atomically adds @a to @v, if @v was not already @u. + * Returns true if the addition was done. */ -static inline int atomic_add_unless(atomic_t *v, int a, int u) +static inline bool atomic_add_unless(atomic_t *v, int a, int u) { return atomic_fetch_add_unless(v, a, u) != u; } @@ -537,8 +539,8 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) * atomic_inc_not_zero - increment unless the number is zero * @v: pointer of type atomic_t * - * Atomically increments @v by 1, so long as @v is non-zero. - * Returns non-zero if @v was non-zero, and zero otherwise. + * Atomically increments @v by 1, if @v is non-zero. + * Returns true if the increment was done. */ #ifndef atomic_inc_not_zero #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) @@ -572,28 +574,28 @@ static inline int atomic_fetch_andnot_release(int i, atomic_t *v) #endif #ifndef atomic_inc_unless_negative -static inline int atomic_inc_unless_negative(atomic_t *p) +static inline bool atomic_inc_unless_negative(atomic_t *p) { int v, v1; for (v = 0; v >= 0; v = v1) { v1 = atomic_cmpxchg(p, v, v + 1); if (likely(v1 == v)) - return 1; + return true; } - return 0; + return false; } #endif #ifndef atomic_dec_unless_positive -static inline int atomic_dec_unless_positive(atomic_t *p) +static inline bool atomic_dec_unless_positive(atomic_t *p) { int v, v1; for (v = 0; v <= 0; v = v1) { v1 = atomic_cmpxchg(p, v, v - 1); if (likely(v1 == v)) - return 1; + return true; } - return 0; + return false; } #endif -- cgit v1.2.3 From bef828204a1bc7a0fd3a24551c4265e9c2ab95ed Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:08 +0100 Subject: atomics/treewide: Make atomic64_inc_not_zero() optional We define a trivial fallback for atomic_inc_not_zero(), but don't do the same for atomic64_inc_not_zero(), leading most architectures to define the same boilerplate. Let's add a fallback in , and remove the redundant implementations. Note that atomic64_add_unless() is always defined in , and promotes its arguments to the requisite types, so we need not do this explicitly. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Palmer Dabbelt Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-6-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-instrumented.h | 3 +++ include/asm-generic/atomic64.h | 1 - include/linux/atomic.h | 11 +++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h index 497faa4a05e3..83bb88d791c4 100644 --- a/include/asm-generic/atomic-instrumented.h +++ b/include/asm-generic/atomic-instrumented.h @@ -205,11 +205,14 @@ static __always_inline s64 atomic64_dec_return(atomic64_t *v) return arch_atomic64_dec_return(v); } +#ifdef arch_atomic64_inc_not_zero +#define atomic64_inc_not_zero atomic64_inc_not_zero static __always_inline bool atomic64_inc_not_zero(atomic64_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic64_inc_not_zero(v); } +#endif static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v) { diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index a951a721e1bb..5105275ac825 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -63,6 +63,5 @@ extern bool atomic64_add_unless(atomic64_t *v, long long a, long long u); #define atomic64_dec(v) atomic64_sub(1LL, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1LL, (v)) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) #endif /* _ASM_GENERIC_ATOMIC64_H */ diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 307a7f6d619a..ae3f30923d05 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -1019,6 +1019,17 @@ static inline int atomic_dec_if_positive(atomic_t *v) #define atomic64_try_cmpxchg_release atomic64_try_cmpxchg #endif /* atomic64_try_cmpxchg */ +/** + * atomic64_inc_not_zero - increment unless the number is zero + * @v: pointer of type atomic64_t + * + * Atomically increments @v by 1, if @v is non-zero. + * Returns true if the increment was done. + */ +#ifndef atomic64_inc_not_zero +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) +#endif + #ifndef atomic64_andnot static inline void atomic64_andnot(long long i, atomic64_t *v) { -- cgit v1.2.3 From eccc2da8c03f316bba202e15af2be4615f461900 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:09 +0100 Subject: atomics/treewide: Make atomic_fetch_add_unless() optional Several architectures these have a near-identical implementation based on atomic_read() and atomic_cmpxchg() which we can instead define in , so let's do so, using something close to the existing x86 implementation with try_cmpxchg(). Where an architecture provides its own atomic_fetch_add_unless(), it must define a preprocessor symbol for it. The instrumented atomics are updated accordingly. Note that arch/arc's existing atomic_fetch_add_unless() had redundant barriers, as these are already present in its atomic_cmpxchg() implementation. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Geert Uytterhoeven Reviewed-by: Will Deacon Acked-by: Geert Uytterhoeven Acked-by: Peter Zijlstra (Intel) Acked-by: Palmer Dabbelt Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Vineet Gupta Link: https://lore.kernel.org/lkml/20180621121321.4761-7-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-instrumented.h | 4 +++- include/asm-generic/atomic.h | 11 ----------- include/linux/atomic.h | 23 +++++++++++++++++++++++ 3 files changed, 26 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h index 83bb88d791c4..1f9b2a767d3c 100644 --- a/include/asm-generic/atomic-instrumented.h +++ b/include/asm-generic/atomic-instrumented.h @@ -84,12 +84,14 @@ static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 ne } #endif +#ifdef arch_atomic_fetch_add_unless +#define atomic_fetch_add_unless atomic_fetch_add_unless static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) { kasan_check_write(v, sizeof(*v)); return arch_atomic_fetch_add_unless(v, a, u); } - +#endif static __always_inline bool atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 10051ed6d088..757e45821220 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -221,15 +221,4 @@ static inline void atomic_dec(atomic_t *v) #define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) -#ifndef atomic_fetch_add_unless -static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) -{ - int c, old; - c = atomic_read(v); - while (c != u && (old = atomic_cmpxchg(v, c, c + a)) != c) - c = old; - return c; -} -#endif - #endif /* __ASM_GENERIC_ATOMIC_H */ diff --git a/include/linux/atomic.h b/include/linux/atomic.h index ae3f30923d05..b89ba36cab94 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -521,6 +521,29 @@ #endif #endif /* xchg_relaxed */ +/** + * atomic_fetch_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if @v was not already @u. + * Returns the original value of @v. + */ +#ifndef atomic_fetch_add_unless +static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) +{ + int c = atomic_read(v); + + do { + if (unlikely(c == u)) + break; + } while (!atomic_try_cmpxchg(v, &c, c + a)); + + return c; +} +#endif + /** * atomic_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t -- cgit v1.2.3 From 0ae1d994020d75ac065fd42ac4cbf5ac6ce9b255 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:10 +0100 Subject: atomics: Prepare for atomic64_fetch_add_unless() Currently all architectures must implement atomic_fetch_add_unless(), with common code providing atomic_add_unless(). Architectures must also implement atomic64_add_unless() directly, with no corresponding atomic64_fetch_add_unless(). This divergence is unfortunate, and means that the APIs for atomic_t, atomic64_t, and atomic_long_t differ. In preparation for unifying things, with architectures providing atomic64_fetch_add_unless, this patch adds a generic atomic64_add_unless() which will use atomic64_fetch_add_unless(). The instrumented atomics are updated to take this case into account. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Albert Ou Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Boqun Feng Cc: Ivan Kokshaysky Cc: Linus Torvalds Cc: Matt Turner Cc: Michael Ellerman Cc: Palmer Dabbelt Cc: Paul Mackerras Cc: Richard Henderson Cc: Russell King Cc: Thomas Gleixner Cc: Vineet Gupta Link: https://lore.kernel.org/lkml/20180621121321.4761-8-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-instrumented.h | 9 +++++++++ include/linux/atomic.h | 16 ++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h index 1f9b2a767d3c..444bf2f9d54d 100644 --- a/include/asm-generic/atomic-instrumented.h +++ b/include/asm-generic/atomic-instrumented.h @@ -93,11 +93,20 @@ static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) } #endif +#ifdef arch_atomic64_fetch_add_unless +#define atomic64_fetch_add_unless atomic64_fetch_add_unless +static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) +{ + kasan_check_write(v, sizeof(*v)); + return arch_atomic64_fetch_add_unless(v, a, u); +} +#else static __always_inline bool atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { kasan_check_write(v, sizeof(*v)); return arch_atomic64_add_unless(v, a, u); } +#endif static __always_inline void atomic_inc(atomic_t *v) { diff --git a/include/linux/atomic.h b/include/linux/atomic.h index b89ba36cab94..3c03de648007 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -1042,6 +1042,22 @@ static inline int atomic_dec_if_positive(atomic_t *v) #define atomic64_try_cmpxchg_release atomic64_try_cmpxchg #endif /* atomic64_try_cmpxchg */ +/** + * atomic64_add_unless - add unless the number is already a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if @v was not already @u. + * Returns true if the addition was done. + */ +#ifdef atomic64_fetch_add_unless +static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u) +{ + return atomic64_fetch_add_unless(v, a, u) != u; +} +#endif + /** * atomic64_inc_not_zero - increment unless the number is zero * @v: pointer of type atomic64_t -- cgit v1.2.3 From 00b808ab79ead372daf1a0682d1ef271599c0b55 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:11 +0100 Subject: atomics/generic: Define atomic64_fetch_add_unless() As a step towards unifying the atomic/atomic64/atomic_long APIs, this patch converts the generic implementation of atomic64_add_unless() into a generic implementation of atomic64_fetch_add_unless(). A wrapper in will build atomic_add_unless() atop of this, provided it is given a preprocessor definition. No functional change is intended as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Arnd Bergmann Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-9-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic64.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index 5105275ac825..49460107b29a 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -53,7 +53,8 @@ ATOMIC64_OPS(xor) extern long long atomic64_dec_if_positive(atomic64_t *v); extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n); extern long long atomic64_xchg(atomic64_t *v, long long new); -extern bool atomic64_add_unless(atomic64_t *v, long long a, long long u); +extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u); +#define atomic64_fetch_add_unless atomic64_fetch_add_unless #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) #define atomic64_inc(v) atomic64_add(1LL, (v)) -- cgit v1.2.3 From 356701329fb391184618eda7b7fb68cb35271506 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:17 +0100 Subject: atomics/treewide: Make atomic64_fetch_add_unless() optional Architectures with atomic64_fetch_add_unless() provide a preprocessor symbol if they do so, and all other architectures have trivial C implementations of atomic64_add_unless() which are near-identical. Let's unify the trivial definitions of atomic64_fetch_add_unless() in , so that we always have both atomic64_fetch_add_unless() and atomic64_add_unless() with less boilerplate code. This means that atomic64_add_unless() is always implemented in core code, and the instrumented atomics are updated accordingly. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-15-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-instrumented.h | 6 ------ include/linux/atomic.h | 26 ++++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h index 444bf2f9d54d..2b487f28ef35 100644 --- a/include/asm-generic/atomic-instrumented.h +++ b/include/asm-generic/atomic-instrumented.h @@ -100,12 +100,6 @@ static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u kasan_check_write(v, sizeof(*v)); return arch_atomic64_fetch_add_unless(v, a, u); } -#else -static __always_inline bool atomic64_add_unless(atomic64_t *v, s64 a, s64 u) -{ - kasan_check_write(v, sizeof(*v)); - return arch_atomic64_add_unless(v, a, u); -} #endif static __always_inline void atomic_inc(atomic_t *v) diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 3c03de648007..530562ac7909 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -1042,6 +1042,30 @@ static inline int atomic_dec_if_positive(atomic_t *v) #define atomic64_try_cmpxchg_release atomic64_try_cmpxchg #endif /* atomic64_try_cmpxchg */ +/** + * atomic64_fetch_add_unless - add unless the number is already a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, if @v was not already @u. + * Returns the original value of @v. + */ +#ifndef atomic64_fetch_add_unless +static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a, + long long u) +{ + long long c = atomic64_read(v); + + do { + if (unlikely(c == u)) + break; + } while (!atomic64_try_cmpxchg(v, &c, c + a)); + + return c; +} +#endif + /** * atomic64_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t @@ -1051,12 +1075,10 @@ static inline int atomic_dec_if_positive(atomic_t *v) * Atomically adds @a to @v, if @v was not already @u. * Returns true if the addition was done. */ -#ifdef atomic64_fetch_add_unless static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u) { return atomic64_fetch_add_unless(v, a, u) != u; } -#endif /** * atomic64_inc_not_zero - increment unless the number is zero -- cgit v1.2.3 From 18cc1814d4e7560412c9c8c6d28f9d6782c8b402 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:18 +0100 Subject: atomics/treewide: Make test ops optional Some of the atomics return the result of a test applied after the atomic operation, and almost all architectures implement these as trivial wrappers around the underlying atomic. Specifically: * _inc_and_test(v) is (_inc_return(v) == 0) * _dec_and_test(v) is (_dec_return(v) == 0) * _sub_and_test(i, v) is (_sub_return(i, v) == 0) * _add_negative(i, v) is (_add_return(i, v) < 0) Rather than have these definitions duplicated in all architectures, with minor inconsistencies in formatting and documentation, let's make these operations optional, with default fallbacks as above. Implementations must now provide a preprocessor symbol. The instrumented atomics are updated accordingly. Both x86 and m68k have custom implementations, which are left as-is, given preprocessor symbols to avoid being overridden. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Geert Uytterhoeven Acked-by: Peter Zijlstra (Intel) Acked-by: Palmer Dabbelt Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-16-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-instrumented.h | 24 ++++++ include/asm-generic/atomic.h | 9 --- include/asm-generic/atomic64.h | 4 - include/linux/atomic.h | 124 ++++++++++++++++++++++++++++++ 4 files changed, 148 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h index 2b487f28ef35..6b64c200de73 100644 --- a/include/asm-generic/atomic-instrumented.h +++ b/include/asm-generic/atomic-instrumented.h @@ -225,29 +225,41 @@ static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v) return arch_atomic64_dec_if_positive(v); } +#ifdef arch_atomic_dec_and_test +#define atomic_dec_and_test atomic_dec_and_test static __always_inline bool atomic_dec_and_test(atomic_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic_dec_and_test(v); } +#endif +#ifdef arch_atomic64_dec_and_test +#define atomic64_dec_and_test atomic64_dec_and_test static __always_inline bool atomic64_dec_and_test(atomic64_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic64_dec_and_test(v); } +#endif +#ifdef arch_atomic_inc_and_test +#define atomic_inc_and_test atomic_inc_and_test static __always_inline bool atomic_inc_and_test(atomic_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic_inc_and_test(v); } +#endif +#ifdef arch_atomic64_inc_and_test +#define atomic64_inc_and_test atomic64_inc_and_test static __always_inline bool atomic64_inc_and_test(atomic64_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic64_inc_and_test(v); } +#endif static __always_inline int atomic_add_return(int i, atomic_t *v) { @@ -333,29 +345,41 @@ static __always_inline s64 atomic64_fetch_xor(s64 i, atomic64_t *v) return arch_atomic64_fetch_xor(i, v); } +#ifdef arch_atomic_sub_and_test +#define atomic_sub_and_test atomic_sub_and_test static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic_sub_and_test(i, v); } +#endif +#ifdef arch_atomic64_sub_and_test +#define atomic64_sub_and_test atomic64_sub_and_test static __always_inline bool atomic64_sub_and_test(s64 i, atomic64_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic64_sub_and_test(i, v); } +#endif +#ifdef arch_atomic_add_negative +#define atomic_add_negative atomic_add_negative static __always_inline bool atomic_add_negative(int i, atomic_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic_add_negative(i, v); } +#endif +#ifdef arch_atomic64_add_negative +#define atomic64_add_negative atomic64_add_negative static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic64_add_negative(i, v); } +#endif static __always_inline unsigned long cmpxchg_size(volatile void *ptr, unsigned long old, unsigned long new, int size) diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 757e45821220..40cab858aaaa 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -186,11 +186,6 @@ ATOMIC_OP(xor, ^) #include -static inline int atomic_add_negative(int i, atomic_t *v) -{ - return atomic_add_return(i, v) < 0; -} - static inline void atomic_add(int i, atomic_t *v) { atomic_add_return(i, v); @@ -214,10 +209,6 @@ static inline void atomic_dec(atomic_t *v) #define atomic_dec_return(v) atomic_sub_return(1, (v)) #define atomic_inc_return(v) atomic_add_return(1, (v)) -#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0) -#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0) -#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) - #define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index 49460107b29a..d3827ab97aa4 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -56,13 +56,9 @@ extern long long atomic64_xchg(atomic64_t *v, long long new); extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u); #define atomic64_fetch_add_unless atomic64_fetch_add_unless -#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) #define atomic64_inc(v) atomic64_add(1LL, (v)) #define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) -#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) -#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) #define atomic64_dec(v) atomic64_sub(1LL, (v)) #define atomic64_dec_return(v) atomic64_sub_return(1LL, (v)) -#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) #endif /* _ASM_GENERIC_ATOMIC64_H */ diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 530562ac7909..3ee8da9023cd 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -569,6 +569,68 @@ static inline bool atomic_add_unless(atomic_t *v, int a, int u) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #endif +/** + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#ifndef atomic_inc_and_test +static inline bool atomic_inc_and_test(atomic_t *v) +{ + return atomic_inc_return(v) == 0; +} +#endif + +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +#ifndef atomic_dec_and_test +static inline bool atomic_dec_and_test(atomic_t *v) +{ + return atomic_dec_return(v) == 0; +} +#endif + +/** + * atomic_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +#ifndef atomic_sub_and_test +static inline bool atomic_sub_and_test(int i, atomic_t *v) +{ + return atomic_sub_return(i, v) == 0; +} +#endif + +/** + * atomic_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +#ifndef atomic_add_negative +static inline bool atomic_add_negative(int i, atomic_t *v) +{ + return atomic_add_return(i, v) < 0; +} +#endif + #ifndef atomic_andnot static inline void atomic_andnot(int i, atomic_t *v) { @@ -1091,6 +1153,68 @@ static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) #endif +/** + * atomic64_inc_and_test - increment and test + * @v: pointer of type atomic64_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#ifndef atomic64_inc_and_test +static inline bool atomic64_inc_and_test(atomic64_t *v) +{ + return atomic64_inc_return(v) == 0; +} +#endif + +/** + * atomic64_dec_and_test - decrement and test + * @v: pointer of type atomic64_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +#ifndef atomic64_dec_and_test +static inline bool atomic64_dec_and_test(atomic64_t *v) +{ + return atomic64_dec_return(v) == 0; +} +#endif + +/** + * atomic64_sub_and_test - subtract value from variable and test result + * @i: integer value to subtract + * @v: pointer of type atomic64_t + * + * Atomically subtracts @i from @v and returns + * true if the result is zero, or false for all + * other cases. + */ +#ifndef atomic64_sub_and_test +static inline bool atomic64_sub_and_test(long long i, atomic64_t *v) +{ + return atomic64_sub_return(i, v) == 0; +} +#endif + +/** + * atomic64_add_negative - add and test if negative + * @i: integer value to add + * @v: pointer of type atomic64_t + * + * Atomically adds @i to @v and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +#ifndef atomic64_add_negative +static inline bool atomic64_add_negative(long long i, atomic64_t *v) +{ + return atomic64_add_return(i, v) < 0; +} +#endif + #ifndef atomic64_andnot static inline void atomic64_andnot(long long i, atomic64_t *v) { -- cgit v1.2.3 From 9837559d8eb01ce834e56fc9a567c1d94ebd3698 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:19 +0100 Subject: atomics/treewide: Make unconditional inc/dec ops optional Many of the inc/dec ops are mandatory, but for most architectures inc/dec are simply trivial wrappers around their corresponding add/sub ops. Let's make all the inc/dec ops optional, so that we can get rid of these boilerplate wrappers. The instrumented atomics are updated accordingly. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Palmer Dabbelt Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-17-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-instrumented.h | 24 ++++++++++++++++ include/asm-generic/atomic.h | 13 --------- include/asm-generic/atomic64.h | 5 ---- include/linux/atomic.h | 48 +++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h index 6b64c200de73..12f9634750d7 100644 --- a/include/asm-generic/atomic-instrumented.h +++ b/include/asm-generic/atomic-instrumented.h @@ -102,29 +102,41 @@ static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u } #endif +#ifdef arch_atomic_inc +#define atomic_inc atomic_inc static __always_inline void atomic_inc(atomic_t *v) { kasan_check_write(v, sizeof(*v)); arch_atomic_inc(v); } +#endif +#ifdef arch_atomic64_inc +#define atomic64_inc atomic64_inc static __always_inline void atomic64_inc(atomic64_t *v) { kasan_check_write(v, sizeof(*v)); arch_atomic64_inc(v); } +#endif +#ifdef arch_atomic_dec +#define atomic_dec atomic_dec static __always_inline void atomic_dec(atomic_t *v) { kasan_check_write(v, sizeof(*v)); arch_atomic_dec(v); } +#endif +#ifdef atch_atomic64_dec +#define atomic64_dec static __always_inline void atomic64_dec(atomic64_t *v) { kasan_check_write(v, sizeof(*v)); arch_atomic64_dec(v); } +#endif static __always_inline void atomic_add(int i, atomic_t *v) { @@ -186,29 +198,41 @@ static __always_inline void atomic64_xor(s64 i, atomic64_t *v) arch_atomic64_xor(i, v); } +#ifdef arch_atomic_inc_return +#define atomic_inc_return atomic_inc_return static __always_inline int atomic_inc_return(atomic_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic_inc_return(v); } +#endif +#ifdef arch_atomic64_in_return +#define atomic64_inc_return atomic64_inc_return static __always_inline s64 atomic64_inc_return(atomic64_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic64_inc_return(v); } +#endif +#ifdef arch_atomic_dec_return +#define atomic_dec_return atomic_dec_return static __always_inline int atomic_dec_return(atomic_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic_dec_return(v); } +#endif +#ifdef arch_atomic64_dec_return +#define atomic64_dec_return atomic64_dec_return static __always_inline s64 atomic64_dec_return(atomic64_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic64_dec_return(v); } +#endif #ifdef arch_atomic64_inc_not_zero #define atomic64_inc_not_zero atomic64_inc_not_zero diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 40cab858aaaa..13324aa828eb 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -196,19 +196,6 @@ static inline void atomic_sub(int i, atomic_t *v) atomic_sub_return(i, v); } -static inline void atomic_inc(atomic_t *v) -{ - atomic_add_return(1, v); -} - -static inline void atomic_dec(atomic_t *v) -{ - atomic_sub_return(1, v); -} - -#define atomic_dec_return(v) atomic_sub_return(1, (v)) -#define atomic_inc_return(v) atomic_add_return(1, (v)) - #define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index d3827ab97aa4..242b79ae0b57 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -56,9 +56,4 @@ extern long long atomic64_xchg(atomic64_t *v, long long new); extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u); #define atomic64_fetch_add_unless atomic64_fetch_add_unless -#define atomic64_inc(v) atomic64_add(1LL, (v)) -#define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) -#define atomic64_dec(v) atomic64_sub(1LL, (v)) -#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v)) - #endif /* _ASM_GENERIC_ATOMIC64_H */ diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 3ee8da9023cd..24f345df7ba6 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -97,11 +97,23 @@ #endif #endif /* atomic_add_return_relaxed */ +#ifndef atomic_inc +#define atomic_inc(v) atomic_add(1, (v)) +#endif + /* atomic_inc_return_relaxed */ #ifndef atomic_inc_return_relaxed + +#ifndef atomic_inc_return +#define atomic_inc_return(v) atomic_add_return(1, (v)) +#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v)) +#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v)) +#define atomic_inc_return_release(v) atomic_add_return_release(1, (v)) +#else /* atomic_inc_return */ #define atomic_inc_return_relaxed atomic_inc_return #define atomic_inc_return_acquire atomic_inc_return #define atomic_inc_return_release atomic_inc_return +#endif /* atomic_inc_return */ #else /* atomic_inc_return_relaxed */ @@ -145,11 +157,23 @@ #endif #endif /* atomic_sub_return_relaxed */ +#ifndef atomic_dec +#define atomic_dec(v) atomic_sub(1, (v)) +#endif + /* atomic_dec_return_relaxed */ #ifndef atomic_dec_return_relaxed + +#ifndef atomic_dec_return +#define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v)) +#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v)) +#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) +#else /* atomic_dec_return */ #define atomic_dec_return_relaxed atomic_dec_return #define atomic_dec_return_acquire atomic_dec_return #define atomic_dec_return_release atomic_dec_return +#endif /* atomic_dec_return */ #else /* atomic_dec_return_relaxed */ @@ -748,11 +772,23 @@ static inline int atomic_dec_if_positive(atomic_t *v) #endif #endif /* atomic64_add_return_relaxed */ +#ifndef atomic64_inc +#define atomic64_inc(v) atomic64_add(1, (v)) +#endif + /* atomic64_inc_return_relaxed */ #ifndef atomic64_inc_return_relaxed + +#ifndef atomic64_inc_return +#define atomic64_inc_return(v) atomic64_add_return(1, (v)) +#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v)) +#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v)) +#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v)) +#else /* atomic64_inc_return */ #define atomic64_inc_return_relaxed atomic64_inc_return #define atomic64_inc_return_acquire atomic64_inc_return #define atomic64_inc_return_release atomic64_inc_return +#endif /* atomic64_inc_return */ #else /* atomic64_inc_return_relaxed */ @@ -797,11 +833,23 @@ static inline int atomic_dec_if_positive(atomic_t *v) #endif #endif /* atomic64_sub_return_relaxed */ +#ifndef atomic64_dec +#define atomic64_dec(v) atomic64_sub(1, (v)) +#endif + /* atomic64_dec_return_relaxed */ #ifndef atomic64_dec_return_relaxed + +#ifndef atomic64_dec_return +#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) +#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v)) +#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v)) +#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) +#else /* atomic64_dec_return */ #define atomic64_dec_return_relaxed atomic64_dec_return #define atomic64_dec_return_acquire atomic64_dec_return #define atomic64_dec_return_release atomic64_dec_return +#endif /* atomic64_dec_return */ #else /* atomic64_dec_return_relaxed */ -- cgit v1.2.3 From b3a2a05f9111de0b79312e577608a27b0318c0a1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:20 +0100 Subject: atomics/treewide: Make conditional inc/dec ops optional The conditional inc/dec ops differ for atomic_t and atomic64_t: - atomic_inc_unless_positive() is optional for atomic_t, and doesn't exist for atomic64_t. - atomic_dec_unless_negative() is optional for atomic_t, and doesn't exist for atomic64_t. - atomic_dec_if_positive is optional for atomic_t, and is mandatory for atomic64_t. Let's make these consistently optional for both. At the same time, let's clean up the existing fallbacks to use atomic_try_cmpxchg(). The instrumented atomics are updated accordingly. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-18-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-instrumented.h | 3 + include/asm-generic/atomic64.h | 1 + include/linux/atomic.h | 97 +++++++++++++++++++++++-------- 3 files changed, 77 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h index 12f9634750d7..3c64e95d5ed0 100644 --- a/include/asm-generic/atomic-instrumented.h +++ b/include/asm-generic/atomic-instrumented.h @@ -243,11 +243,14 @@ static __always_inline bool atomic64_inc_not_zero(atomic64_t *v) } #endif +#ifdef arch_atomic64_dec_if_positive +#define atomic64_dec_if_positive atomic64_dec_if_positive static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic64_dec_if_positive(v); } +#endif #ifdef arch_atomic_dec_and_test #define atomic_dec_and_test atomic_dec_and_test diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index 242b79ae0b57..97b28b7f1f29 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -51,6 +51,7 @@ ATOMIC64_OPS(xor) #undef ATOMIC64_OP extern long long atomic64_dec_if_positive(atomic64_t *v); +#define atomic64_dec_if_positive atomic64_dec_if_positive extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n); extern long long atomic64_xchg(atomic64_t *v, long long new); extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u); diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 24f345df7ba6..93fe5b4041e1 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -683,28 +683,30 @@ static inline int atomic_fetch_andnot_release(int i, atomic_t *v) #endif #ifndef atomic_inc_unless_negative -static inline bool atomic_inc_unless_negative(atomic_t *p) +static inline bool atomic_inc_unless_negative(atomic_t *v) { - int v, v1; - for (v = 0; v >= 0; v = v1) { - v1 = atomic_cmpxchg(p, v, v + 1); - if (likely(v1 == v)) - return true; - } - return false; + int c = atomic_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c + 1)); + + return true; } #endif #ifndef atomic_dec_unless_positive -static inline bool atomic_dec_unless_positive(atomic_t *p) +static inline bool atomic_dec_unless_positive(atomic_t *v) { - int v, v1; - for (v = 0; v <= 0; v = v1) { - v1 = atomic_cmpxchg(p, v, v - 1); - if (likely(v1 == v)) - return true; - } - return false; + int c = atomic_read(v); + + do { + if (unlikely(c > 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c - 1)); + + return true; } #endif @@ -718,17 +720,14 @@ static inline bool atomic_dec_unless_positive(atomic_t *p) #ifndef atomic_dec_if_positive static inline int atomic_dec_if_positive(atomic_t *v) { - int c, old, dec; - c = atomic_read(v); - for (;;) { + int dec, c = atomic_read(v); + + do { dec = c - 1; if (unlikely(dec < 0)) break; - old = atomic_cmpxchg((v), c, dec); - if (likely(old == c)) - break; - c = old; - } + } while (!atomic_try_cmpxchg(v, &c, dec)); + return dec; } #endif @@ -1290,6 +1289,56 @@ static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v } #endif +#ifndef atomic64_inc_unless_negative +static inline bool atomic64_inc_unless_negative(atomic64_t *v) +{ + long long c = atomic64_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic64_try_cmpxchg(v, &c, c + 1)); + + return true; +} +#endif + +#ifndef atomic64_dec_unless_positive +static inline bool atomic64_dec_unless_positive(atomic64_t *v) +{ + long long c = atomic64_read(v); + + do { + if (unlikely(c > 0)) + return false; + } while (!atomic64_try_cmpxchg(v, &c, c - 1)); + + return true; +} +#endif + +/* + * atomic64_dec_if_positive - decrement by 1 if old value positive + * @v: pointer of type atomic64_t + * + * The function returns the old value of *v minus 1, even if + * the atomic64 variable, v, was not decremented. + */ +#ifndef atomic64_dec_if_positive +static inline long long atomic64_dec_if_positive(atomic64_t *v) +{ + long long dec, c = atomic64_read(v); + + do { + dec = c - 1; + if (unlikely(dec < 0)) + break; + } while (!atomic64_try_cmpxchg(v, &c, dec)); + + return dec; +} +#endif + #define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) #define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) -- cgit v1.2.3 From 7cc7eaad49c30ac165ecf84d95b26f7e0d53bd97 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:21 +0100 Subject: atomics/treewide: Clean up '*_andnot()' ifdeffery The ifdeffery for atomic*_{fetch_,}andnot() is unlike that for all the other atomics. If atomic*_andnot() is not defined, the corresponding atomic*_fetch_andnot() is assumed to not be defined. Additionally, the fallbacks for the various ordering cases are written much later in atomic.h as static inlines. This isn't problematic today, but gets in the way of scripting the generation of atomics. To prepare for scripting, this patch: * Switches to separate ifdefs for atomic*_andnot() and atomic*_fetch_andnot(), updating implementations as appropriate. * Moves the fallbacks into the standards ifdefs, as macro expansions rather than static inlines. * Removes trivial andnot implementations from architectures, where these are superseded by core code. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-19-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 96 ++++++++++++++++---------------------------------- 1 file changed, 30 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 93fe5b4041e1..8e04f1f69bd9 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -354,12 +354,22 @@ #endif #endif /* atomic_fetch_and_relaxed */ -#ifdef atomic_andnot -/* atomic_fetch_andnot_relaxed */ +#ifndef atomic_andnot +#define atomic_andnot(i, v) atomic_and(~(int)(i), (v)) +#endif + #ifndef atomic_fetch_andnot_relaxed -#define atomic_fetch_andnot_relaxed atomic_fetch_andnot -#define atomic_fetch_andnot_acquire atomic_fetch_andnot -#define atomic_fetch_andnot_release atomic_fetch_andnot + +#ifndef atomic_fetch_andnot +#define atomic_fetch_andnot(i, v) atomic_fetch_and(~(int)(i), (v)) +#define atomic_fetch_andnot_relaxed(i, v) atomic_fetch_and_relaxed(~(int)(i), (v)) +#define atomic_fetch_andnot_acquire(i, v) atomic_fetch_and_acquire(~(int)(i), (v)) +#define atomic_fetch_andnot_release(i, v) atomic_fetch_and_release(~(int)(i), (v)) +#else /* atomic_fetch_andnot */ +#define atomic_fetch_andnot_relaxed atomic_fetch_andnot +#define atomic_fetch_andnot_acquire atomic_fetch_andnot +#define atomic_fetch_andnot_release atomic_fetch_andnot +#endif /* atomic_fetch_andnot */ #else /* atomic_fetch_andnot_relaxed */ @@ -378,7 +388,6 @@ __atomic_op_fence(atomic_fetch_andnot, __VA_ARGS__) #endif #endif /* atomic_fetch_andnot_relaxed */ -#endif /* atomic_andnot */ /* atomic_fetch_xor_relaxed */ #ifndef atomic_fetch_xor_relaxed @@ -655,33 +664,6 @@ static inline bool atomic_add_negative(int i, atomic_t *v) } #endif -#ifndef atomic_andnot -static inline void atomic_andnot(int i, atomic_t *v) -{ - atomic_and(~i, v); -} - -static inline int atomic_fetch_andnot(int i, atomic_t *v) -{ - return atomic_fetch_and(~i, v); -} - -static inline int atomic_fetch_andnot_relaxed(int i, atomic_t *v) -{ - return atomic_fetch_and_relaxed(~i, v); -} - -static inline int atomic_fetch_andnot_acquire(int i, atomic_t *v) -{ - return atomic_fetch_and_acquire(~i, v); -} - -static inline int atomic_fetch_andnot_release(int i, atomic_t *v) -{ - return atomic_fetch_and_release(~i, v); -} -#endif - #ifndef atomic_inc_unless_negative static inline bool atomic_inc_unless_negative(atomic_t *v) { @@ -1029,12 +1011,22 @@ static inline int atomic_dec_if_positive(atomic_t *v) #endif #endif /* atomic64_fetch_and_relaxed */ -#ifdef atomic64_andnot -/* atomic64_fetch_andnot_relaxed */ +#ifndef atomic64_andnot +#define atomic64_andnot(i, v) atomic64_and(~(long long)(i), (v)) +#endif + #ifndef atomic64_fetch_andnot_relaxed -#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot -#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot -#define atomic64_fetch_andnot_release atomic64_fetch_andnot + +#ifndef atomic64_fetch_andnot +#define atomic64_fetch_andnot(i, v) atomic64_fetch_and(~(long long)(i), (v)) +#define atomic64_fetch_andnot_relaxed(i, v) atomic64_fetch_and_relaxed(~(long long)(i), (v)) +#define atomic64_fetch_andnot_acquire(i, v) atomic64_fetch_and_acquire(~(long long)(i), (v)) +#define atomic64_fetch_andnot_release(i, v) atomic64_fetch_and_release(~(long long)(i), (v)) +#else /* atomic64_fetch_andnot */ +#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot +#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot +#define atomic64_fetch_andnot_release atomic64_fetch_andnot +#endif /* atomic64_fetch_andnot */ #else /* atomic64_fetch_andnot_relaxed */ @@ -1053,7 +1045,6 @@ static inline int atomic_dec_if_positive(atomic_t *v) __atomic_op_fence(atomic64_fetch_andnot, __VA_ARGS__) #endif #endif /* atomic64_fetch_andnot_relaxed */ -#endif /* atomic64_andnot */ /* atomic64_fetch_xor_relaxed */ #ifndef atomic64_fetch_xor_relaxed @@ -1262,33 +1253,6 @@ static inline bool atomic64_add_negative(long long i, atomic64_t *v) } #endif -#ifndef atomic64_andnot -static inline void atomic64_andnot(long long i, atomic64_t *v) -{ - atomic64_and(~i, v); -} - -static inline long long atomic64_fetch_andnot(long long i, atomic64_t *v) -{ - return atomic64_fetch_and(~i, v); -} - -static inline long long atomic64_fetch_andnot_relaxed(long long i, atomic64_t *v) -{ - return atomic64_fetch_and_relaxed(~i, v); -} - -static inline long long atomic64_fetch_andnot_acquire(long long i, atomic64_t *v) -{ - return atomic64_fetch_and_acquire(~i, v); -} - -static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v) -{ - return atomic64_fetch_and_release(~i, v); -} -#endif - #ifndef atomic64_inc_unless_negative static inline bool atomic64_inc_unless_negative(atomic64_t *v) { -- cgit v1.2.3 From 75a040ff14d9a99fc041f5e1d8f09541cab13ba4 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 1 Apr 2018 01:00:36 +0300 Subject: locking/refcounts: Include fewer headers in Debloat 's dependencies: - is not needed, but is. - is not needed, only a forward declaration of "struct mutex". - is not needed, is enough. Signed-off-by: Alexey Dobriyan Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/lkml/20180331220036.GA7676@avx2 Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 4193c41e383a..c36addd27dd5 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -3,9 +3,10 @@ #define _LINUX_REFCOUNT_H #include -#include -#include -#include +#include +#include + +struct mutex; /** * struct refcount_t - variant of atomic_t specialized for reference counts -- cgit v1.2.3 From afed7bcf9487bb28e2e2b016a195085c07416c0b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 10:36:07 +0100 Subject: locking/refcount: Always allow checked forms In many cases, it would be useful to be able to use the full sanity-checked refcount helpers regardless of CONFIG_REFCOUNT_FULL, as this would help to avoid duplicate warnings where callers try to sanity-check refcount manipulation. This patch refactors things such that the full refcount helpers were always built, as refcount_${op}_checked(), such that they can be used regardless of CONFIG_REFCOUNT_FULL. This will allow code which *always* wants a checked refcount to opt-in, avoiding the need to duplicate the logic for warnings. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: David Sterba Acked-by: Kees Cook Acked-by: Will Deacon Cc: Boqun Feng Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180711093607.1644-1-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index c36addd27dd5..53c5eca24d83 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -43,17 +43,30 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } +extern __must_check bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r); +extern void refcount_add_checked(unsigned int i, refcount_t *r); + +extern __must_check bool refcount_inc_not_zero_checked(refcount_t *r); +extern void refcount_inc_checked(refcount_t *r); + +extern __must_check bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r); + +extern __must_check bool refcount_dec_and_test_checked(refcount_t *r); +extern void refcount_dec_checked(refcount_t *r); + #ifdef CONFIG_REFCOUNT_FULL -extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r); -extern void refcount_add(unsigned int i, refcount_t *r); -extern __must_check bool refcount_inc_not_zero(refcount_t *r); -extern void refcount_inc(refcount_t *r); +#define refcount_add_not_zero refcount_add_not_zero_checked +#define refcount_add refcount_add_checked + +#define refcount_inc_not_zero refcount_inc_not_zero_checked +#define refcount_inc refcount_inc_checked + +#define refcount_sub_and_test refcount_sub_and_test_checked -extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r); +#define refcount_dec_and_test refcount_dec_and_test_checked +#define refcount_dec refcount_dec_checked -extern __must_check bool refcount_dec_and_test(refcount_t *r); -extern void refcount_dec(refcount_t *r); #else # ifdef CONFIG_ARCH_HAS_REFCOUNT # include -- cgit v1.2.3 From 3d85b2703783636366560c94842affd8608ec9d1 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Mon, 16 Jul 2018 11:06:02 -0700 Subject: locking/spinlock, sched/core: Clarify requirements for smp_mb__after_spinlock() There are 11 interpretations of the requirements described in the header comment for smp_mb__after_spinlock(): one for each LKMM maintainer, and one currently encoded in the Cat file. Stick to the latter (until a more satisfactory solution is available). This also reworks some snippets related to the barrier to illustrate the requirements and to link them to the idioms which are relied upon at its call sites. Suggested-by: Boqun Feng Signed-off-by: Andrea Parri Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Will Deacon Cc: akiyks@gmail.com Cc: dhowells@redhat.com Cc: j.alglave@ucl.ac.uk Cc: linux-arch@vger.kernel.org Cc: luc.maranget@inria.fr Cc: npiggin@gmail.com Cc: parri.andrea@gmail.com Cc: stern@rowland.harvard.edu Link: http://lkml.kernel.org/r/20180716180605.16115-11-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 53 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index fd57888d4942..3190997df9ca 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -114,29 +114,48 @@ do { \ #endif /*arch_spin_is_contended*/ /* - * This barrier must provide two things: + * smp_mb__after_spinlock() provides the equivalent of a full memory barrier + * between program-order earlier lock acquisitions and program-order later + * memory accesses. * - * - it must guarantee a STORE before the spin_lock() is ordered against a - * LOAD after it, see the comments at its two usage sites. + * This guarantees that the following two properties hold: * - * - it must ensure the critical section is RCsc. + * 1) Given the snippet: * - * The latter is important for cases where we observe values written by other - * CPUs in spin-loops, without barriers, while being subject to scheduling. + * { X = 0; Y = 0; } * - * CPU0 CPU1 CPU2 + * CPU0 CPU1 * - * for (;;) { - * if (READ_ONCE(X)) - * break; - * } - * X=1 - * - * - * r = X; + * WRITE_ONCE(X, 1); WRITE_ONCE(Y, 1); + * spin_lock(S); smp_mb(); + * smp_mb__after_spinlock(); r1 = READ_ONCE(X); + * r0 = READ_ONCE(Y); + * spin_unlock(S); * - * without transitivity it could be that CPU1 observes X!=0 breaks the loop, - * we get migrated and CPU2 sees X==0. + * it is forbidden that CPU0 does not observe CPU1's store to Y (r0 = 0) + * and CPU1 does not observe CPU0's store to X (r1 = 0); see the comments + * preceding the call to smp_mb__after_spinlock() in __schedule() and in + * try_to_wake_up(). + * + * 2) Given the snippet: + * + * { X = 0; Y = 0; } + * + * CPU0 CPU1 CPU2 + * + * spin_lock(S); spin_lock(S); r1 = READ_ONCE(Y); + * WRITE_ONCE(X, 1); smp_mb__after_spinlock(); smp_rmb(); + * spin_unlock(S); r0 = READ_ONCE(X); r2 = READ_ONCE(X); + * WRITE_ONCE(Y, 1); + * spin_unlock(S); + * + * it is forbidden that CPU0's critical section executes before CPU1's + * critical section (r0 = 1), CPU2 observes CPU1's store to Y (r1 = 1) + * and CPU2 does not observe CPU0's store to X (r2 = 0); see the comments + * preceding the calls to smp_rmb() in try_to_wake_up() for similar + * snippets but "projected" onto two CPUs. + * + * Property (2) upgrades the lock to an RCsc lock. * * Since most load-store architectures implement ACQUIRE with an smp_mb() after * the LL/SC loop, they need no further barriers. Similarly all our TSO -- cgit v1.2.3 From 7696f9910a9a40b8a952f57d3428515fabd2d889 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Mon, 16 Jul 2018 11:06:03 -0700 Subject: sched/Documentation: Update wake_up() & co. memory-barrier guarantees Both the implementation and the users' expectation [1] for the various wakeup primitives have evolved over time, but the documentation has not kept up with these changes: brings it into 2018. [1] http://lkml.kernel.org/r/20180424091510.GB4064@hirez.programming.kicks-ass.net Also applied feedback from Alan Stern. Suggested-by: Peter Zijlstra Signed-off-by: Andrea Parri Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra (Intel) Cc: Akira Yokosawa Cc: Alan Stern Cc: Boqun Feng Cc: Daniel Lustig Cc: David Howells Cc: Jade Alglave Cc: Jonathan Corbet Cc: Linus Torvalds Cc: Luc Maranget Cc: Nicholas Piggin Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arch@vger.kernel.org Cc: parri.andrea@gmail.com Link: http://lkml.kernel.org/r/20180716180605.16115-12-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 43731fe51c97..05cd419f962d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -167,8 +167,8 @@ struct task_group; * need_sleep = false; * wake_up_state(p, TASK_UNINTERRUPTIBLE); * - * Where wake_up_state() (and all other wakeup primitives) imply enough - * barriers to order the store of the variable against wakeup. + * where wake_up_state() executes a full memory barrier before accessing the + * task state. * * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is, * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a -- cgit v1.2.3 From df79ed2c064363cdc7d2d896923c1885d4e30520 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Jul 2018 12:30:08 +0100 Subject: locking/atomics: Simplify cmpxchg() instrumentation Currently we define some fairly verbose wrappers for the cmpxchg() family so that we can pass a pointer and size into kasan_check_write(). The wrappers duplicate the size-switching logic necessary in arch code, and only work for scalar types. On some architectures, (cmp)xchg are used on non-scalar types, and thus the instrumented wrappers need to be able to handle this. We could take the type-punning logic from {READ,WRITE}_ONCE(), but this makes the wrappers even more verbose, and requires several local variables in the macros. Instead, let's simplify the wrappers into simple macros which: * snapshot the pointer into a single local variable, called __ai_ptr to avoid conflicts with variables in the scope of the caller. * call kasan_check_write() on __ai_ptr. * invoke the relevant arch_*() function, passing the original arguments, bar __ai_ptr being substituted for ptr. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Boqun Feng Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: andy.shevchenko@gmail.com Cc: arnd@arndb.de Cc: aryabinin@virtuozzo.com Cc: catalin.marinas@arm.com Cc: glider@google.com Cc: linux-arm-kernel@lists.infradead.org Cc: parri.andrea@gmail.com Cc: peter@hurleysoftware.com Link: http://lkml.kernel.org/r/20180716113017.3909-4-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-instrumented.h | 100 +++++------------------------- 1 file changed, 15 insertions(+), 85 deletions(-) (limited to 'include') diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h index 3c64e95d5ed0..c7c3e4cdd942 100644 --- a/include/asm-generic/atomic-instrumented.h +++ b/include/asm-generic/atomic-instrumented.h @@ -408,109 +408,39 @@ static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v) } #endif -static __always_inline unsigned long -cmpxchg_size(volatile void *ptr, unsigned long old, unsigned long new, int size) -{ - kasan_check_write(ptr, size); - switch (size) { - case 1: - return arch_cmpxchg((u8 *)ptr, (u8)old, (u8)new); - case 2: - return arch_cmpxchg((u16 *)ptr, (u16)old, (u16)new); - case 4: - return arch_cmpxchg((u32 *)ptr, (u32)old, (u32)new); - case 8: - BUILD_BUG_ON(sizeof(unsigned long) != 8); - return arch_cmpxchg((u64 *)ptr, (u64)old, (u64)new); - } - BUILD_BUG(); - return 0; -} - #define cmpxchg(ptr, old, new) \ ({ \ - ((__typeof__(*(ptr)))cmpxchg_size((ptr), (unsigned long)(old), \ - (unsigned long)(new), sizeof(*(ptr)))); \ + typeof(ptr) __ai_ptr = (ptr); \ + kasan_check_write(__ai_ptr, sizeof(*__ai_ptr)); \ + arch_cmpxchg(__ai_ptr, (old), (new)); \ }) -static __always_inline unsigned long -sync_cmpxchg_size(volatile void *ptr, unsigned long old, unsigned long new, - int size) -{ - kasan_check_write(ptr, size); - switch (size) { - case 1: - return arch_sync_cmpxchg((u8 *)ptr, (u8)old, (u8)new); - case 2: - return arch_sync_cmpxchg((u16 *)ptr, (u16)old, (u16)new); - case 4: - return arch_sync_cmpxchg((u32 *)ptr, (u32)old, (u32)new); - case 8: - BUILD_BUG_ON(sizeof(unsigned long) != 8); - return arch_sync_cmpxchg((u64 *)ptr, (u64)old, (u64)new); - } - BUILD_BUG(); - return 0; -} - #define sync_cmpxchg(ptr, old, new) \ ({ \ - ((__typeof__(*(ptr)))sync_cmpxchg_size((ptr), \ - (unsigned long)(old), (unsigned long)(new), \ - sizeof(*(ptr)))); \ + typeof(ptr) __ai_ptr = (ptr); \ + kasan_check_write(__ai_ptr, sizeof(*__ai_ptr)); \ + arch_sync_cmpxchg(__ai_ptr, (old), (new)); \ }) -static __always_inline unsigned long -cmpxchg_local_size(volatile void *ptr, unsigned long old, unsigned long new, - int size) -{ - kasan_check_write(ptr, size); - switch (size) { - case 1: - return arch_cmpxchg_local((u8 *)ptr, (u8)old, (u8)new); - case 2: - return arch_cmpxchg_local((u16 *)ptr, (u16)old, (u16)new); - case 4: - return arch_cmpxchg_local((u32 *)ptr, (u32)old, (u32)new); - case 8: - BUILD_BUG_ON(sizeof(unsigned long) != 8); - return arch_cmpxchg_local((u64 *)ptr, (u64)old, (u64)new); - } - BUILD_BUG(); - return 0; -} - #define cmpxchg_local(ptr, old, new) \ ({ \ - ((__typeof__(*(ptr)))cmpxchg_local_size((ptr), \ - (unsigned long)(old), (unsigned long)(new), \ - sizeof(*(ptr)))); \ + typeof(ptr) __ai_ptr = (ptr); \ + kasan_check_write(__ai_ptr, sizeof(*__ai_ptr)); \ + arch_cmpxchg_local(__ai_ptr, (old), (new)); \ }) -static __always_inline u64 -cmpxchg64_size(volatile u64 *ptr, u64 old, u64 new) -{ - kasan_check_write(ptr, sizeof(*ptr)); - return arch_cmpxchg64(ptr, old, new); -} - #define cmpxchg64(ptr, old, new) \ ({ \ - ((__typeof__(*(ptr)))cmpxchg64_size((ptr), (u64)(old), \ - (u64)(new))); \ + typeof(ptr) __ai_ptr = (ptr); \ + kasan_check_write(__ai_ptr, sizeof(*__ai_ptr)); \ + arch_cmpxchg64(__ai_ptr, (old), (new)); \ }) -static __always_inline u64 -cmpxchg64_local_size(volatile u64 *ptr, u64 old, u64 new) -{ - kasan_check_write(ptr, sizeof(*ptr)); - return arch_cmpxchg64_local(ptr, old, new); -} - #define cmpxchg64_local(ptr, old, new) \ ({ \ - ((__typeof__(*(ptr)))cmpxchg64_local_size((ptr), (u64)(old), \ - (u64)(new))); \ + typeof(ptr) __ai_ptr = (ptr); \ + kasan_check_write(__ai_ptr, sizeof(*__ai_ptr)); \ + arch_cmpxchg64_local(__ai_ptr, (old), (new)); \ }) /* -- cgit v1.2.3 From f9881cc43b118efc6f82fef2d121166113ee9f8e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Jul 2018 12:30:09 +0100 Subject: locking/atomics: Instrument xchg() While we instrument all of the (non-relaxed) atomic_*() functions and cmpxchg(), we missed xchg(). Let's add instrumentation for xchg(), fixing up x86 to implement arch_xchg(). Signed-off-by: Mark Rutland Acked-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Boqun Feng Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: andy.shevchenko@gmail.com Cc: arnd@arndb.de Cc: aryabinin@virtuozzo.com Cc: catalin.marinas@arm.com Cc: glider@google.com Cc: linux-arm-kernel@lists.infradead.org Cc: parri.andrea@gmail.com Cc: peter@hurleysoftware.com Link: http://lkml.kernel.org/r/20180716113017.3909-5-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-instrumented.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h index c7c3e4cdd942..53481b6eacdf 100644 --- a/include/asm-generic/atomic-instrumented.h +++ b/include/asm-generic/atomic-instrumented.h @@ -408,6 +408,13 @@ static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v) } #endif +#define xchg(ptr, new) \ +({ \ + typeof(ptr) __ai_ptr = (ptr); \ + kasan_check_write(__ai_ptr, sizeof(*__ai_ptr)); \ + arch_xchg(__ai_ptr, (new)); \ +}) + #define cmpxchg(ptr, old, new) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ -- cgit v1.2.3 From 4d2b25f630c731218d04f72580b4de68cb7a6e00 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Jul 2018 12:30:10 +0100 Subject: locking/atomics: Instrument cmpxchg_double*() We currently don't instrument cmpxchg_double() and cmpxchg_double_local() due to compilation issues reported in the past, which are supposedly related to GCC bug 72873 [1], reported when GCC 7 was not yet released. This bug only applies to x86-64, and does not apply to other architectures. While the test case for GCC bug 72873 triggers issues with released versions of GCC, the instrumented kernel code compiles fine for all configurations I have tried, and it is unclear how the two cases are/were related. As we can't reproduce the kernel build failures, let's instrument cmpxchg_double*() again. We can revisit the issue if build failures reappear. Signed-off-by: Mark Rutland Acked-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Arnd Bergmann Cc: Boqun Feng Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: andy.shevchenko@gmail.com Cc: aryabinin@virtuozzo.com Cc: catalin.marinas@arm.com Cc: glider@google.com Cc: linux-arm-kernel@lists.infradead.org Cc: parri.andrea@gmail.com Cc: peter@hurleysoftware.com Link: http://lkml.kernel.org/r/20180716113017.3909-6-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-instrumented.h | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h index 53481b6eacdf..0d4b1d3dbc1e 100644 --- a/include/asm-generic/atomic-instrumented.h +++ b/include/asm-generic/atomic-instrumented.h @@ -450,23 +450,18 @@ static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v) arch_cmpxchg64_local(__ai_ptr, (old), (new)); \ }) -/* - * Originally we had the following code here: - * __typeof__(p1) ____p1 = (p1); - * kasan_check_write(____p1, 2 * sizeof(*____p1)); - * arch_cmpxchg_double(____p1, (p2), (o1), (o2), (n1), (n2)); - * But it leads to compilation failures (see gcc issue 72873). - * So for now it's left non-instrumented. - * There are few callers of cmpxchg_double(), so it's not critical. - */ #define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ ({ \ - arch_cmpxchg_double((p1), (p2), (o1), (o2), (n1), (n2)); \ + typeof(p1) __ai_p1 = (p1); \ + kasan_check_write(__ai_p1, 2 * sizeof(*__ai_p1)); \ + arch_cmpxchg_double(__ai_p1, (p2), (o1), (o2), (n1), (n2)); \ }) -#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ -({ \ - arch_cmpxchg_double_local((p1), (p2), (o1), (o2), (n1), (n2)); \ +#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ +({ \ + typeof(p1) __ai_p1 = (p1); \ + kasan_check_write(__ai_p1, 2 * sizeof(*__ai_p1)); \ + arch_cmpxchg_double_local(__ai_p1, (p2), (o1), (o2), (n1), (n2)); \ }) #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -- cgit v1.2.3 From fd2efaa4eb5317c3a86357a83a7d456a1b86a0ac Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 16 Jul 2018 12:30:11 +0100 Subject: locking/atomics: Rework ordering barriers Currently architectures can override __atomic_op_*() to define the barriers used before/after a relaxed atomic when used to build acquire/release/fence variants. This has the unfortunate property of requiring the architecture to define the full wrapper for the atomics, rather than just the barriers they care about, and gets in the way of generating atomics which can be easily read. Instead, this patch has architectures define an optional set of barriers: * __atomic_acquire_fence() * __atomic_release_fence() * __atomic_pre_full_fence() * __atomic_post_full_fence() ... which uses to build the wrappers. It would be nice if we could undef these, along with the __atomic_op_*() wrappers, but that would break the cmpxchg() wrappers, which are written in preprocessor. Undefs would have been nice, but alas. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrea Parri Cc: Boqun Feng Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: andy.shevchenko@gmail.com Cc: arnd@arndb.de Cc: aryabinin@virtuozzo.com Cc: catalin.marinas@arm.com Cc: dvyukov@google.com Cc: glider@google.com Cc: linux-arm-kernel@lists.infradead.org Cc: peter@hurleysoftware.com Link: http://lkml.kernel.org/r/20180716113017.3909-7-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 8e04f1f69bd9..1e8e88bdaf09 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -38,40 +38,46 @@ * barriers on top of the relaxed variant. In the case where the relaxed * variant is already fully ordered, no additional barriers are needed. * - * Besides, if an arch has a special barrier for acquire/release, it could - * implement its own __atomic_op_* and use the same framework for building - * variants - * - * If an architecture overrides __atomic_op_acquire() it will probably want - * to define smp_mb__after_spinlock(). + * If an architecture overrides __atomic_acquire_fence() it will probably + * want to define smp_mb__after_spinlock(). */ -#ifndef __atomic_op_acquire +#ifndef __atomic_acquire_fence +#define __atomic_acquire_fence smp_mb__after_atomic +#endif + +#ifndef __atomic_release_fence +#define __atomic_release_fence smp_mb__before_atomic +#endif + +#ifndef __atomic_pre_full_fence +#define __atomic_pre_full_fence smp_mb__before_atomic +#endif + +#ifndef __atomic_post_full_fence +#define __atomic_post_full_fence smp_mb__after_atomic +#endif + #define __atomic_op_acquire(op, args...) \ ({ \ typeof(op##_relaxed(args)) __ret = op##_relaxed(args); \ - smp_mb__after_atomic(); \ + __atomic_acquire_fence(); \ __ret; \ }) -#endif -#ifndef __atomic_op_release #define __atomic_op_release(op, args...) \ ({ \ - smp_mb__before_atomic(); \ + __atomic_release_fence(); \ op##_relaxed(args); \ }) -#endif -#ifndef __atomic_op_fence #define __atomic_op_fence(op, args...) \ ({ \ typeof(op##_relaxed(args)) __ret; \ - smp_mb__before_atomic(); \ + __atomic_pre_full_fence(); \ __ret = op##_relaxed(args); \ - smp_mb__after_atomic(); \ + __atomic_post_full_fence(); \ __ret; \ }) -#endif /* atomic_add_return_relaxed */ #ifndef atomic_add_return_relaxed -- cgit v1.2.3