From c5c0ba953b8c969c5d51bf1c57f239866a97c47c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 31 May 2023 15:08:38 +0200 Subject: percpu: Add {raw,this}_cpu_try_cmpxchg() Add the try_cmpxchg() form to the per-cpu ops. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Mark Rutland Tested-by: Mark Rutland Link: https://lore.kernel.org/r/20230531132323.587480729@infradead.org --- include/asm-generic/percpu.h | 113 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 109 insertions(+), 4 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 6432a7fade91..96af32c283b2 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -89,16 +89,37 @@ do { \ __ret; \ }) -#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ +#define __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, _cmpxchg) \ +({ \ + typeof(pcp) __val, __old = *(ovalp); \ + __val = _cmpxchg(pcp, __old, nval); \ + if (__val != __old) \ + *(ovalp) = __val; \ + __val == __old; \ +}) + +#define raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval) \ ({ \ typeof(pcp) *__p = raw_cpu_ptr(&(pcp)); \ - typeof(pcp) __ret; \ - __ret = *__p; \ - if (__ret == (oval)) \ + typeof(pcp) __val = *__p, __old = *(ovalp); \ + bool __ret; \ + if (__val == __old) { \ *__p = nval; \ + __ret = true; \ + } else { \ + *(ovalp) = __val; \ + __ret = false; \ + } \ __ret; \ }) +#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ +({ \ + typeof(pcp) __old = (oval); \ + raw_cpu_generic_try_cmpxchg(pcp, &__old, nval); \ + __old; \ +}) + #define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ ({ \ typeof(pcp1) *__p1 = raw_cpu_ptr(&(pcp1)); \ @@ -170,6 +191,16 @@ do { \ __ret; \ }) +#define this_cpu_generic_try_cmpxchg(pcp, ovalp, nval) \ +({ \ + bool __ret; \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + __ret = raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval); \ + raw_local_irq_restore(__flags); \ + __ret; \ +}) + #define this_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ typeof(pcp) __ret; \ @@ -282,6 +313,43 @@ do { \ #define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) #endif +#ifndef raw_cpu_try_cmpxchg_1 +#ifdef raw_cpu_cmpxchg_1 +#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) \ + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_1) +#else +#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) \ + raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval) +#endif +#endif +#ifndef raw_cpu_try_cmpxchg_2 +#ifdef raw_cpu_cmpxchg_2 +#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) \ + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_2) +#else +#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) \ + raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval) +#endif +#endif +#ifndef raw_cpu_try_cmpxchg_4 +#ifdef raw_cpu_cmpxchg_4 +#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) \ + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_4) +#else +#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) \ + raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval) +#endif +#endif +#ifndef raw_cpu_try_cmpxchg_8 +#ifdef raw_cpu_cmpxchg_8 +#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) \ + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg_8) +#else +#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) \ + raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval) +#endif +#endif + #ifndef raw_cpu_cmpxchg_1 #define raw_cpu_cmpxchg_1(pcp, oval, nval) \ raw_cpu_generic_cmpxchg(pcp, oval, nval) @@ -407,6 +475,43 @@ do { \ #define this_cpu_xchg_8(pcp, nval) 
this_cpu_generic_xchg(pcp, nval) #endif +#ifndef this_cpu_try_cmpxchg_1 +#ifdef this_cpu_cmpxchg_1 +#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) \ + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_1) +#else +#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) \ + this_cpu_generic_try_cmpxchg(pcp, ovalp, nval) +#endif +#endif +#ifndef this_cpu_try_cmpxchg_2 +#ifdef this_cpu_cmpxchg_2 +#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) \ + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_2) +#else +#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) \ + this_cpu_generic_try_cmpxchg(pcp, ovalp, nval) +#endif +#endif +#ifndef this_cpu_try_cmpxchg_4 +#ifdef this_cpu_cmpxchg_4 +#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) \ + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_4) +#else +#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) \ + this_cpu_generic_try_cmpxchg(pcp, ovalp, nval) +#endif +#endif +#ifndef this_cpu_try_cmpxchg_8 +#ifdef this_cpu_cmpxchg_8 +#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) \ + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg_8) +#else +#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) \ + this_cpu_generic_try_cmpxchg(pcp, ovalp, nval) +#endif +#endif + #ifndef this_cpu_cmpxchg_1 #define this_cpu_cmpxchg_1(pcp, oval, nval) \ this_cpu_generic_cmpxchg(pcp, oval, nval) -- cgit v1.2.3 From 6d12c8d308e68b9b0fa98ca2df4f83db4b4c965d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 31 May 2023 15:08:39 +0200 Subject: percpu: Wire up cmpxchg128 In order to replace cmpxchg_double() with the newly minted cmpxchg128() family of functions, wire it up in this_cpu_cmpxchg(). Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Mark Rutland Tested-by: Mark Rutland Link: https://lore.kernel.org/r/20230531132323.654945124@infradead.org --- include/asm-generic/percpu.h | 56 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include/asm-generic') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 96af32c283b2..5c66e4496289 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -350,6 +350,25 @@ do { \ #endif #endif +#ifndef raw_cpu_try_cmpxchg64 +#ifdef raw_cpu_cmpxchg64 +#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) \ + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg64) +#else +#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) \ + raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval) +#endif +#endif +#ifndef raw_cpu_try_cmpxchg128 +#ifdef raw_cpu_cmpxchg128 +#define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) \ + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, raw_cpu_cmpxchg128) +#else +#define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) \ + raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval) +#endif +#endif + #ifndef raw_cpu_cmpxchg_1 #define raw_cpu_cmpxchg_1(pcp, oval, nval) \ raw_cpu_generic_cmpxchg(pcp, oval, nval) @@ -367,6 +386,15 @@ do { \ raw_cpu_generic_cmpxchg(pcp, oval, nval) #endif +#ifndef raw_cpu_cmpxchg64 +#define raw_cpu_cmpxchg64(pcp, oval, nval) \ + raw_cpu_generic_cmpxchg(pcp, oval, nval) +#endif +#ifndef raw_cpu_cmpxchg128 +#define raw_cpu_cmpxchg128(pcp, oval, nval) \ + raw_cpu_generic_cmpxchg(pcp, oval, nval) +#endif + #ifndef raw_cpu_cmpxchg_double_1 #define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) @@ -512,6 +540,25 @@ do { \ #endif #endif +#ifndef this_cpu_try_cmpxchg64 +#ifdef this_cpu_cmpxchg64 +#define 
this_cpu_try_cmpxchg64(pcp, ovalp, nval) \ + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg64) +#else +#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) \ + this_cpu_generic_try_cmpxchg(pcp, ovalp, nval) +#endif +#endif +#ifndef this_cpu_try_cmpxchg128 +#ifdef this_cpu_cmpxchg128 +#define this_cpu_try_cmpxchg128(pcp, ovalp, nval) \ + __cpu_fallback_try_cmpxchg(pcp, ovalp, nval, this_cpu_cmpxchg128) +#else +#define this_cpu_try_cmpxchg128(pcp, ovalp, nval) \ + this_cpu_generic_try_cmpxchg(pcp, ovalp, nval) +#endif +#endif + #ifndef this_cpu_cmpxchg_1 #define this_cpu_cmpxchg_1(pcp, oval, nval) \ this_cpu_generic_cmpxchg(pcp, oval, nval) @@ -529,6 +576,15 @@ do { \ this_cpu_generic_cmpxchg(pcp, oval, nval) #endif +#ifndef this_cpu_cmpxchg64 +#define this_cpu_cmpxchg64(pcp, oval, nval) \ + this_cpu_generic_cmpxchg(pcp, oval, nval) +#endif +#ifndef this_cpu_cmpxchg128 +#define this_cpu_cmpxchg128(pcp, oval, nval) \ + this_cpu_generic_cmpxchg(pcp, oval, nval) +#endif + #ifndef this_cpu_cmpxchg_double_1 #define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -- cgit v1.2.3 From febe950dbfb464799beb0339cc6fb10699f4a5da Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 31 May 2023 15:08:44 +0200 Subject: arch: Remove cmpxchg_double No moar users, remove the monster. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Arnd Bergmann Reviewed-by: Mark Rutland Acked-by: Heiko Carstens Tested-by: Mark Rutland Link: https://lore.kernel.org/r/20230531132323.991907085@infradead.org --- include/asm-generic/percpu.h | 58 -------------------------------------------- 1 file changed, 58 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 5c66e4496289..68c410e85cd7 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -120,19 +120,6 @@ do { \ __old; \ }) -#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ -({ \ - typeof(pcp1) *__p1 = raw_cpu_ptr(&(pcp1)); \ - typeof(pcp2) *__p2 = raw_cpu_ptr(&(pcp2)); \ - int __ret = 0; \ - if (*__p1 == (oval1) && *__p2 == (oval2)) { \ - *__p1 = nval1; \ - *__p2 = nval2; \ - __ret = 1; \ - } \ - (__ret); \ -}) - #define __this_cpu_generic_read_nopreempt(pcp) \ ({ \ typeof(pcp) ___ret; \ @@ -211,17 +198,6 @@ do { \ __ret; \ }) -#define this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ -({ \ - int __ret; \ - unsigned long __flags; \ - raw_local_irq_save(__flags); \ - __ret = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ - oval1, oval2, nval1, nval2); \ - raw_local_irq_restore(__flags); \ - __ret; \ -}) - #ifndef raw_cpu_read_1 #define raw_cpu_read_1(pcp) raw_cpu_generic_read(pcp) #endif @@ -395,23 +371,6 @@ do { \ raw_cpu_generic_cmpxchg(pcp, oval, nval) #endif -#ifndef raw_cpu_cmpxchg_double_1 -#define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -#endif -#ifndef raw_cpu_cmpxchg_double_2 -#define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -#endif -#ifndef raw_cpu_cmpxchg_double_4 -#define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -#endif -#ifndef raw_cpu_cmpxchg_double_8 -#define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, 
nval1, nval2) \ - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -#endif - #ifndef this_cpu_read_1 #define this_cpu_read_1(pcp) this_cpu_generic_read(pcp) #endif @@ -585,21 +544,4 @@ do { \ this_cpu_generic_cmpxchg(pcp, oval, nval) #endif -#ifndef this_cpu_cmpxchg_double_1 -#define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -#endif -#ifndef this_cpu_cmpxchg_double_2 -#define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -#endif -#ifndef this_cpu_cmpxchg_double_4 -#define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -#endif -#ifndef this_cpu_cmpxchg_double_8 -#define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -#endif - #endif /* _ASM_GENERIC_PERCPU_H_ */ -- cgit v1.2.3 From d12157efc8e083c77d054675fcdd594f54cc7e2b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Jun 2023 08:01:01 +0100 Subject: locking/atomic: make atomic*_{cmp,}xchg optional Most architectures define the atomic/atomic64 xchg and cmpxchg operations in terms of arch_xchg and arch_cmpxchg respectively. Add fallbacks for these cases and remove the trivial cases from arch code. On some architectures the existing definitions are kept as these are used to build other arch_atomic*() operations. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230605070124.3741859-5-mark.rutland@arm.com --- include/asm-generic/atomic.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index e271d6708c87..22142c71d35a 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -130,7 +130,4 @@ ATOMIC_OP(xor, ^) #define arch_atomic_read(v) READ_ONCE((v)->counter) #define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) -#define arch_atomic_xchg(ptr, v) (arch_xchg(&(ptr)->counter, (u32)(v))) -#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), (u32)(old), (u32)(new))) - #endif /* __ASM_GENERIC_ATOMIC_H */ -- cgit v1.2.3 From 0f613bfa8268a89be25f2b6b58fc6fe8ccd9a2ba Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Jun 2023 08:01:15 +0100 Subject: locking/atomic: treewide: use raw_atomic*_() Now that we have raw_atomic*_() definitions, there's no need to use arch_atomic*_() definitions outside of the low-level atomic definitions. Move treewide users of arch_atomic*_() over to the equivalent raw_atomic*_(). There should be no functional change as a result of this patch.
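Schematically, the relationship between the three spellings is roughly as follows (an illustrative sketch, simplified from the generated atomic headers rather than code introduced by this patch):

	/*
	 *   atomic_or()      - instrumented wrapper, for general kernel code
	 *   raw_atomic_or()  - same operation, no instrumentation (noinstr-safe)
	 *   arch_atomic_or() - the architecture's implementation
	 */
	static __always_inline void raw_atomic_or(int i, atomic_t *v)
	{
		/* the raw_ form simply forwards to the arch_ implementation */
		arch_atomic_or(i, v);
	}

	static __always_inline void atomic_or(int i, atomic_t *v)
	{
		/* the instrumented wrapper adds sanitizer hooks, then calls raw_ */
		instrument_atomic_read_write(v, sizeof(*v));
		raw_atomic_or(i, v);
	}
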
Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20230605070124.3741859-19-mark.rutland@arm.com --- include/asm-generic/bitops/atomic.h | 12 ++++++------ include/asm-generic/bitops/lock.h | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h index 71ab4ba9c25d..e076e079f6b2 100644 --- a/include/asm-generic/bitops/atomic.h +++ b/include/asm-generic/bitops/atomic.h @@ -15,21 +15,21 @@ static __always_inline void arch_set_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); - arch_atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p); + raw_atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p); } static __always_inline void arch_clear_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); - arch_atomic_long_andnot(BIT_MASK(nr), (atomic_long_t *)p); + raw_atomic_long_andnot(BIT_MASK(nr), (atomic_long_t *)p); } static __always_inline void arch_change_bit(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); - arch_atomic_long_xor(BIT_MASK(nr), (atomic_long_t *)p); + raw_atomic_long_xor(BIT_MASK(nr), (atomic_long_t *)p); } static __always_inline int @@ -39,7 +39,7 @@ arch_test_and_set_bit(unsigned int nr, volatile unsigned long *p) unsigned long mask = BIT_MASK(nr); p += BIT_WORD(nr); - old = arch_atomic_long_fetch_or(mask, (atomic_long_t *)p); + old = raw_atomic_long_fetch_or(mask, (atomic_long_t *)p); return !!(old & mask); } @@ -50,7 +50,7 @@ arch_test_and_clear_bit(unsigned int nr, volatile unsigned long *p) unsigned long mask = BIT_MASK(nr); p += BIT_WORD(nr); - old = arch_atomic_long_fetch_andnot(mask, (atomic_long_t *)p); + old = raw_atomic_long_fetch_andnot(mask, (atomic_long_t *)p); return !!(old & mask); } @@ -61,7 +61,7 @@ arch_test_and_change_bit(unsigned int nr, volatile unsigned long *p) unsigned long mask = BIT_MASK(nr); p += BIT_WORD(nr); - old = arch_atomic_long_fetch_xor(mask, (atomic_long_t *)p); + old = raw_atomic_long_fetch_xor(mask, (atomic_long_t *)p); return !!(old & mask); } diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h index 630f2f6b9595..40913516e654 100644 --- a/include/asm-generic/bitops/lock.h +++ b/include/asm-generic/bitops/lock.h @@ -25,7 +25,7 @@ arch_test_and_set_bit_lock(unsigned int nr, volatile unsigned long *p) if (READ_ONCE(*p) & mask) return 1; - old = arch_atomic_long_fetch_or_acquire(mask, (atomic_long_t *)p); + old = raw_atomic_long_fetch_or_acquire(mask, (atomic_long_t *)p); return !!(old & mask); } @@ -41,7 +41,7 @@ static __always_inline void arch_clear_bit_unlock(unsigned int nr, volatile unsigned long *p) { p += BIT_WORD(nr); - arch_atomic_long_fetch_andnot_release(BIT_MASK(nr), (atomic_long_t *)p); + raw_atomic_long_fetch_andnot_release(BIT_MASK(nr), (atomic_long_t *)p); } /** @@ -63,7 +63,7 @@ arch___clear_bit_unlock(unsigned int nr, volatile unsigned long *p) p += BIT_WORD(nr); old = READ_ONCE(*p); old &= ~BIT_MASK(nr); - arch_atomic_long_set_release((atomic_long_t *)p, old); + raw_atomic_long_set_release((atomic_long_t *)p, old); } /** @@ -83,7 +83,7 @@ static inline bool arch_clear_bit_unlock_is_negative_byte(unsigned int nr, unsigned long mask = BIT_MASK(nr); p += BIT_WORD(nr); - old = arch_atomic_long_fetch_andnot_release(mask, (atomic_long_t *)p); + old = raw_atomic_long_fetch_andnot_release(mask, (atomic_long_t *)p); return !!(old & BIT(7)); } #define 
arch_clear_bit_unlock_is_negative_byte arch_clear_bit_unlock_is_negative_byte -- cgit v1.2.3 From 093d9b240a1fa261ff8aeb7c7cc484dedacfda53 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 7 Jun 2023 14:20:59 -0700 Subject: percpu: Fix self-assignment of __old in raw_cpu_generic_try_cmpxchg() After commit c5c0ba953b8c ("percpu: Add {raw,this}_cpu_try_cmpxchg()"), clang built ARCH=arm and ARCH=arm64 kernels with CONFIG_INIT_STACK_NONE started panicking on boot in alloc_vmap_area(): [ 0.000000] kernel BUG at mm/vmalloc.c:1638! [ 0.000000] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.4.0-rc2-ARCH+ #1 [ 0.000000] Hardware name: linux,dummy-virt (DT) [ 0.000000] pstate: 200000c9 (nzCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 0.000000] pc : alloc_vmap_area+0x7ec/0x7f8 [ 0.000000] lr : alloc_vmap_area+0x7e8/0x7f8 Compiling mm/vmalloc.c with W=2 reveals an instance of -Wshadow, which helps uncover that through macro expansion, '__old = *(ovalp)' in raw_cpu_generic_try_cmpxchg() can become '__old = *(&__old)' through raw_cpu_generic_cmpxchg(), which results in garbage being assigned to the inner __old and the cmpxchg not working properly. Add an extra underscore to __old in raw_cpu_generic_try_cmpxchg() so that there is no more self-assignment, which resolves the panics. Closes: https://github.com/ClangBuiltLinux/linux/issues/1868 Fixes: c5c0ba953b8c ("percpu: Add {raw,this}_cpu_try_cmpxchg()") Debugged-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230607-fix-shadowing-in-raw_cpu_generic_try_cmpxchg-v1-1-8f0a3d930d43@kernel.org --- include/asm-generic/percpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/asm-generic') diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 68c410e85cd7..94cbd50cc870 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -101,9 +101,9 @@ do { \ #define raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval) \ ({ \ typeof(pcp) *__p = raw_cpu_ptr(&(pcp)); \ - typeof(pcp) __val = *__p, __old = *(ovalp); \ + typeof(pcp) __val = *__p, ___old = *(ovalp); \ bool __ret; \ - if (__val == __old) { \ + if (__val == ___old) { \ *__p = nval; \ __ret = true; \ } else { \ -- cgit v1.2.3
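
As an aside on the fix above: the problem is ordinary C scoping applied to textual macro expansion, and it can be reproduced outside the kernel. The following userspace reduction is a sketch only — the demo_* macros and main() are hypothetical stand-ins for raw_cpu_generic_try_cmpxchg()/raw_cpu_generic_cmpxchg(), not kernel code:

	#include <stdbool.h>
	#include <stdio.h>

	/* Models raw_cpu_generic_try_cmpxchg() with the post-fix "___old" name. */
	#define demo_try_cmpxchg(ptr, ovalp, nval)		\
	({							\
		int __val = *(ptr), ___old = *(ovalp);		\
		bool __ret;					\
		if (__val == ___old) {				\
			*(ptr) = (nval);			\
			__ret = true;				\
		} else {					\
			*(ovalp) = __val;			\
			__ret = false;				\
		}						\
		__ret;						\
	})

	/* Models raw_cpu_generic_cmpxchg(), which passes &__old down. */
	#define demo_cmpxchg(ptr, oval, nval)			\
	({							\
		int __old = (oval);				\
		demo_try_cmpxchg(ptr, &__old, nval);		\
		__old;						\
	})

	int main(void)
	{
		int v = 5;
		int old = demo_cmpxchg(&v, 5, 7);

		/* Prints "old=5 v=7". */
		printf("old=%d v=%d\n", old, v);
		return 0;
	}

With the pre-fix spelling — "__old" in both macros — the inner initializer "___old = *(ovalp)" instead expands to "__old = *(&__old)", where "&__old" now names the inner variable that is being declared, so the comparison reads an uninitialized value and the cmpxchg misbehaves exactly as described in the commit message. The extra underscore keeps the outer variable visible through the expansion while preserving the caller-visible contract of the new try_cmpxchg() helpers: seed the expected value once, and on failure the observed value is written back through ovalp so a retry loop does not need to re-read it.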