From 51339d99c0131bc0d16d378e9b05bc498d2967e2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 2 Apr 2025 19:55:14 -0700 Subject: locking/local_lock, mm: replace localtry_ helpers with local_trylock_t type Partially revert commit 0aaddfb06882 ("locking/local_lock: Introduce localtry_lock_t"). Remove localtry_*() helpers, since the localtry_lock() name might be misinterpreted as "try lock". Introduce local_trylock[_irqsave]() helpers that only work with the newly introduced local_trylock_t type. Note that an attempt to use local_trylock[_irqsave]() with local_lock_t will cause a compilation failure. Usage and behavior in !PREEMPT_RT: local_lock_t lock; // sizeof(lock) == 0 local_lock(&lock); // preempt disable local_lock_irqsave(&lock, ...); // irq save if (local_trylock_irqsave(&lock, ...)) // compilation error local_trylock_t lock; // sizeof(lock) == 1 local_lock(&lock); // preempt disable, acquired = 1 local_lock_irqsave(&lock, ...); // irq save, acquired = 1 if (local_trylock(&lock)) // if (!acquired) preempt disable, acquired = 1 if (local_trylock_irqsave(&lock, ...)) // if (!acquired) irq save, acquired = 1 The existing local_lock_*() macros can be used either with local_lock_t or local_trylock_t. With local_trylock_t they set acquired = 1 while local_unlock_*() clears it. In !PREEMPT_RT local_lock_irqsave(local_lock_t *) disables interrupts to protect the critical section, but it doesn't prevent NMIs, so fully reentrant code cannot use local_lock_irqsave(local_lock_t *) for exclusive access. The local_lock_irqsave(local_trylock_t *) helper disables interrupts and sets acquired=1, so local_trylock_irqsave(local_trylock_t *) from NMI attempting to acquire the same lock will return false. In PREEMPT_RT local_lock_irqsave() maps to preemptible spin_lock(). Map local_trylock_irqsave() to preemptible spin_trylock(). 
When in hard IRQ or NMI return false right away, since spin_trylock() is not safe due to explicit locking in the underlying rt_spin_trylock() implementation. Removing this explicit locking and attempting only "trylock" is undesired due to PI implications. The local_trylock() without _irqsave can be used to avoid the cost of disabling/enabling interrupts by only disabling preemption, so local_trylock() in an interrupt attempting to acquire the same lock will return false. Note there is no need to use local_inc for the acquired variable, since it's a percpu variable with strict nesting scopes. Note that guard(local_lock)(&lock) works only for "local_lock_t lock". The patch also makes sure that local_lock_release(l) is called before WRITE_ONCE(l->acquired, 0). Otherwise, even though IRQs are disabled at that point, a local_trylock() from NMI would succeed and local_lock_acquire(l) would warn. Link: https://lkml.kernel.org/r/20250403025514.41186-1-alexei.starovoitov@gmail.com Fixes: 0aaddfb06882 ("locking/local_lock: Introduce localtry_lock_t") Signed-off-by: Alexei Starovoitov Acked-by: Vlastimil Babka Acked-by: Sebastian Andrzej Siewior Reviewed-by: Shakeel Butt Cc: Daniel Borkman Cc: Linus Torvalds Cc: Martin KaFai Lau Cc: Michal Hocko Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/local_lock_internal.h | 207 +++++++++++++++--------------------- 1 file changed, 87 insertions(+), 120 deletions(-) (limited to 'include/linux/local_lock_internal.h') diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 67bd13d142fa..bf2bf40d7b18 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -15,10 +15,11 @@ typedef struct { #endif } local_lock_t; +/* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */ typedef struct { local_lock_t llock; - unsigned int acquired; -} localtry_lock_t; + u8 acquired; +} local_trylock_t; #ifdef 
CONFIG_DEBUG_LOCK_ALLOC # define LOCAL_LOCK_DEBUG_INIT(lockname) \ @@ -29,6 +30,9 @@ typedef struct { }, \ .owner = NULL, +# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) \ + .llock = { LOCAL_LOCK_DEBUG_INIT((lockname).llock) }, + static inline void local_lock_acquire(local_lock_t *l) { lock_map_acquire(&l->dep_map); @@ -56,6 +60,7 @@ static inline void local_lock_debug_init(local_lock_t *l) } #else /* CONFIG_DEBUG_LOCK_ALLOC */ # define LOCAL_LOCK_DEBUG_INIT(lockname) +# define LOCAL_TRYLOCK_DEBUG_INIT(lockname) static inline void local_lock_acquire(local_lock_t *l) { } static inline void local_trylock_acquire(local_lock_t *l) { } static inline void local_lock_release(local_lock_t *l) { } @@ -63,7 +68,7 @@ static inline void local_lock_debug_init(local_lock_t *l) { } #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #define INIT_LOCAL_LOCK(lockname) { LOCAL_LOCK_DEBUG_INIT(lockname) } -#define INIT_LOCALTRY_LOCK(lockname) { .llock = { LOCAL_LOCK_DEBUG_INIT(lockname.llock) }} +#define INIT_LOCAL_TRYLOCK(lockname) { LOCAL_TRYLOCK_DEBUG_INIT(lockname) } #define __local_lock_init(lock) \ do { \ @@ -76,6 +81,8 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __local_trylock_init(lock) __local_lock_init(lock.llock) + #define __spinlock_nested_bh_init(lock) \ do { \ static struct lock_class_key __key; \ @@ -87,149 +94,117 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __local_lock_acquire(lock) \ + do { \ + local_trylock_t *tl; \ + local_lock_t *l; \ + \ + l = (local_lock_t *)this_cpu_ptr(lock); \ + tl = (local_trylock_t *)l; \ + _Generic((lock), \ + local_trylock_t *: ({ \ + lockdep_assert(tl->acquired == 0); \ + WRITE_ONCE(tl->acquired, 1); \ + }), \ + default:(void)0); \ + local_lock_acquire(l); \ + } while (0) + #define __local_lock(lock) \ do { \ preempt_disable(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ } while (0) #define __local_lock_irq(lock) \ do { \ local_irq_disable(); \ - 
local_lock_acquire(this_cpu_ptr(lock)); \ + __local_lock_acquire(lock); \ } while (0) #define __local_lock_irqsave(lock, flags) \ do { \ local_irq_save(flags); \ - local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - -#define __local_unlock(lock) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - preempt_enable(); \ + __local_lock_acquire(lock); \ } while (0) -#define __local_unlock_irq(lock) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - local_irq_enable(); \ - } while (0) - -#define __local_unlock_irqrestore(lock, flags) \ - do { \ - local_lock_release(this_cpu_ptr(lock)); \ - local_irq_restore(flags); \ - } while (0) - -#define __local_lock_nested_bh(lock) \ - do { \ - lockdep_assert_in_softirq(); \ - local_lock_acquire(this_cpu_ptr(lock)); \ - } while (0) - -#define __local_unlock_nested_bh(lock) \ - local_lock_release(this_cpu_ptr(lock)) - -/* localtry_lock_t variants */ - -#define __localtry_lock_init(lock) \ -do { \ - __local_lock_init(&(lock)->llock); \ - WRITE_ONCE((lock)->acquired, 0); \ -} while (0) - -#define __localtry_lock(lock) \ - do { \ - localtry_lock_t *lt; \ - preempt_disable(); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(&lt->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_lock_irq(lock) \ - do { \ - localtry_lock_t *lt; \ - local_irq_disable(); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(&lt->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_lock_irqsave(lock, flags) \ - do { \ - localtry_lock_t *lt; \ - local_irq_save(flags); \ - lt = this_cpu_ptr(lock); \ - local_lock_acquire(&lt->llock); \ - WRITE_ONCE(lt->acquired, 1); \ - } while (0) - -#define __localtry_trylock(lock) \ +#define __local_trylock(lock) \ ({ \ - localtry_lock_t *lt; \ - bool _ret; \ + local_trylock_t *tl; \ \ preempt_disable(); \ - lt = this_cpu_ptr(lock); \ - if (!READ_ONCE(lt->acquired)) { \ - WRITE_ONCE(lt->acquired, 1); \ - local_trylock_acquire(&lt->llock); \ - _ret = true; \ - } 
else { \ - _ret = false; \ + tl = this_cpu_ptr(lock); \ + if (READ_ONCE(tl->acquired)) { \ preempt_enable(); \ + tl = NULL; \ + } else { \ + WRITE_ONCE(tl->acquired, 1); \ + local_trylock_acquire( \ + (local_lock_t *)tl); \ } \ - _ret; \ + !!tl; \ }) -#define __localtry_trylock_irqsave(lock, flags) \ +#define __local_trylock_irqsave(lock, flags) \ ({ \ - localtry_lock_t *lt; \ - bool _ret; \ + local_trylock_t *tl; \ \ local_irq_save(flags); \ - lt = this_cpu_ptr(lock); \ - if (!READ_ONCE(lt->acquired)) { \ - WRITE_ONCE(lt->acquired, 1); \ - local_trylock_acquire(&lt->llock); \ - _ret = true; \ - } else { \ - _ret = false; \ + tl = this_cpu_ptr(lock); \ + if (READ_ONCE(tl->acquired)) { \ local_irq_restore(flags); \ + tl = NULL; \ + } else { \ + WRITE_ONCE(tl->acquired, 1); \ + local_trylock_acquire( \ + (local_lock_t *)tl); \ } \ - _ret; \ + !!tl; \ }) -#define __localtry_unlock(lock) \ +#define __local_lock_release(lock) \ + do { \ + local_trylock_t *tl; \ + local_lock_t *l; \ + \ + l = (local_lock_t *)this_cpu_ptr(lock); \ + tl = (local_trylock_t *)l; \ + local_lock_release(l); \ + _Generic((lock), \ + local_trylock_t *: ({ \ + lockdep_assert(tl->acquired == 1); \ + WRITE_ONCE(tl->acquired, 0); \ + }), \ + default:(void)0); \ + } while (0) + +#define __local_unlock(lock) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(&lt->llock); \ + __local_lock_release(lock); \ preempt_enable(); \ } while (0) -#define __localtry_unlock_irq(lock) \ +#define __local_unlock_irq(lock) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(&lt->llock); \ + __local_lock_release(lock); \ local_irq_enable(); \ } while (0) -#define __localtry_unlock_irqrestore(lock, flags) \ +#define __local_unlock_irqrestore(lock, flags) \ do { \ - localtry_lock_t *lt; \ - lt = this_cpu_ptr(lock); \ - WRITE_ONCE(lt->acquired, 0); \ - local_lock_release(&lt->llock); \ + 
__local_lock_release(lock); \ local_irq_restore(flags); \ } while (0) +#define __local_lock_nested_bh(lock) \ + do { \ + lockdep_assert_in_softirq(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + +#define __local_unlock_nested_bh(lock) \ + local_lock_release(this_cpu_ptr(lock)) + #else /* !CONFIG_PREEMPT_RT */ /* @@ -237,16 +212,18 @@ do { \ * critical section while staying preemptible. */ typedef spinlock_t local_lock_t; -typedef spinlock_t localtry_lock_t; +typedef spinlock_t local_trylock_t; #define INIT_LOCAL_LOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) -#define INIT_LOCALTRY_LOCK(lockname) INIT_LOCAL_LOCK(lockname) +#define INIT_LOCAL_TRYLOCK(lockname) __LOCAL_SPIN_LOCK_UNLOCKED((lockname)) #define __local_lock_init(l) \ do { \ local_spin_lock_init((l)); \ } while (0) +#define __local_trylock_init(l) __local_lock_init(l) + #define __local_lock(__lock) \ do { \ migrate_disable(); \ @@ -283,17 +260,7 @@ do { \ spin_unlock(this_cpu_ptr((lock))); \ } while (0) -/* localtry_lock_t variants */ - -#define __localtry_lock_init(lock) __local_lock_init(lock) -#define __localtry_lock(lock) __local_lock(lock) -#define __localtry_lock_irq(lock) __local_lock(lock) -#define __localtry_lock_irqsave(lock, flags) __local_lock_irqsave(lock, flags) -#define __localtry_unlock(lock) __local_unlock(lock) -#define __localtry_unlock_irq(lock) __local_unlock(lock) -#define __localtry_unlock_irqrestore(lock, flags) __local_unlock_irqrestore(lock, flags) - -#define __localtry_trylock(lock) \ +#define __local_trylock(lock) \ ({ \ int __locked; \ \ @@ -308,11 +275,11 @@ do { \ __locked; \ }) -#define __localtry_trylock_irqsave(lock, flags) \ +#define __local_trylock_irqsave(lock, flags) \ ({ \ typecheck(unsigned long, flags); \ flags = 0; \ - __localtry_trylock(lock); \ + __local_trylock(lock); \ }) #endif /* CONFIG_PREEMPT_RT */ -- cgit v1.2.3 From 82efd569a8909f2b13140c1b3de88535aea0b051 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 23 Apr 2025 
10:21:29 +0200 Subject: locking/local_lock: fix _Generic() matching of local_trylock_t MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Michael Larabel reported [1] an nginx performance regression in v6.15-rc3 and bisected it to commit 51339d99c013 ("locking/local_lock, mm: replace localtry_ helpers with local_trylock_t type"). The problem is the _Generic() usage with a default association that masks the fact that "local_trylock_t *" association is not being selected as expected. Replacing the default with the only other expected type "local_lock_t *" reveals the underlying problem: include/linux/local_lock_internal.h:174:26: error: ‘_Generic’ selector of type ‘__seg_gs local_lock_t *’ is not compatible with any association The local_lock's are part of __percpu structures and thus the __percpu attribute is needed to associate the type properly. Add the attribute and keep the default replaced to turn any further mismatches into compile errors. The failure to recognize local_trylock_t in __local_lock_release() means that a local_trylock[_irqsave]() operation will set tl->acquired to 1 (there's no _Generic() part in the trylock code), but then local_unlock[_irqrestore]() will not set tl->acquired back to 0, so further trylock operations will always fail on the same cpu+lock, while non-trylock operations continue to work - a lockdep_assert() is also not being executed in the _Generic() part of local_lock() code. This means consume_stock() and refill_stock() operations will fail deterministically, resulting in taking the slow paths and worse performance. 
Fixes: 51339d99c013 ("locking/local_lock, mm: replace localtry_ helpers with local_trylock_t type") Reported-by: Michael Larabel Closes: https://www.phoronix.com/review/linux-615-nginx-regression/2 [1] Signed-off-by: Vlastimil Babka Acked-by: Alexei Starovoitov Signed-off-by: Linus Torvalds --- include/linux/local_lock_internal.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/local_lock_internal.h') diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index bf2bf40d7b18..8d5ac16a9b17 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -102,11 +102,11 @@ do { \ l = (local_lock_t *)this_cpu_ptr(lock); \ tl = (local_trylock_t *)l; \ _Generic((lock), \ - local_trylock_t *: ({ \ + __percpu local_trylock_t *: ({ \ lockdep_assert(tl->acquired == 0); \ WRITE_ONCE(tl->acquired, 1); \ }), \ - default:(void)0); \ + __percpu local_lock_t *: (void)0); \ local_lock_acquire(l); \ } while (0) @@ -171,11 +171,11 @@ do { \ tl = (local_trylock_t *)l; \ local_lock_release(l); \ _Generic((lock), \ - local_trylock_t *: ({ \ + __percpu local_trylock_t *: ({ \ lockdep_assert(tl->acquired == 1); \ WRITE_ONCE(tl->acquired, 0); \ }), \ - default:(void)0); \ + __percpu local_lock_t *: (void)0); \ } while (0) #define __local_unlock(lock) \ -- cgit v1.2.3