From 8b65eb52d93e4e496bd26e6867152344554eb39e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 17 Feb 2026 11:15:10 -0800 Subject: locking/mutex: Rename mutex_init_lockep() Typo, this wants to be _lockdep(). Fixes: 51d7a054521d ("locking/mutex: Redo __mutex_init() to reduce generated code size") Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260217191512.1180151-2-dave@stgolabs.net --- include/linux/mutex.h | 4 ++-- kernel/locking/mutex.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/mutex.h b/include/linux/mutex.h index ecaa0440f6ec..8126da959088 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -87,12 +87,12 @@ do { \ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) #ifdef CONFIG_DEBUG_LOCK_ALLOC -void mutex_init_lockep(struct mutex *lock, const char *name, struct lock_class_key *key); +void mutex_init_lockdep(struct mutex *lock, const char *name, struct lock_class_key *key); static inline void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) { - mutex_init_lockep(lock, name, key); + mutex_init_lockdep(lock, name, key); } #else extern void mutex_init_generic(struct mutex *lock); diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 2a1d165b3167..c867f6c15530 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -171,7 +171,7 @@ static __always_inline bool __mutex_unlock_fast(struct mutex *lock) #else /* !CONFIG_DEBUG_LOCK_ALLOC */ -void mutex_init_lockep(struct mutex *lock, const char *name, struct lock_class_key *key) +void mutex_init_lockdep(struct mutex *lock, const char *name, struct lock_class_key *key) { __mutex_init_generic(lock); @@ -181,7 +181,7 @@ void mutex_init_lockep(struct mutex *lock, const char *name, struct lock_class_k debug_check_no_locks_freed((void *)lock, sizeof(*lock)); lockdep_init_map_wait(&lock->dep_map, name, key, 0, LD_WAIT_SLEEP); } -EXPORT_SYMBOL(mutex_init_lockep); +EXPORT_SYMBOL(mutex_init_lockdep); #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ static inline void __mutex_set_flag(struct mutex *lock, unsigned long flag) -- cgit v1.2.3 From babcde3be8c9148aa60a14b17831e8f249854963 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 17 Feb 2026 11:15:11 -0800 Subject: locking/mutex: Fix wrong comment for CONFIG_DEBUG_LOCK_ALLOC ... that endif block should be CONFIG_DEBUG_LOCK_ALLOC, not CONFIG_LOCKDEP. Fixes: 51d7a054521d ("locking/mutex: Redo __mutex_init() to reduce generated code size") Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260217191512.1180151-3-dave@stgolabs.net --- include/linux/mutex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 8126da959088..f57d2a97da57 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -146,7 +146,7 @@ static inline void __mutex_init(struct mutex *lock, const char *name, { mutex_rt_init_generic(lock); } -#endif /* !CONFIG_LOCKDEP */ +#endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #endif /* CONFIG_PREEMPT_RT */ #ifdef CONFIG_DEBUG_MUTEXES -- cgit v1.2.3 From 50214dc4382055352fb1d7b9779550dabf5059e5 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 17 Feb 2026 11:15:12 -0800 Subject: locking/mutex: Add killable flavor to guard definitions The mutex guard family defines _try and _intr variants but is missing the killable one. 
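A minimal usage sketch of the new flavor (the function and mutex names are hypothetical; assumes the scoped_cond_guard() helper from <linux/cleanup.h>):

	static DEFINE_MUTEX(example_lock);	/* hypothetical */

	static int example_op(void)
	{
		/* Sleeps killably; bails out if a fatal signal arrives. */
		scoped_cond_guard(mutex_kill, return -EINTR, &example_lock) {
			/* critical section protected by example_lock */
		}
		return 0;
	}
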
Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260217191512.1180151-4-dave@stgolabs.net --- include/linux/mutex.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/mutex.h b/include/linux/mutex.h index f57d2a97da57..2f648ee204e7 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -253,6 +253,7 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) __cond_a DEFINE_LOCK_GUARD_1(mutex, struct mutex, mutex_lock(_T->lock), mutex_unlock(_T->lock)) DEFINE_LOCK_GUARD_1_COND(mutex, _try, mutex_trylock(_T->lock)) DEFINE_LOCK_GUARD_1_COND(mutex, _intr, mutex_lock_interruptible(_T->lock), _RET == 0) +DEFINE_LOCK_GUARD_1_COND(mutex, _kill, mutex_lock_killable(_T->lock), _RET == 0) DEFINE_LOCK_GUARD_1(mutex_init, struct mutex, mutex_init(_T->lock), /* */) DECLARE_LOCK_GUARD_1_ATTRS(mutex, __acquires(_T), __releases(*(struct mutex **)_T)) @@ -261,6 +262,8 @@ DECLARE_LOCK_GUARD_1_ATTRS(mutex_try, __acquires(_T), __releases(*(struct mutex #define class_mutex_try_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_try, _T) DECLARE_LOCK_GUARD_1_ATTRS(mutex_intr, __acquires(_T), __releases(*(struct mutex **)_T)) #define class_mutex_intr_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_intr, _T) +DECLARE_LOCK_GUARD_1_ATTRS(mutex_kill, __acquires(_T), __releases(*(struct mutex **)_T)) +#define class_mutex_kill_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_kill, _T) DECLARE_LOCK_GUARD_1_ATTRS(mutex_init, __acquires(_T), __releases(*(struct mutex **)_T)) #define class_mutex_init_constructor(_T) WITH_LOCK_GUARD_1_ATTRS(mutex_init, _T) -- cgit v1.2.3 From abf1be684dc270b94b7c8782f562959b33766fc0 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 16 Feb 2026 15:16:22 +0100 Subject: arm64: Optimize __READ_ONCE() with CONFIG_LTO=y Rework arm64 LTO __READ_ONCE() to improve code generation as follows: 1. Replace _Generic-based __unqual_scalar_typeof() with more complete __rwonce_typeof_unqual(). This strips qualifiers from all types, not just integer types, which is required to be able to assign (must be non-const) to __u.__val in the non-atomic case (required for #2). One subtle point here is that non-integer types of __val could be const or volatile within the union with the old __unqual_scalar_typeof(), if the passed variable is const or volatile. This would then result in a forced load from the stack if __u.__val is volatile; in the case of const, it does look odd if the underlying storage changes, but the compiler is told said member is "const" -- it smells like UB. 2. Eliminate the atomic flag and ternary conditional expression. Move the fallback volatile load into the default case of the switch, ensuring __u is unconditionally initialized across all paths. The statement expression now unconditionally returns __u.__val. This refactoring appears to help the compiler improve (or fix) code generation. 
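To make the trick concrete, here is a standalone sketch; the macro name below is invented for illustration, while the real helper added by this patch is __rwonce_typeof_unqual(), which additionally wraps the expression in __diag_push()/__diag_pop() to silence -Wignored-qualifiers:

	/*
	 * A call expression is treated as having the unqualified return type
	 * (modulo the GCC 8.1-8.3 exception noted further down), so typeof()
	 * of a never-evaluated call through a cast null function pointer
	 * yields the type of 'x' stripped of its qualifiers.
	 */
	#define typeof_unqual_sketch(x)	typeof(((typeof(x) (*)(void))0)())

	const volatile int x = 0;
	typeof_unqual_sketch(x) y = x;	/* 'y' is a plain, assignable int */
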
With a defconfig + LTO + debug options builds, we observe different codegen for the following functions: btrfs_reclaim_sweep (708 -> 1032 bytes) btrfs_sinfo_bg_reclaim_threshold_store (200 -> 204 bytes) check_mem_access (3652 -> 3692 bytes) [inlined bpf_map_is_rdonly] console_flush_all (1268 -> 1264 bytes) console_lock_spinning_disable_and_check (180 -> 176 bytes) igb_add_filter (640 -> 636 bytes) igb_config_tx_modes (2404 -> 2400 bytes) kvm_vcpu_on_spin (480 -> 476 bytes) map_freeze (376 -> 380 bytes) netlink_bind (1664 -> 1656 bytes) nmi_cpu_backtrace (404 -> 400 bytes) set_rps_cpu (516 -> 520 bytes) swap_cluster_readahead (944 -> 932 bytes) tcp_accecn_third_ack (328 -> 336 bytes) tcp_create_openreq_child (1764 -> 1772 bytes) tcp_data_queue (5784 -> 5892 bytes) tcp_ecn_rcv_synack (620 -> 628 bytes) xen_manage_runstate_time (944 -> 896 bytes) xen_steal_clock (340 -> 296 bytes) Increase of some functions are due to more aggressive inlining due to better codegen (in this build, e.g. bpf_map_is_rdonly is no longer present due to being inlined completely). NOTE: The return-value-of-function-drops-qualifiers hack was first suggested by Al Viro in [1], which notes some of its limitations which make it unsuitable for a general __unqual_scalar_typeof() replacement. Notably, array types are not supported, and GCC 8.1-8.3 still fail. Why should we use it here? READ_ONCE() does not support reading whole arrays, and the GCC version problem only affects 3 minor releases of a very ancient still-supported GCC version; not only that, this arm64 READ_ONCE() version is currently only activated by LTO builds, which to-date are *only supported by Clang*! Link: https://lore.kernel.org/all/20260111182010.GH3634291@ZenIV/ [1] Signed-off-by: Marco Elver Signed-off-by: Will Deacon --- arch/arm64/include/asm/rwonce.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h index fc0fb42b0b64..9fd24cef3376 100644 --- a/arch/arm64/include/asm/rwonce.h +++ b/arch/arm64/include/asm/rwonce.h @@ -19,6 +19,17 @@ "ldapr" #sfx "\t" #regs, \ ARM64_HAS_LDAPR) +/* + * Replace this with typeof_unqual() when minimum compiler versions are + * increased to GCC 14 and Clang 19. For the time being, we need this + * workaround, which relies on function return values dropping qualifiers. + */ +#define __rwonce_typeof_unqual(x) typeof(({ \ + __diag_push() \ + __diag_ignore_all("-Wignored-qualifiers", "") \ + ((typeof(x)(*)(void))0)(); \ + __diag_pop() })) + /* * When building with LTO, there is an increased risk of the compiler * converting an address dependency headed by a READ_ONCE() invocation @@ -32,8 +43,7 @@ #define __READ_ONCE(x) \ ({ \ typeof(&(x)) __x = &(x); \ - int atomic = 1; \ - union { __unqual_scalar_typeof(*__x) __val; char __c[1]; } __u; \ + union { __rwonce_typeof_unqual(*__x) __val; char __c[1]; } __u; \ switch (sizeof(x)) { \ case 1: \ asm volatile(__LOAD_RCPC(b, %w0, %1) \ @@ -56,9 +66,9 @@ : "Q" (*__x) : "memory"); \ break; \ default: \ - atomic = 0; \ + __u.__val = *(volatile typeof(*__x) *)__x; \ } \ - atomic ? (typeof(*__x))__u.__val : (*(volatile typeof(*__x) *)__x);\ + __u.__val; \ }) #endif /* !BUILD_VDSO */ -- cgit v1.2.3 From 773b24bcedc16a4a29e8579d66ec67ca7aa0014f Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Mon, 16 Feb 2026 15:16:23 +0100 Subject: arm64, compiler-context-analysis: Permit alias analysis through __READ_ONCE() with CONFIG_LTO=y When enabling Clang's Context Analysis (aka. 
Thread Safety Analysis) on kernel/futex/core.o (see Peter's changes at [1]), in arm64 LTO builds we could see: | kernel/futex/core.c:982:1: warning: spinlock 'atomic ? __u.__val : q->lock_ptr' is still held at the end of function [-Wthread-safety-analysis] | 982 | } | | ^ | kernel/futex/core.c:976:2: note: spinlock acquired here | 976 | spin_lock(lock_ptr); | | ^ | kernel/futex/core.c:982:1: warning: expecting spinlock 'q->lock_ptr' to be held at the end of function [-Wthread-safety-analysis] | 982 | } | | ^ | kernel/futex/core.c:966:6: note: spinlock acquired here | 966 | void futex_q_lockptr_lock(struct futex_q *q) | | ^ | 2 warnings generated. Where we have: extern void futex_q_lockptr_lock(struct futex_q *q) __acquires(q->lock_ptr); .. void futex_q_lockptr_lock(struct futex_q *q) { spinlock_t *lock_ptr; /* * See futex_unqueue() why lock_ptr can change. */ guard(rcu)(); retry: >> lock_ptr = READ_ONCE(q->lock_ptr); spin_lock(lock_ptr); ... } At the time of the above report (prior to removal of the 'atomic' flag), Clang Thread Safety Analysis's alias analysis resolved 'lock_ptr' to 'atomic ? __u.__val : q->lock_ptr' (now just '__u.__val'), and used this as the identity of the context lock given it cannot "see through" the inline assembly; however, we want 'q->lock_ptr' as the canonical context lock. While for code generation the compiler simplified to '__u.__val' for pointers (8 byte case -> 'atomic' was set), TSA's analysis (a) happens much earlier on the AST, and (b) would be the wrong deduction. Now that we've gotten rid of the 'atomic' ternary comparison, we can return '__u.__val' through a pointer that we initialize with '&x', but then update via a pointer-to-pointer. When READ_ONCE()'ing a context lock pointer, TSA's alias analysis does not invalidate the initial alias when updated through the pointer-to-pointer, and we make it effectively "see through" the __READ_ONCE(). Code generation is unchanged. Link: https://lkml.kernel.org/r/20260121110704.221498346@infradead.org [1] Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202601221040.TeM0ihff-lkp@intel.com/ Cc: Peter Zijlstra Tested-by: Boqun Feng Reviewed-by: David Laight Signed-off-by: Marco Elver Signed-off-by: Will Deacon --- arch/arm64/include/asm/rwonce.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h index 9fd24cef3376..0f3a01d30f66 100644 --- a/arch/arm64/include/asm/rwonce.h +++ b/arch/arm64/include/asm/rwonce.h @@ -42,8 +42,12 @@ */ #define __READ_ONCE(x) \ ({ \ - typeof(&(x)) __x = &(x); \ - union { __rwonce_typeof_unqual(*__x) __val; char __c[1]; } __u; \ + auto __x = &(x); \ + auto __ret = (__rwonce_typeof_unqual(*__x) *)__x; \ + /* Hides alias reassignment from Clang's -Wthread-safety. */ \ + auto __retp = &__ret; \ + union { typeof(*__ret) __val; char __c[1]; } __u; \ + *__retp = &__u.__val; \ switch (sizeof(x)) { \ case 1: \ asm volatile(__LOAD_RCPC(b, %w0, %1) \ @@ -68,7 +72,7 @@ default: \ __u.__val = *(volatile typeof(*__x) *)__x; \ } \ - __u.__val; \ + *__ret; \ }) #endif /* !BUILD_VDSO */ -- cgit v1.2.3 From 38e18d825f7281fdc16d3241df5115ce6eaeaf79 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 25 Feb 2026 10:32:41 -0800 Subject: locking: Fix rwlock and spinlock lock context annotations Fix two incorrect rwlock_t lock context annotations. Add the raw_spinlock_t lock context annotations that are missing. 
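For reference, __cond_acquires(true, lock) tells the analysis that the lock is held exactly when the annotated function returns true, so a caller shaped like the following sketch (structure and field names are hypothetical, purely to illustrate the contract) is accepted:

	if (do_raw_spin_trylock(&foo->lock)) {
		foo->count++;			/* access guarded by foo->lock */
		do_raw_spin_unlock(&foo->lock);
	}
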
Fixes: f16a802d402d ("locking/rwlock, spinlock: Support Clang's context analysis") Signed-off-by: Bart Van Assche Signed-off-by: Peter Zijlstra (Intel) Acked-by: Marco Elver Link: https://patch.msgid.link/20260225183244.4035378-2-bvanassche@acm.org --- include/linux/rwlock.h | 4 ++-- include/linux/rwlock_api_smp.h | 6 ++++-- include/linux/spinlock.h | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 3390d21c95dd..21ceefc4a49f 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -30,10 +30,10 @@ do { \ #ifdef CONFIG_DEBUG_SPINLOCK extern void do_raw_read_lock(rwlock_t *lock) __acquires_shared(lock); - extern int do_raw_read_trylock(rwlock_t *lock); + extern int do_raw_read_trylock(rwlock_t *lock) __cond_acquires_shared(true, lock); extern void do_raw_read_unlock(rwlock_t *lock) __releases_shared(lock); extern void do_raw_write_lock(rwlock_t *lock) __acquires(lock); - extern int do_raw_write_trylock(rwlock_t *lock); +extern int do_raw_write_trylock(rwlock_t *lock) __cond_acquires(true, lock); extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock); #else # define do_raw_read_lock(rwlock) do {__acquire_shared(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0) diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h index 61a852609eab..9e02a5f28cd1 100644 --- a/include/linux/rwlock_api_smp.h +++ b/include/linux/rwlock_api_smp.h @@ -23,7 +23,7 @@ void __lockfunc _raw_write_lock_bh(rwlock_t *lock) __acquires(lock); void __lockfunc _raw_read_lock_irq(rwlock_t *lock) __acquires_shared(lock); void __lockfunc _raw_write_lock_irq(rwlock_t *lock) __acquires(lock); unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock) - __acquires(lock); + __acquires_shared(lock); unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock) __acquires(lock); int __lockfunc _raw_read_trylock(rwlock_t *lock) __cond_acquires_shared(true, lock); @@ -36,7 +36,7 @@ void __lockfunc _raw_read_unlock_irq(rwlock_t *lock) __releases_shared(lock); void __lockfunc _raw_write_unlock_irq(rwlock_t *lock) __releases(lock); void __lockfunc _raw_read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) - __releases(lock); + __releases_shared(lock); void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); @@ -116,6 +116,7 @@ _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) #endif static inline int __raw_read_trylock(rwlock_t *lock) + __cond_acquires_shared(true, lock) { preempt_disable(); if (do_raw_read_trylock(lock)) { @@ -127,6 +128,7 @@ static inline int __raw_read_trylock(rwlock_t *lock) } static inline int __raw_write_trylock(rwlock_t *lock) + __cond_acquires(true, lock) { preempt_disable(); if (do_raw_write_trylock(lock)) { diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index e1e2f144af9b..241277cd34cf 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -178,7 +178,7 @@ do { \ #ifdef CONFIG_DEBUG_SPINLOCK extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); - extern int do_raw_spin_trylock(raw_spinlock_t *lock); + extern int do_raw_spin_trylock(raw_spinlock_t *lock) __cond_acquires(true, lock); extern void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock); #else static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock) @@ -189,6 +189,7 @@ static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock) } static inline int 
do_raw_spin_trylock(raw_spinlock_t *lock) + __cond_acquires(true, lock) { int ret = arch_spin_trylock(&(lock)->raw_lock); -- cgit v1.2.3 From 39be7b21af24d1d2ed3b18caac57dd219fef226e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 25 Feb 2026 10:32:42 -0800 Subject: signal: Fix the lock_task_sighand() annotation lock_task_sighand() may return NULL. Make this clear in its lock context annotation. Fixes: 04e49d926f43 ("sched: Enable context analysis for core.c and fair.c") Signed-off-by: Bart Van Assche Signed-off-by: Peter Zijlstra (Intel) Acked-by: Marco Elver Link: https://patch.msgid.link/20260225183244.4035378-3-bvanassche@acm.org --- include/linux/sched/signal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index a22248aebcf9..a4835a7de07e 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -739,7 +739,7 @@ static inline int thread_group_empty(struct task_struct *p) extern struct sighand_struct *lock_task_sighand(struct task_struct *task, unsigned long *flags) - __acquires(&task->sighand->siglock); + __cond_acquires(nonnull, &task->sighand->siglock); static inline void unlock_task_sighand(struct task_struct *task, unsigned long *flags) -- cgit v1.2.3 From 3dcef70e41ab13483803c536ddea8d5f1803ee25 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 25 Feb 2026 10:32:43 -0800 Subject: ww-mutex: Fix the ww_acquire_ctx function annotations The ww_acquire_done() call is optional. Reflect this in the annotations of ww_acquire_done(). Fixes: 47907461e4f6 ("locking/ww_mutex: Support Clang's context analysis") Signed-off-by: Bart Van Assche Signed-off-by: Peter Zijlstra (Intel) Acked-by: Maarten Lankhorst Acked-by: Marco Elver Link: https://patch.msgid.link/20260225183244.4035378-4-bvanassche@acm.org --- include/linux/ww_mutex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index 85b1fff02fde..0c95ead5a297 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -181,7 +181,7 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, * data structures. */ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) - __releases(ctx) __acquires_shared(ctx) __no_context_analysis + __must_hold(ctx) { #ifdef DEBUG_WW_MUTEXES lockdep_assert_held(ctx); @@ -199,7 +199,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) * mutexes have been released with ww_mutex_unlock. 
*/ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) - __releases_shared(ctx) __no_context_analysis + __releases(ctx) __no_context_analysis { #ifdef CONFIG_DEBUG_LOCK_ALLOC mutex_release(&ctx->first_lock_dep_map, _THIS_IP_); -- cgit v1.2.3 From 0da9ca4c08e709144a1bd2f765c14205960ac64d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 3 Mar 2026 16:50:03 -0800 Subject: futex: add missing function parameter comments Correct or add the missing function parameter kernel-doc comments to avoid warnings: Warning: include/asm-generic/futex.h:38 function parameter 'op' not described in 'futex_atomic_op_inuser_local' Warning: include/asm-generic/futex.h:38 function parameter 'oparg' not described in 'futex_atomic_op_inuser_local' Warning: include/asm-generic/futex.h:38 function parameter 'oval' not described in 'futex_atomic_op_inuser_local' Signed-off-by: Randy Dunlap Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260304005008.409858-1-rdunlap@infradead.org --- include/asm-generic/futex.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index 2a19215baae5..fbbcfd801cd0 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -25,7 +25,9 @@ * argument and comparison of the previous * futex value with another constant. * - * @encoded_op: encoded operation to execute + * @op: operation to execute + * @oparg: argument of the operation + * @oval: previous value at @uaddr on successful return * @uaddr: pointer to user space address * * Return: -- cgit v1.2.3 From 4a5dc632e0b603ec1cbbf87b78de86b4b6359cff Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 3 Mar 2026 12:16:49 -0800 Subject: rust: sync: atomic: Remove bound `T: Sync` for `Atomic::from_ptr()` Originally, `Atomic::from_ptr()` requires `T` being a `Sync` because I thought having the ability to do `from_ptr()` meant multiplle `&Atomic`s shared by different threads, which was identical (or similar) to multiple `&T`s shared by different threads. Hence `T` was required to be `Sync`. However this is not true, since `&Atomic` is not the same at `&T`. Moreover, having this bound makes `Atomic::<*mut T>::from_ptr()` impossible, which is definitely not intended. Therefore remove the `T: Sync` bound. [boqun: Fix title typo spotted by Alice & Gary] Fixes: 29c32c405e53 ("rust: sync: atomic: Add generic atomics") Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260120115207.55318-2-boqun.feng@gmail.com Link: https://patch.msgid.link/20260303201701.12204-2-boqun@kernel.org --- rust/kernel/sync/atomic.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rust/kernel/sync/atomic.rs b/rust/kernel/sync/atomic.rs index 4aebeacb961a..296b25e83bbb 100644 --- a/rust/kernel/sync/atomic.rs +++ b/rust/kernel/sync/atomic.rs @@ -204,10 +204,7 @@ impl Atomic { /// // no data race. /// unsafe { Atomic::from_ptr(foo_a_ptr) }.store(2, Release); /// ``` - pub unsafe fn from_ptr<'a>(ptr: *mut T) -> &'a Self - where - T: Sync, - { + pub unsafe fn from_ptr<'a>(ptr: *mut T) -> &'a Self { // CAST: `T` and `Atomic` have the same size, alignment and bit validity. // SAFETY: Per function safety requirement, `ptr` is a valid pointer and the object will // live long enough. 
It's safe to return a `&Atomic` because function safety requirement -- cgit v1.2.3 From bebf7bdc62537b9ef4700c6402f1c2aa206a9b50 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 3 Mar 2026 12:16:50 -0800 Subject: rust: sync: atomic: Add example for Atomic::get_mut() Add an example for Atomic::get_mut(). No functional change. Signed-off-by: FUJITA Tomonori Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260128123313.3850604-1-tomo@aliasing.net Link: https://patch.msgid.link/20260303201701.12204-3-boqun@kernel.org --- rust/kernel/sync/atomic.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/rust/kernel/sync/atomic.rs b/rust/kernel/sync/atomic.rs index 296b25e83bbb..e262b0cb53ae 100644 --- a/rust/kernel/sync/atomic.rs +++ b/rust/kernel/sync/atomic.rs @@ -232,6 +232,17 @@ impl Atomic { /// Returns a mutable reference to the underlying atomic `T`. /// /// This is safe because the mutable reference of the atomic `T` guarantees exclusive access. + /// + /// # Examples + /// + /// ``` + /// use kernel::sync::atomic::{Atomic, Relaxed}; + /// + /// let mut atomic_val = Atomic::new(0u32); + /// let val_mut = atomic_val.get_mut(); + /// *val_mut = 101; + /// assert_eq!(101, atomic_val.load(Relaxed)); + /// ``` pub fn get_mut(&mut self) -> &mut T { // CAST: `T` and `T::Repr` has the same size and alignment per the safety requirement of // `AtomicType`, and per the type invariants `self.0` is a valid `T`, therefore the casting -- cgit v1.2.3 From ecc8e9fbaac35c8e5cced26f740f846506c4737b Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 3 Mar 2026 12:16:51 -0800 Subject: rust: helpers: Generify the definitions of rust_helper_*_{read,set}* To support atomic pointers, more {read,set} helpers will be introduced, hence define macros to generate these helpers to ease the introduction of the future helpers. 
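For illustration, GEN_READ_SET_HELPERS(i8, s8) expands to the same four functions that were previously written out by hand; for instance the plain read helper becomes:

	__rust_helper s8 rust_helper_atomic_i8_read(s8 *ptr)
	{
		return READ_ONCE(*ptr);
	}
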
Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260117122243.24404-2-boqun.feng@gmail.com Link: https://patch.msgid.link/20260303201701.12204-4-boqun@kernel.org --- rust/helpers/atomic_ext.c | 53 ++++++++++++++++++++--------------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/rust/helpers/atomic_ext.c b/rust/helpers/atomic_ext.c index 7d0c2bd340da..f471c1ff123d 100644 --- a/rust/helpers/atomic_ext.c +++ b/rust/helpers/atomic_ext.c @@ -4,45 +4,38 @@ #include #include -__rust_helper s8 rust_helper_atomic_i8_read(s8 *ptr) -{ - return READ_ONCE(*ptr); -} - -__rust_helper s8 rust_helper_atomic_i8_read_acquire(s8 *ptr) -{ - return smp_load_acquire(ptr); -} - -__rust_helper s16 rust_helper_atomic_i16_read(s16 *ptr) -{ - return READ_ONCE(*ptr); +#define GEN_READ_HELPER(tname, type) \ +__rust_helper type rust_helper_atomic_##tname##_read(type *ptr) \ +{ \ + return READ_ONCE(*ptr); \ } -__rust_helper s16 rust_helper_atomic_i16_read_acquire(s16 *ptr) -{ - return smp_load_acquire(ptr); +#define GEN_SET_HELPER(tname, type) \ +__rust_helper void rust_helper_atomic_##tname##_set(type *ptr, type val) \ +{ \ + WRITE_ONCE(*ptr, val); \ } -__rust_helper void rust_helper_atomic_i8_set(s8 *ptr, s8 val) -{ - WRITE_ONCE(*ptr, val); +#define GEN_READ_ACQUIRE_HELPER(tname, type) \ +__rust_helper type rust_helper_atomic_##tname##_read_acquire(type *ptr) \ +{ \ + return smp_load_acquire(ptr); \ } -__rust_helper void rust_helper_atomic_i8_set_release(s8 *ptr, s8 val) -{ - smp_store_release(ptr, val); +#define GEN_SET_RELEASE_HELPER(tname, type) \ +__rust_helper void rust_helper_atomic_##tname##_set_release(type *ptr, type val)\ +{ \ + smp_store_release(ptr, val); \ } -__rust_helper void rust_helper_atomic_i16_set(s16 *ptr, s16 val) -{ - WRITE_ONCE(*ptr, val); -} +#define GEN_READ_SET_HELPERS(tname, type) \ + GEN_READ_HELPER(tname, type) \ + GEN_SET_HELPER(tname, type) \ + GEN_READ_ACQUIRE_HELPER(tname, type) \ + GEN_SET_RELEASE_HELPER(tname, type) \ -__rust_helper void rust_helper_atomic_i16_set_release(s16 *ptr, s16 val) -{ - smp_store_release(ptr, val); -} +GEN_READ_SET_HELPERS(i8, s8) +GEN_READ_SET_HELPERS(i16, s16) /* * xchg helpers depend on ARCH_SUPPORTS_ATOMIC_RMW and on the -- cgit v1.2.3 From f92d22b00e3f75fad2efd965b20d49b4e763b792 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 3 Mar 2026 12:16:52 -0800 Subject: rust: helpers: Generify the definitions of rust_helper_*_xchg* To support atomic pointers, more xchg helpers will be introduced, hence define macros to generate these helpers to ease the introduction of the future helpers. Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260117122243.24404-3-boqun.feng@gmail.com Link: https://patch.msgid.link/20260303201701.12204-5-boqun@kernel.org --- rust/helpers/atomic_ext.c | 48 ++++++++++++----------------------------------- 1 file changed, 12 insertions(+), 36 deletions(-) diff --git a/rust/helpers/atomic_ext.c b/rust/helpers/atomic_ext.c index f471c1ff123d..c5f665bbe785 100644 --- a/rust/helpers/atomic_ext.c +++ b/rust/helpers/atomic_ext.c @@ -44,45 +44,21 @@ GEN_READ_SET_HELPERS(i16, s16) * The architectures that currently support Rust (x86_64, armv7, * arm64, riscv, and loongarch) satisfy these requirements. 
*/ -__rust_helper s8 rust_helper_atomic_i8_xchg(s8 *ptr, s8 new) -{ - return xchg(ptr, new); -} - -__rust_helper s16 rust_helper_atomic_i16_xchg(s16 *ptr, s16 new) -{ - return xchg(ptr, new); -} - -__rust_helper s8 rust_helper_atomic_i8_xchg_acquire(s8 *ptr, s8 new) -{ - return xchg_acquire(ptr, new); -} - -__rust_helper s16 rust_helper_atomic_i16_xchg_acquire(s16 *ptr, s16 new) -{ - return xchg_acquire(ptr, new); -} - -__rust_helper s8 rust_helper_atomic_i8_xchg_release(s8 *ptr, s8 new) -{ - return xchg_release(ptr, new); -} - -__rust_helper s16 rust_helper_atomic_i16_xchg_release(s16 *ptr, s16 new) -{ - return xchg_release(ptr, new); +#define GEN_XCHG_HELPER(tname, type, suffix) \ +__rust_helper type \ +rust_helper_atomic_##tname##_xchg##suffix(type *ptr, type new) \ +{ \ + return xchg##suffix(ptr, new); \ } -__rust_helper s8 rust_helper_atomic_i8_xchg_relaxed(s8 *ptr, s8 new) -{ - return xchg_relaxed(ptr, new); -} +#define GEN_XCHG_HELPERS(tname, type) \ + GEN_XCHG_HELPER(tname, type, ) \ + GEN_XCHG_HELPER(tname, type, _acquire) \ + GEN_XCHG_HELPER(tname, type, _release) \ + GEN_XCHG_HELPER(tname, type, _relaxed) \ -__rust_helper s16 rust_helper_atomic_i16_xchg_relaxed(s16 *ptr, s16 new) -{ - return xchg_relaxed(ptr, new); -} +GEN_XCHG_HELPERS(i8, s8) +GEN_XCHG_HELPERS(i16, s16) /* * try_cmpxchg helpers depend on ARCH_SUPPORTS_ATOMIC_RMW and on the -- cgit v1.2.3 From a92236bf239cc01fd40d9cbe98fc8b9924c42a82 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 3 Mar 2026 12:16:53 -0800 Subject: rust: helpers: Generify the definitions of rust_helper_*_cmpxchg* To support atomic pointers, more cmpxchg helpers will be introduced, hence define macros to generate these helpers to ease the introduction of the future helpers. Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260117122243.24404-4-boqun.feng@gmail.com Link: https://patch.msgid.link/20260303201701.12204-6-boqun@kernel.org --- rust/helpers/atomic_ext.c | 48 ++++++++++++----------------------------------- 1 file changed, 12 insertions(+), 36 deletions(-) diff --git a/rust/helpers/atomic_ext.c b/rust/helpers/atomic_ext.c index c5f665bbe785..240218e2e708 100644 --- a/rust/helpers/atomic_ext.c +++ b/rust/helpers/atomic_ext.c @@ -67,42 +67,18 @@ GEN_XCHG_HELPERS(i16, s16) * The architectures that currently support Rust (x86_64, armv7, * arm64, riscv, and loongarch) satisfy these requirements. 
*/ -__rust_helper bool rust_helper_atomic_i8_try_cmpxchg(s8 *ptr, s8 *old, s8 new) -{ - return try_cmpxchg(ptr, old, new); -} - -__rust_helper bool rust_helper_atomic_i16_try_cmpxchg(s16 *ptr, s16 *old, s16 new) -{ - return try_cmpxchg(ptr, old, new); -} - -__rust_helper bool rust_helper_atomic_i8_try_cmpxchg_acquire(s8 *ptr, s8 *old, s8 new) -{ - return try_cmpxchg_acquire(ptr, old, new); -} - -__rust_helper bool rust_helper_atomic_i16_try_cmpxchg_acquire(s16 *ptr, s16 *old, s16 new) -{ - return try_cmpxchg_acquire(ptr, old, new); -} - -__rust_helper bool rust_helper_atomic_i8_try_cmpxchg_release(s8 *ptr, s8 *old, s8 new) -{ - return try_cmpxchg_release(ptr, old, new); -} - -__rust_helper bool rust_helper_atomic_i16_try_cmpxchg_release(s16 *ptr, s16 *old, s16 new) -{ - return try_cmpxchg_release(ptr, old, new); +#define GEN_TRY_CMPXCHG_HELPER(tname, type, suffix) \ +__rust_helper bool \ +rust_helper_atomic_##tname##_try_cmpxchg##suffix(type *ptr, type *old, type new)\ +{ \ + return try_cmpxchg##suffix(ptr, old, new); \ } -__rust_helper bool rust_helper_atomic_i8_try_cmpxchg_relaxed(s8 *ptr, s8 *old, s8 new) -{ - return try_cmpxchg_relaxed(ptr, old, new); -} +#define GEN_TRY_CMPXCHG_HELPERS(tname, type) \ + GEN_TRY_CMPXCHG_HELPER(tname, type, ) \ + GEN_TRY_CMPXCHG_HELPER(tname, type, _acquire) \ + GEN_TRY_CMPXCHG_HELPER(tname, type, _release) \ + GEN_TRY_CMPXCHG_HELPER(tname, type, _relaxed) \ -__rust_helper bool rust_helper_atomic_i16_try_cmpxchg_relaxed(s16 *ptr, s16 *old, s16 new) -{ - return try_cmpxchg_relaxed(ptr, old, new); -} +GEN_TRY_CMPXCHG_HELPERS(i8, s8) +GEN_TRY_CMPXCHG_HELPERS(i16, s16) -- cgit v1.2.3 From 553c02fb588d4310193eba80f75b43b20befd1d2 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 3 Mar 2026 12:16:54 -0800 Subject: rust: sync: atomic: Clarify the need of CONFIG_ARCH_SUPPORTS_ATOMIC_RMW Currently, since all the architectures that support Rust all have CONFIG_ARCH_SUPPORTS_ATOMIC_RMW selected, the helpers of atomic load/store on i8 and i16 relies on CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y. It's generally fine since most of architectures support that. The plan for CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=n architectures is adding their (probably lock-based) atomic load/store for i8 and i16 as their atomic_{read,set}() and atomic64_{read,set}() counterpart when they plans to support Rust. Hence use a statis_assert!() to check this and remind the future us the need of the helpers. This is more clear than the #[cfg] on impl blocks of i8 and i16. Suggested-by: Dirk Behme Suggested-by: Benno Lossin Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260120140503.62804-2-boqun.feng@gmail.com Link: https://patch.msgid.link/20260303201701.12204-7-boqun@kernel.org --- rust/kernel/sync/atomic/internal.rs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/rust/kernel/sync/atomic/internal.rs b/rust/kernel/sync/atomic/internal.rs index 0dac58bca2b3..ef516bcb02ee 100644 --- a/rust/kernel/sync/atomic/internal.rs +++ b/rust/kernel/sync/atomic/internal.rs @@ -37,16 +37,23 @@ pub trait AtomicImpl: Sized + Send + Copy + private::Sealed { type Delta; } -// The current helpers of load/store uses `{WRITE,READ}_ONCE()` hence the atomicity is only -// guaranteed against read-modify-write operations if the architecture supports native atomic RmW. 
-#[cfg(CONFIG_ARCH_SUPPORTS_ATOMIC_RMW)] +// The current helpers of load/store of atomic `i8` and `i16` use `{WRITE,READ}_ONCE()` hence the +// atomicity is only guaranteed against read-modify-write operations if the architecture supports +// native atomic RmW. +// +// In the future when a CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=n architecture plans to support Rust, the +// load/store helpers that guarantee atomicity against RmW operations (usually via a lock) need to +// be added. +crate::static_assert!( + cfg!(CONFIG_ARCH_SUPPORTS_ATOMIC_RMW), + "The current implementation of atomic i8/i16/ptr relies on the architecure being \ + ARCH_SUPPORTS_ATOMIC_RMW" +); + impl AtomicImpl for i8 { type Delta = Self; } -// The current helpers of load/store uses `{WRITE,READ}_ONCE()` hence the atomicity is only -// guaranteed against read-modify-write operations if the architecture supports native atomic RmW. -#[cfg(CONFIG_ARCH_SUPPORTS_ATOMIC_RMW)] impl AtomicImpl for i16 { type Delta = Self; } -- cgit v1.2.3 From ac8f06ade38a49f7725cc219fc6e90d1d4708d2b Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 3 Mar 2026 12:16:55 -0800 Subject: rust: sync: atomic: Add Atomic<*{mut,const} T> support Atomic pointer support is an important piece of synchronization algorithm, e.g. RCU, hence provide the support for that. Note that instead of relying on atomic_long or the implementation of `Atomic`, a new set of helpers (atomic_ptr_*) is introduced for atomic pointer specifically, this is because ptr2int casting would lose the provenance of a pointer and even though in theory there are a few tricks the provenance can be restored, it'll still be a simpler implementation if C could provide atomic pointers directly. The side effects of this approach are: we don't have the arithmetic and logical operations for pointers yet and the current implementation only works on ARCH_SUPPORTS_ATOMIC_RMW architectures, but these are implementation issues and can be added later. 
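A brief usage sketch (illustrative values only, in the same style as the tests added below):

	use kernel::sync::atomic::{Atomic, Acquire, Relaxed};

	let mut v: i32 = 42;
	let p = Atomic::new(&raw mut v);		// Atomic<*mut i32>
	assert_eq!(p.load(Acquire), &raw mut v);
	p.store(core::ptr::null_mut(), Relaxed);	// store a different pointer value
	assert!(p.load(Relaxed).is_null());
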
Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Gary Guo Reviewed-by: FUJITA Tomonori Link: https://patch.msgid.link/20260120140503.62804-3-boqun.feng@gmail.com Link: https://patch.msgid.link/20260303201701.12204-8-boqun@kernel.org --- rust/helpers/atomic_ext.c | 3 +++ rust/kernel/sync/atomic.rs | 12 +++++++++- rust/kernel/sync/atomic/internal.rs | 24 ++++++++++++------- rust/kernel/sync/atomic/predefine.rs | 46 ++++++++++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 10 deletions(-) diff --git a/rust/helpers/atomic_ext.c b/rust/helpers/atomic_ext.c index 240218e2e708..c267d5190529 100644 --- a/rust/helpers/atomic_ext.c +++ b/rust/helpers/atomic_ext.c @@ -36,6 +36,7 @@ __rust_helper void rust_helper_atomic_##tname##_set_release(type *ptr, type val) GEN_READ_SET_HELPERS(i8, s8) GEN_READ_SET_HELPERS(i16, s16) +GEN_READ_SET_HELPERS(ptr, const void *) /* * xchg helpers depend on ARCH_SUPPORTS_ATOMIC_RMW and on the @@ -59,6 +60,7 @@ rust_helper_atomic_##tname##_xchg##suffix(type *ptr, type new) \ GEN_XCHG_HELPERS(i8, s8) GEN_XCHG_HELPERS(i16, s16) +GEN_XCHG_HELPERS(ptr, const void *) /* * try_cmpxchg helpers depend on ARCH_SUPPORTS_ATOMIC_RMW and on the @@ -82,3 +84,4 @@ rust_helper_atomic_##tname##_try_cmpxchg##suffix(type *ptr, type *old, type new) GEN_TRY_CMPXCHG_HELPERS(i8, s8) GEN_TRY_CMPXCHG_HELPERS(i16, s16) +GEN_TRY_CMPXCHG_HELPERS(ptr, const void *) diff --git a/rust/kernel/sync/atomic.rs b/rust/kernel/sync/atomic.rs index e262b0cb53ae..f4c3ab15c8a7 100644 --- a/rust/kernel/sync/atomic.rs +++ b/rust/kernel/sync/atomic.rs @@ -51,6 +51,10 @@ use ordering::OrderingType; #[repr(transparent)] pub struct Atomic(AtomicRepr); +// SAFETY: `Atomic` is safe to transfer between execution contexts because of the safety +// requirement of `AtomicType`. +unsafe impl Send for Atomic {} + // SAFETY: `Atomic` is safe to share among execution contexts because all accesses are atomic. unsafe impl Sync for Atomic {} @@ -68,6 +72,11 @@ unsafe impl Sync for Atomic {} /// /// - [`Self`] must have the same size and alignment as [`Self::Repr`]. /// - [`Self`] must be [round-trip transmutable] to [`Self::Repr`]. +/// - [`Self`] must be safe to transfer between execution contexts, if it's [`Send`], this is +/// automatically satisfied. The exception is pointer types that are even though marked as +/// `!Send` (e.g. raw pointers and [`NonNull`]) but requiring `unsafe` to do anything +/// meaningful on them. This is because transferring pointer values between execution contexts is +/// safe as long as the actual `unsafe` dereferencing is justified. /// /// Note that this is more relaxed than requiring the bi-directional transmutability (i.e. /// [`transmute()`] is always sound between `U` and `T`) because of the support for atomic @@ -108,7 +117,8 @@ unsafe impl Sync for Atomic {} /// [`transmute()`]: core::mem::transmute /// [round-trip transmutable]: AtomicType#round-trip-transmutability /// [Examples]: AtomicType#examples -pub unsafe trait AtomicType: Sized + Send + Copy { +/// [`NonNull`]: core::ptr::NonNull +pub unsafe trait AtomicType: Sized + Copy { /// The backing atomic implementation type. 
type Repr: AtomicImpl; } diff --git a/rust/kernel/sync/atomic/internal.rs b/rust/kernel/sync/atomic/internal.rs index ef516bcb02ee..e301db4eaf91 100644 --- a/rust/kernel/sync/atomic/internal.rs +++ b/rust/kernel/sync/atomic/internal.rs @@ -7,6 +7,7 @@ use crate::bindings; use crate::macros::paste; use core::cell::UnsafeCell; +use ffi::c_void; mod private { /// Sealed trait marker to disable customized impls on atomic implementation traits. @@ -14,10 +15,11 @@ mod private { } // The C side supports atomic primitives only for `i32` and `i64` (`atomic_t` and `atomic64_t`), -// while the Rust side also layers provides atomic support for `i8` and `i16` -// on top of lower-level C primitives. +// while the Rust side also provides atomic support for `i8`, `i16` and `*const c_void` on top of +// lower-level C primitives. impl private::Sealed for i8 {} impl private::Sealed for i16 {} +impl private::Sealed for *const c_void {} impl private::Sealed for i32 {} impl private::Sealed for i64 {} @@ -26,10 +28,10 @@ impl private::Sealed for i64 {} /// This trait is sealed, and only types that map directly to the C side atomics /// or can be implemented with lower-level C primitives are allowed to implement this: /// -/// - `i8` and `i16` are implemented with lower-level C primitives. +/// - `i8`, `i16` and `*const c_void` are implemented with lower-level C primitives. /// - `i32` map to `atomic_t` /// - `i64` map to `atomic64_t` -pub trait AtomicImpl: Sized + Send + Copy + private::Sealed { +pub trait AtomicImpl: Sized + Copy + private::Sealed { /// The type of the delta in arithmetic or logical operations. /// /// For example, in `atomic_add(ptr, v)`, it's the type of `v`. Usually it's the same type of @@ -37,9 +39,9 @@ pub trait AtomicImpl: Sized + Send + Copy + private::Sealed { type Delta; } -// The current helpers of load/store of atomic `i8` and `i16` use `{WRITE,READ}_ONCE()` hence the -// atomicity is only guaranteed against read-modify-write operations if the architecture supports -// native atomic RmW. +// The current helpers of load/store of atomic `i8`, `i16` and pointers use `{WRITE,READ}_ONCE()` +// hence the atomicity is only guaranteed against read-modify-write operations if the architecture +// supports native atomic RmW. // // In the future when a CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=n architecture plans to support Rust, the // load/store helpers that guarantee atomicity against RmW operations (usually via a lock) need to @@ -58,6 +60,10 @@ impl AtomicImpl for i16 { type Delta = Self; } +impl AtomicImpl for *const c_void { + type Delta = isize; +} + // `atomic_t` implements atomic operations on `i32`. impl AtomicImpl for i32 { type Delta = Self; @@ -269,7 +275,7 @@ macro_rules! declare_and_impl_atomic_methods { } declare_and_impl_atomic_methods!( - [ i8 => atomic_i8, i16 => atomic_i16, i32 => atomic, i64 => atomic64 ] + [ i8 => atomic_i8, i16 => atomic_i16, *const c_void => atomic_ptr, i32 => atomic, i64 => atomic64 ] /// Basic atomic operations pub trait AtomicBasicOps { /// Atomic read (load). @@ -287,7 +293,7 @@ declare_and_impl_atomic_methods!( ); declare_and_impl_atomic_methods!( - [ i8 => atomic_i8, i16 => atomic_i16, i32 => atomic, i64 => atomic64 ] + [ i8 => atomic_i8, i16 => atomic_i16, *const c_void => atomic_ptr, i32 => atomic, i64 => atomic64 ] /// Exchange and compare-and-exchange atomic operations pub trait AtomicExchangeOps { /// Atomic exchange. 
diff --git a/rust/kernel/sync/atomic/predefine.rs b/rust/kernel/sync/atomic/predefine.rs index 67a0406d3ea4..6f2c60529b64 100644 --- a/rust/kernel/sync/atomic/predefine.rs +++ b/rust/kernel/sync/atomic/predefine.rs @@ -4,6 +4,7 @@ use crate::static_assert; use core::mem::{align_of, size_of}; +use ffi::c_void; // Ensure size and alignment requirements are checked. static_assert!(size_of::() == size_of::()); @@ -28,6 +29,26 @@ unsafe impl super::AtomicType for i16 { type Repr = i16; } +// SAFETY: +// +// - `*mut T` has the same size and alignment with `*const c_void`, and is round-trip +// transmutable to `*const c_void`. +// - `*mut T` is safe to transfer between execution contexts. See the safety requirement of +// [`AtomicType`]. +unsafe impl super::AtomicType for *mut T { + type Repr = *const c_void; +} + +// SAFETY: +// +// - `*const T` has the same size and alignment with `*const c_void`, and is round-trip +// transmutable to `*const c_void`. +// - `*const T` is safe to transfer between execution contexts. See the safety requirement of +// [`AtomicType`]. +unsafe impl super::AtomicType for *const T { + type Repr = *const c_void; +} + // SAFETY: `i32` has the same size and alignment with itself, and is round-trip transmutable to // itself. unsafe impl super::AtomicType for i32 { @@ -226,4 +247,29 @@ mod tests { assert_eq!(false, x.load(Relaxed)); assert_eq!(Ok(false), x.cmpxchg(false, true, Full)); } + + #[test] + fn atomic_ptr_tests() { + let mut v = 42; + let mut u = 43; + let x = Atomic::new(&raw mut v); + + assert_eq!(x.load(Acquire), &raw mut v); + assert_eq!(x.cmpxchg(&raw mut u, &raw mut u, Relaxed), Err(&raw mut v)); + assert_eq!(x.cmpxchg(&raw mut v, &raw mut u, Relaxed), Ok(&raw mut v)); + assert_eq!(x.load(Relaxed), &raw mut u); + + let x = Atomic::new(&raw const v); + + assert_eq!(x.load(Acquire), &raw const v); + assert_eq!( + x.cmpxchg(&raw const u, &raw const u, Relaxed), + Err(&raw const v) + ); + assert_eq!( + x.cmpxchg(&raw const v, &raw const u, Relaxed), + Ok(&raw const v) + ); + assert_eq!(x.load(Relaxed), &raw const u); + } } -- cgit v1.2.3 From ec6fc66ac39b1a6c0b06a828eff8d21928e56b60 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 3 Mar 2026 12:16:56 -0800 Subject: rust: sync: atomic: Add performance-optimal Flag type for atomic booleans Add AtomicFlag type for boolean flags. Document when AtomicFlag is generally preferable to Atomic: in particular, when RMW operations such as xchg()/cmpxchg() may be used and minimizing memory usage is not the top priority. On some architectures without byte-sized RMW instructions, Atomic can be slower for RMW operations. Signed-off-by: FUJITA Tomonori Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260129122622.3896144-2-tomo@aliasing.net Link: https://patch.msgid.link/20260303201701.12204-9-boqun@kernel.org --- rust/kernel/sync/atomic.rs | 125 +++++++++++++++++++++++++++++++++++ rust/kernel/sync/atomic/predefine.rs | 17 +++++ 2 files changed, 142 insertions(+) diff --git a/rust/kernel/sync/atomic.rs b/rust/kernel/sync/atomic.rs index f4c3ab15c8a7..f80cebce5bc1 100644 --- a/rust/kernel/sync/atomic.rs +++ b/rust/kernel/sync/atomic.rs @@ -578,3 +578,128 @@ where unsafe { from_repr(ret) } } } + +#[cfg(any(CONFIG_X86_64, CONFIG_UML, CONFIG_ARM, CONFIG_ARM64))] +#[repr(C)] +#[derive(Clone, Copy)] +struct Flag { + bool_field: bool, +} + +/// # Invariants +/// +/// `padding` must be all zeroes. 
+#[cfg(not(any(CONFIG_X86_64, CONFIG_UML, CONFIG_ARM, CONFIG_ARM64)))] +#[repr(C, align(4))] +#[derive(Clone, Copy)] +struct Flag { + #[cfg(target_endian = "big")] + padding: [u8; 3], + bool_field: bool, + #[cfg(target_endian = "little")] + padding: [u8; 3], +} + +impl Flag { + #[inline(always)] + const fn new(b: bool) -> Self { + // INVARIANT: `padding` is all zeroes. + Self { + bool_field: b, + #[cfg(not(any(CONFIG_X86_64, CONFIG_UML, CONFIG_ARM, CONFIG_ARM64)))] + padding: [0; 3], + } + } +} + +// SAFETY: `Flag` and `Repr` have the same size and alignment, and `Flag` is round-trip +// transmutable to the selected representation (`i8` or `i32`). +unsafe impl AtomicType for Flag { + #[cfg(any(CONFIG_X86_64, CONFIG_UML, CONFIG_ARM, CONFIG_ARM64))] + type Repr = i8; + #[cfg(not(any(CONFIG_X86_64, CONFIG_UML, CONFIG_ARM, CONFIG_ARM64)))] + type Repr = i32; +} + +/// An atomic flag type intended to be backed by performance-optimal integer type. +/// +/// The backing integer type is an implementation detail; it may vary by architecture and change +/// in the future. +/// +/// [`AtomicFlag`] is generally preferable to [`Atomic`] when you need read-modify-write +/// (RMW) operations (e.g. [`Atomic::xchg()`]/[`Atomic::cmpxchg()`]) or when [`Atomic`] does +/// not save memory due to padding. On some architectures that do not support byte-sized atomic +/// RMW operations, RMW operations on [`Atomic`] are slower. +/// +/// If you only use [`Atomic::load()`]/[`Atomic::store()`], [`Atomic`] is fine. +/// +/// # Examples +/// +/// ``` +/// use kernel::sync::atomic::{AtomicFlag, Relaxed}; +/// +/// let flag = AtomicFlag::new(false); +/// assert_eq!(false, flag.load(Relaxed)); +/// flag.store(true, Relaxed); +/// assert_eq!(true, flag.load(Relaxed)); +/// ``` +pub struct AtomicFlag(Atomic); + +impl AtomicFlag { + /// Creates a new atomic flag. + #[inline(always)] + pub const fn new(b: bool) -> Self { + Self(Atomic::new(Flag::new(b))) + } + + /// Returns a mutable reference to the underlying flag as a [`bool`]. + /// + /// This is safe because the mutable reference of the atomic flag guarantees exclusive access. + /// + /// # Examples + /// + /// ``` + /// use kernel::sync::atomic::{AtomicFlag, Relaxed}; + /// + /// let mut atomic_flag = AtomicFlag::new(false); + /// assert_eq!(false, atomic_flag.load(Relaxed)); + /// *atomic_flag.get_mut() = true; + /// assert_eq!(true, atomic_flag.load(Relaxed)); + /// ``` + #[inline(always)] + pub fn get_mut(&mut self) -> &mut bool { + &mut self.0.get_mut().bool_field + } + + /// Loads the value from the atomic flag. + #[inline(always)] + pub fn load(&self, o: Ordering) -> bool { + self.0.load(o).bool_field + } + + /// Stores a value to the atomic flag. + #[inline(always)] + pub fn store(&self, v: bool, o: Ordering) { + self.0.store(Flag::new(v), o); + } + + /// Stores a value to the atomic flag and returns the previous value. + #[inline(always)] + pub fn xchg(&self, new: bool, o: Ordering) -> bool { + self.0.xchg(Flag::new(new), o).bool_field + } + + /// Store a value to the atomic flag if the current value is equal to `old`. 
+ #[inline(always)] + pub fn cmpxchg( + &self, + old: bool, + new: bool, + o: Ordering, + ) -> Result { + match self.0.cmpxchg(Flag::new(old), Flag::new(new), o) { + Ok(_) => Ok(old), + Err(f) => Err(f.bool_field), + } + } +} diff --git a/rust/kernel/sync/atomic/predefine.rs b/rust/kernel/sync/atomic/predefine.rs index 6f2c60529b64..ceb3caed9784 100644 --- a/rust/kernel/sync/atomic/predefine.rs +++ b/rust/kernel/sync/atomic/predefine.rs @@ -272,4 +272,21 @@ mod tests { ); assert_eq!(x.load(Relaxed), &raw const u); } + + #[test] + fn atomic_flag_tests() { + let mut flag = AtomicFlag::new(false); + + assert_eq!(false, flag.load(Relaxed)); + + *flag.get_mut() = true; + assert_eq!(true, flag.load(Relaxed)); + + assert_eq!(true, flag.xchg(false, Relaxed)); + assert_eq!(false, flag.load(Relaxed)); + + *flag.get_mut() = true; + assert_eq!(Ok(true), flag.cmpxchg(true, false, Full)); + assert_eq!(false, flag.load(Relaxed)); + } } -- cgit v1.2.3 From 282866207020b15c2afc4d43b1ca0c5d96c9032d Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 3 Mar 2026 12:16:57 -0800 Subject: rust: list: Use AtomicFlag in AtomicTracker Make AtomicTracker use AtomicFlag instead of Atomic to avoid slow byte-sized RMWs on architectures that don't support them. Signed-off-by: FUJITA Tomonori Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260129122622.3896144-3-tomo@aliasing.net Link: https://patch.msgid.link/20260303201701.12204-10-boqun@kernel.org --- rust/kernel/list/arc.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rust/kernel/list/arc.rs b/rust/kernel/list/arc.rs index 2282f33913ee..5e84f500a3fe 100644 --- a/rust/kernel/list/arc.rs +++ b/rust/kernel/list/arc.rs @@ -6,7 +6,7 @@ use crate::alloc::{AllocError, Flags}; use crate::prelude::*; -use crate::sync::atomic::{ordering, Atomic}; +use crate::sync::atomic::{ordering, AtomicFlag}; use crate::sync::{Arc, ArcBorrow, UniqueArc}; use core::marker::PhantomPinned; use core::ops::Deref; @@ -469,7 +469,7 @@ where /// If the boolean is `false`, then there is no [`ListArc`] for this value. #[repr(transparent)] pub struct AtomicTracker { - inner: Atomic, + inner: AtomicFlag, // This value needs to be pinned to justify the INVARIANT: comment in `AtomicTracker::new`. _pin: PhantomPinned, } @@ -480,12 +480,12 @@ impl AtomicTracker { // INVARIANT: Pin-init initializers can't be used on an existing `Arc`, so this value will // not be constructed in an `Arc` that already has a `ListArc`. Self { - inner: Atomic::new(false), + inner: AtomicFlag::new(false), _pin: PhantomPinned, } } - fn project_inner(self: Pin<&mut Self>) -> &mut Atomic { + fn project_inner(self: Pin<&mut Self>) -> &mut AtomicFlag { // SAFETY: The `inner` field is not structurally pinned, so we may obtain a mutable // reference to it even if we only have a pinned reference to `self`. unsafe { &mut Pin::into_inner_unchecked(self).inner } -- cgit v1.2.3 From e2f9c86f33abb89d3e52436018f58e5fb951cc04 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 3 Mar 2026 12:16:58 -0800 Subject: rust: sync: atomic: Add atomic operation helpers over raw pointers In order to synchronize with C or external memory, atomic operations over raw pointers are need. Although there is already an `Atomic::from_ptr()` to provide a `&Atomic`, it's more convenient to have helpers that directly perform atomic operations on raw pointers. Hence a few are added, which are basically an `Atomic::from_ptr().op()` wrapper. 
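A short usage sketch (`flag_ptr` is a hypothetical *mut i32 pointing at C-side memory; the safety obligations are the caller's, as spelled out in the helpers' SAFETY sections):

	use kernel::sync::atomic::{atomic_load, atomic_store, Relaxed, Release};

	// SAFETY: `flag_ptr` is valid and aligned, and every concurrent access
	// from the C side is atomic (READ_ONCE()/WRITE_ONCE() or stronger).
	let old = unsafe { atomic_load(flag_ptr, Relaxed) };
	// SAFETY: as above.
	unsafe { atomic_store(flag_ptr, old + 1, Release) };
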
Note: for naming, since `atomic_xchg()` and `atomic_cmpxchg()` have a conflict naming to 32bit C atomic xchg/cmpxchg, hence the helpers are just named as `xchg()` and `cmpxchg()`. For `atomic_load()` and `atomic_store()`, their 32bit C counterparts are `atomic_read()` and `atomic_set()`, so keep the `atomic_` prefix. [boqun: Fix typo spotted by Alice and fix broken sentence spotted by Gary] Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Alice Ryhl Reviewed-by: Gary Guo Link: https://patch.msgid.link/20260120115207.55318-3-boqun.feng@gmail.com Link: https://patch.msgid.link/20260303201701.12204-11-boqun@kernel.org --- rust/kernel/sync/atomic.rs | 104 +++++++++++++++++++++++++++++++++++ rust/kernel/sync/atomic/predefine.rs | 46 ++++++++++++++++ 2 files changed, 150 insertions(+) diff --git a/rust/kernel/sync/atomic.rs b/rust/kernel/sync/atomic.rs index f80cebce5bc1..1bb1fc2be177 100644 --- a/rust/kernel/sync/atomic.rs +++ b/rust/kernel/sync/atomic.rs @@ -703,3 +703,107 @@ impl AtomicFlag { } } } + +/// Atomic load over raw pointers. +/// +/// This function provides a short-cut of `Atomic::from_ptr().load(..)`, and can be used to work +/// with C side on synchronizations: +/// +/// - `atomic_load(.., Relaxed)` maps to `READ_ONCE()` when used for inter-thread communication. +/// - `atomic_load(.., Acquire)` maps to `smp_load_acquire()`. +/// +/// # Safety +/// +/// - `ptr` is a valid pointer to `T` and aligned to `align_of::()`. +/// - If there is a concurrent store from kernel (C or Rust), it has to be atomic. +#[doc(alias("READ_ONCE", "smp_load_acquire"))] +#[inline(always)] +pub unsafe fn atomic_load( + ptr: *mut T, + o: Ordering, +) -> T +where + T::Repr: AtomicBasicOps, +{ + // SAFETY: Per the function safety requirement, `ptr` is valid and aligned to + // `align_of::()`, and all concurrent stores from kernel are atomic, hence no data race per + // LKMM. + unsafe { Atomic::from_ptr(ptr) }.load(o) +} + +/// Atomic store over raw pointers. +/// +/// This function provides a short-cut of `Atomic::from_ptr().load(..)`, and can be used to work +/// with C side on synchronizations: +/// +/// - `atomic_store(.., Relaxed)` maps to `WRITE_ONCE()` when used for inter-thread communication. +/// - `atomic_load(.., Release)` maps to `smp_store_release()`. +/// +/// # Safety +/// +/// - `ptr` is a valid pointer to `T` and aligned to `align_of::()`. +/// - If there is a concurrent access from kernel (C or Rust), it has to be atomic. +#[doc(alias("WRITE_ONCE", "smp_store_release"))] +#[inline(always)] +pub unsafe fn atomic_store( + ptr: *mut T, + v: T, + o: Ordering, +) where + T::Repr: AtomicBasicOps, +{ + // SAFETY: Per the function safety requirement, `ptr` is valid and aligned to + // `align_of::()`, and all concurrent accesses from kernel are atomic, hence no data race + // per LKMM. + unsafe { Atomic::from_ptr(ptr) }.store(v, o); +} + +/// Atomic exchange over raw pointers. +/// +/// This function provides a short-cut of `Atomic::from_ptr().xchg(..)`, and can be used to work +/// with C side on synchronizations. +/// +/// # Safety +/// +/// - `ptr` is a valid pointer to `T` and aligned to `align_of::()`. +/// - If there is a concurrent access from kernel (C or Rust), it has to be atomic. 
+#[inline(always)] +pub unsafe fn xchg( + ptr: *mut T, + new: T, + o: Ordering, +) -> T +where + T::Repr: AtomicExchangeOps, +{ + // SAFETY: Per the function safety requirement, `ptr` is valid and aligned to + // `align_of::()`, and all concurrent accesses from kernel are atomic, hence no data race + // per LKMM. + unsafe { Atomic::from_ptr(ptr) }.xchg(new, o) +} + +/// Atomic compare and exchange over raw pointers. +/// +/// This function provides a short-cut of `Atomic::from_ptr().cmpxchg(..)`, and can be used to work +/// with C side on synchronizations. +/// +/// # Safety +/// +/// - `ptr` is a valid pointer to `T` and aligned to `align_of::()`. +/// - If there is a concurrent access from kernel (C or Rust), it has to be atomic. +#[doc(alias("try_cmpxchg"))] +#[inline(always)] +pub unsafe fn cmpxchg( + ptr: *mut T, + old: T, + new: T, + o: Ordering, +) -> Result +where + T::Repr: AtomicExchangeOps, +{ + // SAFETY: Per the function safety requirement, `ptr` is valid and aligned to + // `align_of::()`, and all concurrent accesses from kernel are atomic, hence no data race + // per LKMM. + unsafe { Atomic::from_ptr(ptr) }.cmpxchg(old, new, o) +} diff --git a/rust/kernel/sync/atomic/predefine.rs b/rust/kernel/sync/atomic/predefine.rs index ceb3caed9784..1d53834fcb12 100644 --- a/rust/kernel/sync/atomic/predefine.rs +++ b/rust/kernel/sync/atomic/predefine.rs @@ -178,6 +178,14 @@ mod tests { assert_eq!(v, x.load(Relaxed)); }); + + for_each_type!(42 in [i8, i16, i32, i64, u32, u64, isize, usize] |v| { + let x = Atomic::new(v); + let ptr = x.as_ptr(); + + // SAFETY: `ptr` is a valid pointer and no concurrent access. + assert_eq!(v, unsafe { atomic_load(ptr, Relaxed) }); + }); } #[test] @@ -188,6 +196,17 @@ mod tests { x.store(v, Release); assert_eq!(v, x.load(Acquire)); }); + + for_each_type!(42 in [i8, i16, i32, i64, u32, u64, isize, usize] |v| { + let x = Atomic::new(0); + let ptr = x.as_ptr(); + + // SAFETY: `ptr` is a valid pointer and no concurrent access. + unsafe { atomic_store(ptr, v, Release) }; + + // SAFETY: `ptr` is a valid pointer and no concurrent access. + assert_eq!(v, unsafe { atomic_load(ptr, Acquire) }); + }); } #[test] @@ -201,6 +220,18 @@ mod tests { assert_eq!(old, x.xchg(new, Full)); assert_eq!(new, x.load(Relaxed)); }); + + for_each_type!(42 in [i8, i16, i32, i64, u32, u64, isize, usize] |v| { + let x = Atomic::new(v); + let ptr = x.as_ptr(); + + let old = v; + let new = v + 1; + + // SAFETY: `ptr` is a valid pointer and no concurrent access. + assert_eq!(old, unsafe { xchg(ptr, new, Full) }); + assert_eq!(new, x.load(Relaxed)); + }); } #[test] @@ -216,6 +247,21 @@ mod tests { assert_eq!(Ok(old), x.cmpxchg(old, new, Relaxed)); assert_eq!(new, x.load(Relaxed)); }); + + for_each_type!(42 in [i8, i16, i32, i64, u32, u64, isize, usize] |v| { + let x = Atomic::new(v); + let ptr = x.as_ptr(); + + let old = v; + let new = v + 1; + + // SAFETY: `ptr` is a valid pointer and no concurrent access. + assert_eq!(Err(old), unsafe { cmpxchg(ptr, new, new, Full) }); + assert_eq!(old, x.load(Relaxed)); + // SAFETY: `ptr` is a valid pointer and no concurrent access. 
+ assert_eq!(Ok(old), unsafe { cmpxchg(ptr, old, new, Relaxed) }); + assert_eq!(new, x.load(Relaxed)); + }); } #[test] -- cgit v1.2.3 From c49cf341090b53d2afa4dc7c8007ddeefbb3b37f Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Tue, 3 Mar 2026 12:16:59 -0800 Subject: rust: sync: atomic: Add fetch_sub() Add `Atomic::fetch_sub()` with implementation and documentation in line with existing `Atomic::fetch_add()` implementation. Signed-off-by: Andreas Hindborg Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Alice Ryhl Link: https://patch.msgid.link/20260220-atomic-sub-v3-1-e63cbed1d2aa@kernel.org Link: https://patch.msgid.link/20260303201701.12204-12-boqun@kernel.org --- rust/kernel/sync/atomic.rs | 43 +++++++++++++++++++++++++++++++++++++ rust/kernel/sync/atomic/internal.rs | 5 +++++ 2 files changed, 48 insertions(+) diff --git a/rust/kernel/sync/atomic.rs b/rust/kernel/sync/atomic.rs index 1bb1fc2be177..545a8d37ba78 100644 --- a/rust/kernel/sync/atomic.rs +++ b/rust/kernel/sync/atomic.rs @@ -577,6 +577,49 @@ where // SAFETY: `ret` comes from reading `self.0`, which is a valid `T` per type invariants. unsafe { from_repr(ret) } } + + /// Atomic fetch and subtract. + /// + /// Atomically updates `*self` to `(*self).wrapping_sub(v)`, and returns the value of `*self` + /// before the update. + /// + /// # Examples + /// + /// ``` + /// use kernel::sync::atomic::{Atomic, Acquire, Full, Relaxed}; + /// + /// let x = Atomic::new(42); + /// assert_eq!(42, x.load(Relaxed)); + /// assert_eq!(42, x.fetch_sub(12, Acquire)); + /// assert_eq!(30, x.load(Relaxed)); + /// + /// let x = Atomic::new(42); + /// assert_eq!(42, x.load(Relaxed)); + /// assert_eq!(42, x.fetch_sub(12, Full)); + /// assert_eq!(30, x.load(Relaxed)); + /// ``` + #[inline(always)] + pub fn fetch_sub(&self, v: Rhs, _: Ordering) -> T + where + // Types that support addition also support subtraction. + T: AtomicAdd, + { + let v = T::rhs_into_delta(v); + + // INVARIANT: `self.0` is a valid `T` after `atomic_fetch_sub*()` due to safety requirement + // of `AtomicAdd`. + let ret = { + match Ordering::TYPE { + OrderingType::Full => T::Repr::atomic_fetch_sub(&self.0, v), + OrderingType::Acquire => T::Repr::atomic_fetch_sub_acquire(&self.0, v), + OrderingType::Release => T::Repr::atomic_fetch_sub_release(&self.0, v), + OrderingType::Relaxed => T::Repr::atomic_fetch_sub_relaxed(&self.0, v), + } + }; + + // SAFETY: `ret` comes from reading `self.0`, which is a valid `T` per type invariants. + unsafe { from_repr(ret) } + } } #[cfg(any(CONFIG_X86_64, CONFIG_UML, CONFIG_ARM, CONFIG_ARM64))] diff --git a/rust/kernel/sync/atomic/internal.rs b/rust/kernel/sync/atomic/internal.rs index e301db4eaf91..b762dbdf6d18 100644 --- a/rust/kernel/sync/atomic/internal.rs +++ b/rust/kernel/sync/atomic/internal.rs @@ -340,5 +340,10 @@ declare_and_impl_atomic_methods!( // SAFETY: `a.as_ptr()` is valid and properly aligned. unsafe { bindings::#call(v, a.as_ptr().cast()) } } + + fn fetch_sub[acquire, release, relaxed](a: &AtomicRepr, v: Self::Delta) -> Self { + // SAFETY: `a.as_ptr()` guarantees the returned pointer is valid and properly aligned. + unsafe { bindings::#call(v, a.as_ptr().cast()) } + } } ); -- cgit v1.2.3 From 0b864375d93d1509821def9c4b15f845d314a5d2 Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Tue, 3 Mar 2026 12:17:00 -0800 Subject: rust: sync: atomic: Update documentation for `fetch_add()` The documentation for `fetch_add()` does not indicate that the original value is returned by `fetch_add()`. 
Update the documentation so this is clear.

Signed-off-by: Andreas Hindborg
Signed-off-by: Boqun Feng
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Alice Ryhl
Link: https://patch.msgid.link/20260220-atomic-sub-v3-2-e63cbed1d2aa@kernel.org
Link: https://patch.msgid.link/20260303201701.12204-13-boqun@kernel.org
---
 rust/kernel/sync/atomic.rs | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/rust/kernel/sync/atomic.rs b/rust/kernel/sync/atomic.rs
index 545a8d37ba78..9cd009d57e35 100644
--- a/rust/kernel/sync/atomic.rs
+++ b/rust/kernel/sync/atomic.rs
@@ -545,16 +545,14 @@ where
     /// use kernel::sync::atomic::{Atomic, Acquire, Full, Relaxed};
     ///
     /// let x = Atomic::new(42);
-    ///
     /// assert_eq!(42, x.load(Relaxed));
-    ///
-    /// assert_eq!(54, { x.fetch_add(12, Acquire); x.load(Relaxed) });
+    /// assert_eq!(42, x.fetch_add(12, Acquire));
+    /// assert_eq!(54, x.load(Relaxed));
     ///
     /// let x = Atomic::new(42);
-    ///
     /// assert_eq!(42, x.load(Relaxed));
-    ///
-    /// assert_eq!(54, { x.fetch_add(12, Full); x.load(Relaxed) } );
+    /// assert_eq!(42, x.fetch_add(12, Full));
+    /// assert_eq!(54, x.load(Relaxed));
     /// ```
     #[inline(always)]
     pub fn fetch_add(&self, v: Rhs, _: Ordering) -> T
-- cgit v1.2.3

From b91d5d4bcf1266257a9e0199e1b4ad7fa8771baa Mon Sep 17 00:00:00 2001
From: Andreas Hindborg
Date: Tue, 3 Mar 2026 12:17:01 -0800
Subject: rust: atomic: Update a safety comment in impl of `fetch_add()`

The safety comment used in the implementation of `fetch_add()` could be read
as just saying something is true without justifying it.

Update the safety comment to include justification.

Suggested-by: Miguel Ojeda
Signed-off-by: Andreas Hindborg
Signed-off-by: Boqun Feng
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Alice Ryhl
Link: https://patch.msgid.link/20260220-atomic-sub-v3-3-e63cbed1d2aa@kernel.org
Link: https://patch.msgid.link/20260303201701.12204-14-boqun@kernel.org
---
 rust/kernel/sync/atomic/internal.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust/kernel/sync/atomic/internal.rs b/rust/kernel/sync/atomic/internal.rs
index b762dbdf6d18..ad810c2172ec 100644
--- a/rust/kernel/sync/atomic/internal.rs
+++ b/rust/kernel/sync/atomic/internal.rs
@@ -337,7 +337,7 @@ declare_and_impl_atomic_methods!(
     /// Atomically updates `*a` to `(*a).wrapping_add(v)`, and returns the value of `*a`
     /// before the update.
     fn fetch_add[acquire, release, relaxed](a: &AtomicRepr, v: Self::Delta) -> Self {
-        // SAFETY: `a.as_ptr()` is valid and properly aligned.
+        // SAFETY: `a.as_ptr()` guarantees the returned pointer is valid and properly aligned.
         unsafe { bindings::#call(v, a.as_ptr().cast()) }
     }
-- cgit v1.2.3

From 1ea4b473504b6dc6a0d21c298519aff2d52433c9 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Thu, 5 Mar 2026 19:55:41 +0000
Subject: locking/rwsem: Remove the list_head from struct rw_semaphore

Instead of embedding a list_head in struct rw_semaphore, store a pointer to
the first waiter. The list of waiters remains a doubly linked list so we can
efficiently add to the tail of the list and remove from the front (or middle)
of the list. Some of the list manipulation becomes more complicated, but it's
a reasonable tradeoff on the slow paths to shrink some core data structures
like struct inode.
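For illustration (a sketch only; the helper name below is made up, the fields
match this patch), the waiters now form a headless circular list threaded
through the waiters themselves, and the semaphore merely records which waiter
is first:

    /* Enqueue at the tail of the headless waiter list. */
    static void enqueue_waiter(struct rw_semaphore *sem, struct rwsem_waiter *w)
    {
        if (!sem->first_waiter) {
            /* Sole waiter: its links point back at itself. */
            INIT_LIST_HEAD(&w->list);
            sem->first_waiter = w;
        } else {
            /* Inserting before the first waiter is inserting at the tail. */
            list_add_tail(&w->list, &sem->first_waiter->list);
        }
    }

This is the same shape as the slowpath code below; the point of the exercise
is that struct rw_semaphore loses a list_head and every structure embedding
it (such as struct inode) shrinks accordingly.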
Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260305195545.3707590-2-willy@infradead.org --- include/linux/rwsem.h | 8 ++--- kernel/locking/rwsem.c | 90 ++++++++++++++++++++++++++++++++------------------ 2 files changed, 62 insertions(+), 36 deletions(-) diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 9bf1d93d3d7b..e7829531c4ba 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -57,7 +57,7 @@ context_lock_struct(rw_semaphore) { struct optimistic_spin_queue osq; /* spinner MCS lock */ #endif raw_spinlock_t wait_lock; - struct list_head wait_list; + struct rwsem_waiter *first_waiter; #ifdef CONFIG_DEBUG_RWSEMS void *magic; #endif @@ -106,7 +106,7 @@ static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore * .owner = ATOMIC_LONG_INIT(0), \ __RWSEM_OPT_INIT(name) \ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\ - .wait_list = LIST_HEAD_INIT((name).wait_list), \ + .first_waiter = NULL, \ __RWSEM_DEBUG_INIT(name) \ __RWSEM_DEP_MAP_INIT(name) } @@ -129,9 +129,9 @@ do { \ * rwsem to see if somebody from an incompatible type is wanting access to the * lock. */ -static inline int rwsem_is_contended(struct rw_semaphore *sem) +static inline bool rwsem_is_contended(struct rw_semaphore *sem) { - return !list_empty(&sem->wait_list); + return sem->first_waiter != NULL; } #if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 24df4d98f7d2..e66f37ebc6f6 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -72,7 +72,7 @@ #c, atomic_long_read(&(sem)->count), \ (unsigned long) sem->magic, \ atomic_long_read(&(sem)->owner), (long)current, \ - list_empty(&(sem)->wait_list) ? "" : "not ")) \ + (sem)->first_waiter ? 
"" : "not ")) \ debug_locks_off(); \ } while (0) #else @@ -321,7 +321,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, #endif atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE); raw_spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); + sem->first_waiter = NULL; atomic_long_set(&sem->owner, 0L); #ifdef CONFIG_RWSEM_SPIN_ON_OWNER osq_lock_init(&sem->osq); @@ -341,8 +341,6 @@ struct rwsem_waiter { unsigned long timeout; bool handoff_set; }; -#define rwsem_first_waiter(sem) \ - list_first_entry(&sem->wait_list, struct rwsem_waiter, list) enum rwsem_wake_type { RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */ @@ -365,12 +363,21 @@ enum rwsem_wake_type { */ #define MAX_READERS_WAKEUP 0x100 -static inline void -rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) +static inline +bool __rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) { - lockdep_assert_held(&sem->wait_lock); - list_add_tail(&waiter->list, &sem->wait_list); - /* caller will set RWSEM_FLAG_WAITERS */ + if (list_empty(&waiter->list)) { + sem->first_waiter = NULL; + return true; + } + + if (sem->first_waiter == waiter) { + sem->first_waiter = list_first_entry(&waiter->list, + struct rwsem_waiter, list); + } + list_del(&waiter->list); + + return false; } /* @@ -385,14 +392,23 @@ static inline bool rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) { lockdep_assert_held(&sem->wait_lock); - list_del(&waiter->list); - if (likely(!list_empty(&sem->wait_list))) + if (__rwsem_del_waiter(sem, waiter)) return true; - atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count); return false; } +static inline +struct rwsem_waiter *next_waiter(const struct rw_semaphore *sem, + const struct rwsem_waiter *waiter) +{ + struct rwsem_waiter *next = list_first_entry(&waiter->list, + struct rwsem_waiter, list); + if (next == sem->first_waiter) + return NULL; + return next; +} + /* * handle the lock release when processes blocked on it that can now run * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must @@ -411,7 +427,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type, struct wake_q_head *wake_q) { - struct rwsem_waiter *waiter, *tmp; + struct rwsem_waiter *waiter, *next; long oldcount, woken = 0, adjustment = 0; struct list_head wlist; @@ -421,7 +437,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, * Take a peek at the queue head waiter such that we can determine * the wakeup(s) to perform. */ - waiter = rwsem_first_waiter(sem); + waiter = sem->first_waiter; if (waiter->type == RWSEM_WAITING_FOR_WRITE) { if (wake_type == RWSEM_WAKE_ANY) { @@ -506,25 +522,28 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, * put them into wake_q to be woken up later. */ INIT_LIST_HEAD(&wlist); - list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) { + do { + next = next_waiter(sem, waiter); if (waiter->type == RWSEM_WAITING_FOR_WRITE) continue; woken++; list_move_tail(&waiter->list, &wlist); + if (sem->first_waiter == waiter) + sem->first_waiter = next; /* * Limit # of readers that can be woken up per wakeup call. 
*/ if (unlikely(woken >= MAX_READERS_WAKEUP)) break; - } + } while ((waiter = next) != NULL); adjustment = woken * RWSEM_READER_BIAS - adjustment; lockevent_cond_inc(rwsem_wake_reader, woken); oldcount = atomic_long_read(&sem->count); - if (list_empty(&sem->wait_list)) { + if (!sem->first_waiter) { /* * Combined with list_move_tail() above, this implies * rwsem_del_waiter(). @@ -545,7 +564,7 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, atomic_long_add(adjustment, &sem->count); /* 2nd pass */ - list_for_each_entry_safe(waiter, tmp, &wlist, list) { + list_for_each_entry_safe(waiter, next, &wlist, list) { struct task_struct *tsk; tsk = waiter->task; @@ -577,7 +596,7 @@ rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter, struct wake_q_head *wake_q) __releases(&sem->wait_lock) { - bool first = rwsem_first_waiter(sem) == waiter; + bool first = sem->first_waiter == waiter; wake_q_init(wake_q); @@ -603,7 +622,7 @@ rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter, static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, struct rwsem_waiter *waiter) { - struct rwsem_waiter *first = rwsem_first_waiter(sem); + struct rwsem_waiter *first = sem->first_waiter; long count, new; lockdep_assert_held(&sem->wait_lock); @@ -639,7 +658,7 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, new |= RWSEM_WRITER_LOCKED; new &= ~RWSEM_FLAG_HANDOFF; - if (list_is_singular(&sem->wait_list)) + if (list_empty(&first->list)) new &= ~RWSEM_FLAG_WAITERS; } } while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new)); @@ -659,7 +678,8 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on * success. */ - list_del(&waiter->list); + __rwsem_del_waiter(sem, waiter); + rwsem_set_owner(sem); return true; } @@ -994,7 +1014,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat { long adjustment = -RWSEM_READER_BIAS; long rcnt = (count >> RWSEM_READER_SHIFT); - struct rwsem_waiter waiter; + struct rwsem_waiter waiter, *first; DEFINE_WAKE_Q(wake_q); /* @@ -1019,7 +1039,7 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat */ if ((rcnt == 1) && (count & RWSEM_FLAG_WAITERS)) { raw_spin_lock_irq(&sem->wait_lock); - if (!list_empty(&sem->wait_list)) + if (sem->first_waiter) rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q); raw_spin_unlock_irq(&sem->wait_lock); @@ -1035,7 +1055,8 @@ queue: waiter.handoff_set = false; raw_spin_lock_irq(&sem->wait_lock); - if (list_empty(&sem->wait_list)) { + first = sem->first_waiter; + if (!first) { /* * In case the wait queue is empty and the lock isn't owned * by a writer, this reader can exit the slowpath and return @@ -1051,8 +1072,11 @@ queue: return sem; } adjustment += RWSEM_FLAG_WAITERS; + INIT_LIST_HEAD(&waiter.list); + sem->first_waiter = &waiter; + } else { + list_add_tail(&waiter.list, &first->list); } - rwsem_add_waiter(sem, &waiter); /* we're now waiting on the lock, but no longer actively locking */ count = atomic_long_add_return(adjustment, &sem->count); @@ -1110,7 +1134,7 @@ out_nolock: static struct rw_semaphore __sched * rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) { - struct rwsem_waiter waiter; + struct rwsem_waiter waiter, *first; DEFINE_WAKE_Q(wake_q); /* do optimistic spinning and steal lock if possible */ @@ -1129,10 +1153,10 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) waiter.handoff_set = false; 
raw_spin_lock_irq(&sem->wait_lock); - rwsem_add_waiter(sem, &waiter); - /* we're now waiting on the lock */ - if (rwsem_first_waiter(sem) != &waiter) { + first = sem->first_waiter; + if (first) { + list_add_tail(&waiter.list, &first->list); rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count), &wake_q); if (!wake_q_empty(&wake_q)) { @@ -1145,6 +1169,8 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) raw_spin_lock_irq(&sem->wait_lock); } } else { + INIT_LIST_HEAD(&waiter.list); + sem->first_waiter = &waiter; atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count); } @@ -1218,7 +1244,7 @@ static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (!list_empty(&sem->wait_list)) + if (sem->first_waiter) rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); @@ -1239,7 +1265,7 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) raw_spin_lock_irqsave(&sem->wait_lock, flags); - if (!list_empty(&sem->wait_list)) + if (sem->first_waiter) rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); -- cgit v1.2.3 From b9bdd4b6840454ef87f61b6506c9635c57a81650 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 5 Mar 2026 19:55:42 +0000 Subject: locking/semaphore: Remove the list_head from struct semaphore Instead of embedding a list_head in struct semaphore, store a pointer to the first waiter. The list of waiters remains a doubly linked list so we can efficiently add to the tail of the list and remove from the front (or middle) of the list. Some of the list manipulation becomes more complicated, but it's a reasonable tradeoff on the slow paths to shrink data structures which embed a semaphore. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260305195545.3707590-3-willy@infradead.org --- drivers/acpi/osl.c | 2 +- include/linux/semaphore.h | 4 ++-- kernel/locking/semaphore.c | 41 +++++++++++++++++++++++++++++++---------- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 5b777316b9ac..2af0db9210fe 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -1257,7 +1257,7 @@ acpi_status acpi_os_delete_semaphore(acpi_handle handle) ACPI_DEBUG_PRINT((ACPI_DB_MUTEX, "Deleting semaphore[%p].\n", handle)); - BUG_ON(!list_empty(&sem->wait_list)); + BUG_ON(sem->first_waiter); kfree(sem); sem = NULL; diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 89706157e622..a4c8651ef021 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -15,7 +15,7 @@ struct semaphore { raw_spinlock_t lock; unsigned int count; - struct list_head wait_list; + struct semaphore_waiter *first_waiter; #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER unsigned long last_holder; @@ -33,7 +33,7 @@ struct semaphore { { \ .lock = __RAW_SPIN_LOCK_UNLOCKED((name).lock), \ .count = n, \ - .wait_list = LIST_HEAD_INIT((name).wait_list) \ + .first_waiter = NULL \ __LAST_HOLDER_SEMAPHORE_INITIALIZER \ } diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index 3ef032e22f7e..74d41433ba13 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -21,7 +21,7 @@ * too. * * The ->count variable represents how many more tasks can acquire this - * semaphore. If it's zero, there may be tasks waiting on the wait_list. + * semaphore. If it's zero, there may be waiters. 
*/ #include @@ -226,7 +226,7 @@ void __sched up(struct semaphore *sem) hung_task_sem_clear_if_holder(sem); - if (likely(list_empty(&sem->wait_list))) + if (likely(!sem->first_waiter)) sem->count++; else __up(sem, &wake_q); @@ -244,6 +244,21 @@ struct semaphore_waiter { bool up; }; +static inline +void sem_del_waiter(struct semaphore *sem, struct semaphore_waiter *waiter) +{ + if (list_empty(&waiter->list)) { + sem->first_waiter = NULL; + return; + } + + if (sem->first_waiter == waiter) { + sem->first_waiter = list_first_entry(&waiter->list, + struct semaphore_waiter, list); + } + list_del(&waiter->list); +} + /* * Because this function is inlined, the 'state' parameter will be * constant, and thus optimised away by the compiler. Likewise the @@ -252,9 +267,15 @@ struct semaphore_waiter { static inline int __sched ___down_common(struct semaphore *sem, long state, long timeout) { - struct semaphore_waiter waiter; - - list_add_tail(&waiter.list, &sem->wait_list); + struct semaphore_waiter waiter, *first; + + first = sem->first_waiter; + if (first) { + list_add_tail(&waiter.list, &first->list); + } else { + INIT_LIST_HEAD(&waiter.list); + sem->first_waiter = &waiter; + } waiter.task = current; waiter.up = false; @@ -274,11 +295,11 @@ static inline int __sched ___down_common(struct semaphore *sem, long state, } timed_out: - list_del(&waiter.list); + sem_del_waiter(sem, &waiter); return -ETIME; interrupted: - list_del(&waiter.list); + sem_del_waiter(sem, &waiter); return -EINTR; } @@ -321,9 +342,9 @@ static noinline int __sched __down_timeout(struct semaphore *sem, long timeout) static noinline void __sched __up(struct semaphore *sem, struct wake_q_head *wake_q) { - struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, - struct semaphore_waiter, list); - list_del(&waiter->list); + struct semaphore_waiter *waiter = sem->first_waiter; + + sem_del_waiter(sem, waiter); waiter->up = true; wake_q_add(wake_q, waiter->task); } -- cgit v1.2.3 From 25500ba7e77ce9d3d9b5a1929d41a2ee2e23f6fe Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 5 Mar 2026 19:55:43 +0000 Subject: locking/mutex: Remove the list_head from struct mutex Instead of embedding a list_head in struct mutex, store a pointer to the first waiter. The list of waiters remains a doubly linked list so we can efficiently add to the tail of the list, remove from the front (or middle) of the list. Some of the list manipulation becomes more complicated, but it's a reasonable tradeoff on the slow paths to shrink data structures which embed a mutex like struct file. Some of the debug checks have to be deleted because there's no equivalent to checking them in the new scheme (eg an empty waiter->list now means that it is the only waiter, not that the waiter is no longer on the list). 
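To make the changed meaning of an empty waiter->list concrete, here is a
sketch of the removal path (simplified from __mutex_remove_waiter() below;
the helper name is made up):

    static void dequeue_waiter(struct mutex *lock, struct mutex_waiter *w)
    {
        if (list_empty(&w->list)) {
            /* Empty links now mean "only waiter", not "not queued". */
            lock->first_waiter = NULL;
            return;
        }
        if (lock->first_waiter == w)
            lock->first_waiter = list_first_entry(&w->list,
                                                  struct mutex_waiter, list);
        list_del(&w->list);
    }

Since the list has no head element, the last waiter is simply the predecessor
of the first one, which is what __ww_waiter_last() relies on.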
Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260305195545.3707590-4-willy@infradead.org --- include/linux/mutex.h | 2 +- include/linux/mutex_types.h | 2 +- kernel/locking/mutex-debug.c | 5 +---- kernel/locking/mutex.c | 49 ++++++++++++++++++++++++-------------------- kernel/locking/ww_mutex.h | 25 +++++++--------------- 5 files changed, 37 insertions(+), 46 deletions(-) diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 2f648ee204e7..c471b129f703 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -79,7 +79,7 @@ do { \ #define __MUTEX_INITIALIZER(lockname) \ { .owner = ATOMIC_LONG_INIT(0) \ , .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ - , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \ + , .first_waiter = NULL \ __DEBUG_MUTEX_INITIALIZER(lockname) \ __DEP_MAP_MUTEX_INITIALIZER(lockname) } diff --git a/include/linux/mutex_types.h b/include/linux/mutex_types.h index 80975935ec48..a8f119f81177 100644 --- a/include/linux/mutex_types.h +++ b/include/linux/mutex_types.h @@ -44,7 +44,7 @@ context_lock_struct(mutex) { #ifdef CONFIG_MUTEX_SPIN_ON_OWNER struct optimistic_spin_queue osq; /* Spinner MCS lock */ #endif - struct list_head wait_list; + struct mutex_waiter *first_waiter; #ifdef CONFIG_DEBUG_MUTEXES void *magic; #endif diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 2c6b02d4699b..94930d506bcf 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -37,9 +37,8 @@ void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) { lockdep_assert_held(&lock->wait_lock); - DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list)); + DEBUG_LOCKS_WARN_ON(!lock->first_waiter); DEBUG_LOCKS_WARN_ON(waiter->magic != waiter); - DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); } void debug_mutex_free_waiter(struct mutex_waiter *waiter) @@ -62,7 +61,6 @@ void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, { struct mutex *blocked_on = __get_task_blocked_on(task); - DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); DEBUG_LOCKS_WARN_ON(waiter->task != task); DEBUG_LOCKS_WARN_ON(blocked_on && blocked_on != lock); @@ -74,7 +72,6 @@ void debug_mutex_unlock(struct mutex *lock) { if (likely(debug_locks)) { DEBUG_LOCKS_WARN_ON(lock->magic != lock); - DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); } } diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index c867f6c15530..95f1822122a1 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -47,7 +47,7 @@ static void __mutex_init_generic(struct mutex *lock) { atomic_long_set(&lock->owner, 0); raw_spin_lock_init(&lock->wait_lock); - INIT_LIST_HEAD(&lock->wait_list); + lock->first_waiter = NULL; #ifdef CONFIG_MUTEX_SPIN_ON_OWNER osq_lock_init(&lock->osq); #endif @@ -194,33 +194,42 @@ static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag) atomic_long_andnot(flag, &lock->owner); } -static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_waiter *waiter) -{ - return list_first_entry(&lock->wait_list, struct mutex_waiter, list) == waiter; -} - /* * Add @waiter to a given location in the lock wait_list and set the * FLAG_WAITERS flag if it's the first waiter. 
*/ static void __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct list_head *list) + struct mutex_waiter *first) { hung_task_set_blocker(lock, BLOCKER_TYPE_MUTEX); debug_mutex_add_waiter(lock, waiter, current); - list_add_tail(&waiter->list, list); - if (__mutex_waiter_is_first(lock, waiter)) + if (!first) + first = lock->first_waiter; + + if (first) { + list_add_tail(&waiter->list, &first->list); + } else { + INIT_LIST_HEAD(&waiter->list); + lock->first_waiter = waiter; __mutex_set_flag(lock, MUTEX_FLAG_WAITERS); + } } static void __mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter) { - list_del(&waiter->list); - if (likely(list_empty(&lock->wait_list))) + if (list_empty(&waiter->list)) { __mutex_clear_flag(lock, MUTEX_FLAGS); + lock->first_waiter = NULL; + } else { + if (lock->first_waiter == waiter) { + lock->first_waiter = list_first_entry(&waiter->list, + struct mutex_waiter, list); + } + list_del(&waiter->list); + } debug_mutex_remove_waiter(lock, waiter, current); hung_task_clear_blocker(); @@ -340,7 +349,7 @@ bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, * Similarly, stop spinning if we are no longer the * first waiter. */ - if (waiter && !__mutex_waiter_is_first(lock, waiter)) + if (waiter && lock->first_waiter != waiter) return false; return true; @@ -645,7 +654,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas if (!use_ww_ctx) { /* add waiting tasks to the end of the waitqueue (FIFO): */ - __mutex_add_waiter(lock, &waiter, &lock->wait_list); + __mutex_add_waiter(lock, &waiter, NULL); } else { /* * Add in stamp order, waking up waiters that must kill @@ -691,7 +700,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas schedule_preempt_disabled(); - first = __mutex_waiter_is_first(lock, &waiter); + first = lock->first_waiter == &waiter; /* * As we likely have been woken up by task @@ -734,8 +743,7 @@ acquired: * Wound-Wait; we stole the lock (!first_waiter), check the * waiters as anyone might want to wound us. 
*/ - if (!ww_ctx->is_wait_die && - !__mutex_waiter_is_first(lock, &waiter)) + if (!ww_ctx->is_wait_die && lock->first_waiter != &waiter) __ww_mutex_check_waiters(lock, ww_ctx, &wake_q); } @@ -931,6 +939,7 @@ EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip) { struct task_struct *next = NULL; + struct mutex_waiter *waiter; DEFINE_WAKE_Q(wake_q); unsigned long owner; unsigned long flags; @@ -962,12 +971,8 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne raw_spin_lock_irqsave(&lock->wait_lock, flags); debug_mutex_unlock(lock); - if (!list_empty(&lock->wait_list)) { - /* get the first entry from the wait-list: */ - struct mutex_waiter *waiter = - list_first_entry(&lock->wait_list, - struct mutex_waiter, list); - + waiter = lock->first_waiter; + if (waiter) { next = waiter->task; debug_mutex_wake_waiter(lock, waiter); diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h index 31a785afee6c..a0847e91ae04 100644 --- a/kernel/locking/ww_mutex.h +++ b/kernel/locking/ww_mutex.h @@ -8,20 +8,14 @@ static inline struct mutex_waiter * __ww_waiter_first(struct mutex *lock) { - struct mutex_waiter *w; - - w = list_first_entry(&lock->wait_list, struct mutex_waiter, list); - if (list_entry_is_head(w, &lock->wait_list, list)) - return NULL; - - return w; + return lock->first_waiter; } static inline struct mutex_waiter * __ww_waiter_next(struct mutex *lock, struct mutex_waiter *w) { w = list_next_entry(w, list); - if (list_entry_is_head(w, &lock->wait_list, list)) + if (lock->first_waiter == w) return NULL; return w; @@ -31,7 +25,7 @@ static inline struct mutex_waiter * __ww_waiter_prev(struct mutex *lock, struct mutex_waiter *w) { w = list_prev_entry(w, list); - if (list_entry_is_head(w, &lock->wait_list, list)) + if (lock->first_waiter == w) return NULL; return w; @@ -40,22 +34,17 @@ __ww_waiter_prev(struct mutex *lock, struct mutex_waiter *w) static inline struct mutex_waiter * __ww_waiter_last(struct mutex *lock) { - struct mutex_waiter *w; - - w = list_last_entry(&lock->wait_list, struct mutex_waiter, list); - if (list_entry_is_head(w, &lock->wait_list, list)) - return NULL; + struct mutex_waiter *w = lock->first_waiter; + if (w) + w = list_prev_entry(w, list); return w; } static inline void __ww_waiter_add(struct mutex *lock, struct mutex_waiter *waiter, struct mutex_waiter *pos) { - struct list_head *p = &lock->wait_list; - if (pos) - p = &pos->list; - __mutex_add_waiter(lock, waiter, p); + __mutex_add_waiter(lock, waiter, pos); } static inline struct task_struct * -- cgit v1.2.3 From 07574b8ebaac7927e2355b4f343b03b50e04494c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 20 Jan 2026 13:40:30 +0100 Subject: compiler-context-analysys: Add __cond_releases() Useful for things like unlock fastpaths, which on success release the lock. Suggested-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Marco Elver Link: https://patch.msgid.link/20260121111213.634625032@infradead.org --- include/linux/compiler-context-analysis.h | 32 +++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/include/linux/compiler-context-analysis.h b/include/linux/compiler-context-analysis.h index 00c074a2ccb0..a9317571e6af 100644 --- a/include/linux/compiler-context-analysis.h +++ b/include/linux/compiler-context-analysis.h @@ -320,6 +320,38 @@ static inline void _context_unsafe_alias(void **p) { } */ #define __releases(...) 
__releases_ctx_lock(__VA_ARGS__) +/* + * Clang's analysis does not care precisely about the value, only that it is + * either zero or non-zero. So the __cond_acquires() interface might be + * misleading if we say that @ret is the value returned if acquired. Instead, + * provide symbolic variants which we translate. + */ +#define __cond_acquires_impl_not_true(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(0, x) +#define __cond_acquires_impl_not_false(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(1, x) +#define __cond_acquires_impl_not_nonzero(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(0, x) +#define __cond_acquires_impl_not_0(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(1, x) +#define __cond_acquires_impl_not_nonnull(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(0, x) +#define __cond_acquires_impl_not_NULL(x, ...) __try_acquires##__VA_ARGS__##_ctx_lock(1, x) + +/** + * __cond_releases() - function attribute, function conditionally + * releases a context lock exclusively + * @ret: abstract value returned by function if context lock releases + * @x: context lock instance pointer + * + * Function attribute declaring that the function conditionally releases the + * given context lock instance @x exclusively. The associated context(s) must + * be active on entry. The function return value @ret denotes when the context + * lock is released. + * + * @ret may be one of: true, false, nonzero, 0, nonnull, NULL. + * + * NOTE: clang does not have a native attribute for this; instead implement + * it as an unconditional release and a conditional acquire for the + * inverted condition -- which is semantically equivalent. + */ +#define __cond_releases(ret, x) __releases(x) __cond_acquires_impl_not_##ret(x) + /** * __acquire() - function to acquire context lock exclusively * @x: context lock instance pointer -- cgit v1.2.3 From 5c4326231cde36fd5e90c41e403df9fac6238f4b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 20 Jan 2026 10:06:08 +0100 Subject: locking/mutex: Add context analysis Add compiler context analysis annotations. 
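As a rough usage sketch (a hypothetical object, not taken from this patch),
the annotations added below read as follows:

    struct foo {
        raw_spinlock_t lock;
        int val __guarded_by(&lock);    /* only accessed with lock held */
    };

    /* Returns true with the lock held, false without it. */
    static bool foo_trylock(struct foo *f) __cond_acquires(true, &f->lock);

    /* On a true return the lock has been dropped. */
    static bool foo_unlock_fast(struct foo *f) __cond_releases(true, &f->lock);

    static void foo_set(struct foo *f, int v) __must_hold(&f->lock)
    {
        f->val = v;     /* the analysis knows &f->lock is held here */
    }

Because clang has no native conditional-release attribute, __cond_releases()
is implemented as an unconditional __releases() plus a __cond_acquires() for
the inverted return value, as the kernel-doc above spells out.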
Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260121111213.745353747@infradead.org --- include/linux/mutex.h | 2 +- include/linux/mutex_types.h | 2 +- kernel/locking/Makefile | 2 ++ kernel/locking/mutex.c | 33 ++++++++++++++++++++++++++++----- kernel/locking/mutex.h | 1 + kernel/locking/ww_mutex.h | 12 ++++++++++++ 6 files changed, 45 insertions(+), 7 deletions(-) diff --git a/include/linux/mutex.h b/include/linux/mutex.h index c471b129f703..734048c02f4f 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -183,7 +183,7 @@ static inline int __must_check __devm_mutex_init(struct device *dev, struct mute */ #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass) __acquires(lock); -extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); +extern void _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock) __acquires(lock); extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) __cond_acquires(0, lock); extern int __must_check _mutex_lock_killable(struct mutex *lock, diff --git a/include/linux/mutex_types.h b/include/linux/mutex_types.h index a8f119f81177..24ed599fdda8 100644 --- a/include/linux/mutex_types.h +++ b/include/linux/mutex_types.h @@ -44,7 +44,7 @@ context_lock_struct(mutex) { #ifdef CONFIG_MUTEX_SPIN_ON_OWNER struct optimistic_spin_queue osq; /* Spinner MCS lock */ #endif - struct mutex_waiter *first_waiter; + struct mutex_waiter *first_waiter __guarded_by(&wait_lock); #ifdef CONFIG_DEBUG_MUTEXES void *magic; #endif diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index a114949eeed5..264447d606a6 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -3,6 +3,8 @@ # and is generally not a function of system call inputs. KCOV_INSTRUMENT := n +CONTEXT_ANALYSIS_mutex.o := y + obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o # Avoid recursion lockdep -> sanitizer -> ... -> lockdep & improve performance. diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 95f1822122a1..427187ff02db 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -46,8 +46,9 @@ static void __mutex_init_generic(struct mutex *lock) { atomic_long_set(&lock->owner, 0); - raw_spin_lock_init(&lock->wait_lock); - lock->first_waiter = NULL; + scoped_guard (raw_spinlock_init, &lock->wait_lock) { + lock->first_waiter = NULL; + } #ifdef CONFIG_MUTEX_SPIN_ON_OWNER osq_lock_init(&lock->osq); #endif @@ -150,6 +151,7 @@ EXPORT_SYMBOL(mutex_init_generic); * follow with a __mutex_trylock() before failing. 
*/ static __always_inline bool __mutex_trylock_fast(struct mutex *lock) + __cond_acquires(true, lock) { unsigned long curr = (unsigned long)current; unsigned long zero = 0UL; @@ -163,6 +165,7 @@ static __always_inline bool __mutex_trylock_fast(struct mutex *lock) } static __always_inline bool __mutex_unlock_fast(struct mutex *lock) + __cond_releases(true, lock) { unsigned long curr = (unsigned long)current; @@ -201,6 +204,7 @@ static inline void __mutex_clear_flag(struct mutex *lock, unsigned long flag) static void __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct mutex_waiter *first) + __must_hold(&lock->wait_lock) { hung_task_set_blocker(lock, BLOCKER_TYPE_MUTEX); debug_mutex_add_waiter(lock, waiter, current); @@ -219,6 +223,7 @@ __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, static void __mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter) + __must_hold(&lock->wait_lock) { if (list_empty(&waiter->list)) { __mutex_clear_flag(lock, MUTEX_FLAGS); @@ -268,7 +273,8 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task) * We also put the fastpath first in the kernel image, to make sure the * branch is predicted by the CPU as default-untaken. */ -static void __sched __mutex_lock_slowpath(struct mutex *lock); +static void __sched __mutex_lock_slowpath(struct mutex *lock) + __acquires(lock); /** * mutex_lock - acquire the mutex @@ -349,7 +355,7 @@ bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, * Similarly, stop spinning if we are no longer the * first waiter. */ - if (waiter && lock->first_waiter != waiter) + if (waiter && data_race(lock->first_waiter != waiter)) return false; return true; @@ -534,7 +540,8 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx, } #endif -static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip); +static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip) + __releases(lock); /** * mutex_unlock - release the mutex @@ -574,6 +581,7 @@ EXPORT_SYMBOL(mutex_unlock); * of a unlocked mutex is not allowed. 
*/ void __sched ww_mutex_unlock(struct ww_mutex *lock) + __no_context_analysis { __ww_mutex_unlock(lock); mutex_unlock(&lock->base); @@ -587,6 +595,7 @@ static __always_inline int __sched __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclass, struct lockdep_map *nest_lock, unsigned long ip, struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) + __cond_acquires(0, lock) { DEFINE_WAKE_Q(wake_q); struct mutex_waiter waiter; @@ -780,6 +789,7 @@ err_early_kill: static int __sched __mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass, struct lockdep_map *nest_lock, unsigned long ip) + __cond_acquires(0, lock) { return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false); } @@ -787,6 +797,7 @@ __mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass, static int __sched __ww_mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass, unsigned long ip, struct ww_acquire_ctx *ww_ctx) + __cond_acquires(0, lock) { return __mutex_lock_common(lock, state, subclass, NULL, ip, ww_ctx, true); } @@ -834,6 +845,7 @@ void __sched mutex_lock_nested(struct mutex *lock, unsigned int subclass) { __mutex_lock(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_); + __acquire(lock); } EXPORT_SYMBOL_GPL(mutex_lock_nested); @@ -842,6 +854,7 @@ void __sched _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) { __mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_); + __acquire(lock); } EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); @@ -870,12 +883,14 @@ mutex_lock_io_nested(struct mutex *lock, unsigned int subclass) token = io_schedule_prepare(); __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_, NULL, 0); + __acquire(lock); io_schedule_finish(token); } EXPORT_SYMBOL_GPL(mutex_lock_io_nested); static inline int ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) + __cond_releases(nonzero, lock) { #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH unsigned tmp; @@ -937,6 +952,7 @@ EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); * Release the lock, slowpath: */ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip) + __releases(lock) { struct task_struct *next = NULL; struct mutex_waiter *waiter; @@ -945,6 +961,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne unsigned long flags; mutex_release(&lock->dep_map, ip); + __release(lock); /* * Release the lock before (potentially) taking the spinlock such that @@ -1066,24 +1083,29 @@ EXPORT_SYMBOL_GPL(mutex_lock_io); static noinline void __sched __mutex_lock_slowpath(struct mutex *lock) + __acquires(lock) { __mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_); + __acquire(lock); } static noinline int __sched __mutex_lock_killable_slowpath(struct mutex *lock) + __cond_acquires(0, lock) { return __mutex_lock(lock, TASK_KILLABLE, 0, NULL, _RET_IP_); } static noinline int __sched __mutex_lock_interruptible_slowpath(struct mutex *lock) + __cond_acquires(0, lock) { return __mutex_lock(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_); } static noinline int __sched __ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) + __cond_acquires(0, lock) { return __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, 0, _RET_IP_, ctx); @@ -1092,6 +1114,7 @@ __ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) static noinline int __sched __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) + 
__cond_acquires(0, lock) { return __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, 0, _RET_IP_, ctx); diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 9ad4da8cea00..b94ef40c1f48 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -7,6 +7,7 @@ * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar */ #ifndef CONFIG_PREEMPT_RT +#include /* * This is the control structure for tasks blocked on mutex, which resides * on the blocked task's kernel stack: diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h index a0847e91ae04..c50ea5dd3c44 100644 --- a/kernel/locking/ww_mutex.h +++ b/kernel/locking/ww_mutex.h @@ -7,12 +7,14 @@ static inline struct mutex_waiter * __ww_waiter_first(struct mutex *lock) + __must_hold(&lock->wait_lock) { return lock->first_waiter; } static inline struct mutex_waiter * __ww_waiter_next(struct mutex *lock, struct mutex_waiter *w) + __must_hold(&lock->wait_lock) { w = list_next_entry(w, list); if (lock->first_waiter == w) @@ -23,6 +25,7 @@ __ww_waiter_next(struct mutex *lock, struct mutex_waiter *w) static inline struct mutex_waiter * __ww_waiter_prev(struct mutex *lock, struct mutex_waiter *w) + __must_hold(&lock->wait_lock) { w = list_prev_entry(w, list); if (lock->first_waiter == w) @@ -33,6 +36,7 @@ __ww_waiter_prev(struct mutex *lock, struct mutex_waiter *w) static inline struct mutex_waiter * __ww_waiter_last(struct mutex *lock) + __must_hold(&lock->wait_lock) { struct mutex_waiter *w = lock->first_waiter; @@ -43,6 +47,7 @@ __ww_waiter_last(struct mutex *lock) static inline void __ww_waiter_add(struct mutex *lock, struct mutex_waiter *waiter, struct mutex_waiter *pos) + __must_hold(&lock->wait_lock) { __mutex_add_waiter(lock, waiter, pos); } @@ -60,16 +65,19 @@ __ww_mutex_has_waiters(struct mutex *lock) } static inline void lock_wait_lock(struct mutex *lock, unsigned long *flags) + __acquires(&lock->wait_lock) { raw_spin_lock_irqsave(&lock->wait_lock, *flags); } static inline void unlock_wait_lock(struct mutex *lock, unsigned long *flags) + __releases(&lock->wait_lock) { raw_spin_unlock_irqrestore(&lock->wait_lock, *flags); } static inline void lockdep_assert_wait_lock_held(struct mutex *lock) + __must_hold(&lock->wait_lock) { lockdep_assert_held(&lock->wait_lock); } @@ -296,6 +304,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx, struct ww_acquire_ctx *hold_ctx, struct wake_q_head *wake_q) + __must_hold(&lock->wait_lock) { struct task_struct *owner = __ww_mutex_owner(lock); @@ -360,6 +369,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock, static void __ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx, struct wake_q_head *wake_q) + __must_hold(&lock->wait_lock) { struct MUTEX_WAITER *cur; @@ -453,6 +463,7 @@ __ww_mutex_kill(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx) static inline int __ww_mutex_check_kill(struct MUTEX *lock, struct MUTEX_WAITER *waiter, struct ww_acquire_ctx *ctx) + __must_hold(&lock->wait_lock) { struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); @@ -503,6 +514,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter, struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx, struct wake_q_head *wake_q) + __must_hold(&lock->wait_lock) { struct MUTEX_WAITER *cur, *pos = NULL; bool is_wait_die; -- cgit v1.2.3 From 90bb681dcdf7e69c90b56a18f06c0389a0810b92 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 20 Jan 2026 18:17:50 +0100 Subject: locking/rtmutex: Add 
context analysis Add compiler context analysis annotations. Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260121111213.851599178@infradead.org --- include/linux/rtmutex.h | 8 ++++---- kernel/locking/Makefile | 2 ++ kernel/locking/rtmutex.c | 18 +++++++++++++++++- kernel/locking/rtmutex_api.c | 2 ++ kernel/locking/rtmutex_common.h | 27 +++++++++++++++++++-------- kernel/locking/ww_mutex.h | 20 +++++++++++++++----- kernel/locking/ww_rt_mutex.c | 1 + scripts/context-analysis-suppression.txt | 1 + 8 files changed, 61 insertions(+), 18 deletions(-) diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index ede4c6bf6f22..78e7e588817c 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -22,8 +22,8 @@ extern int max_lock_depth; struct rt_mutex_base { raw_spinlock_t wait_lock; - struct rb_root_cached waiters; - struct task_struct *owner; + struct rb_root_cached waiters __guarded_by(&wait_lock); + struct task_struct *owner __guarded_by(&wait_lock); }; #define __RT_MUTEX_BASE_INITIALIZER(rtbasename) \ @@ -41,7 +41,7 @@ struct rt_mutex_base { */ static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock) { - return READ_ONCE(lock->owner) != NULL; + return data_race(READ_ONCE(lock->owner) != NULL); } #ifdef CONFIG_RT_MUTEXES @@ -49,7 +49,7 @@ static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock) static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock) { - unsigned long owner = (unsigned long) READ_ONCE(lock->owner); + unsigned long owner = (unsigned long) data_race(READ_ONCE(lock->owner)); return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS); } diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 264447d606a6..0c07de79388c 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -4,6 +4,8 @@ KCOV_INSTRUMENT := n CONTEXT_ANALYSIS_mutex.o := y +CONTEXT_ANALYSIS_rtmutex_api.o := y +CONTEXT_ANALYSIS_ww_rt_mutex.o := y obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index c80902eacd79..ccaba6148b61 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -94,6 +94,7 @@ static inline int __ww_mutex_check_kill(struct rt_mutex *lock, static __always_inline struct task_struct * rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner) + __must_hold(&lock->wait_lock) { unsigned long val = (unsigned long)owner; @@ -105,6 +106,7 @@ rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner) static __always_inline void rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner) + __must_hold(&lock->wait_lock) { /* * lock->wait_lock is held but explicit acquire semantics are needed @@ -114,12 +116,14 @@ rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner) } static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock) + __must_hold(&lock->wait_lock) { /* lock->wait_lock is held so the unlock provides release semantics. 
*/ WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL)); } static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock) + __must_hold(&lock->wait_lock) { lock->owner = (struct task_struct *) ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS); @@ -127,6 +131,7 @@ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock) static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock) + __must_hold(&lock->wait_lock) { unsigned long owner, *p = (unsigned long *) &lock->owner; @@ -328,6 +333,7 @@ static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, } static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock) + __must_hold(&lock->wait_lock) { lock->owner = (struct task_struct *) ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); @@ -1206,6 +1212,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, struct ww_acquire_ctx *ww_ctx, enum rtmutex_chainwalk chwalk, struct wake_q_head *wake_q) + __must_hold(&lock->wait_lock) { struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex_waiter *top_waiter = waiter; @@ -1249,6 +1256,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, /* Check whether the waiter should back out immediately */ rtm = container_of(lock, struct rt_mutex, rtmutex); + __assume_ctx_lock(&rtm->rtmutex.wait_lock); res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx, wake_q); if (res) { raw_spin_lock(&task->pi_lock); @@ -1356,6 +1364,7 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh, } static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock) + __must_hold(&lock->wait_lock) { int ret = try_to_take_rt_mutex(lock, current, NULL); @@ -1505,7 +1514,7 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, * - the VCPU on which owner runs is preempted */ if (!owner_on_cpu(owner) || need_resched() || - !rt_mutex_waiter_is_top_waiter(lock, waiter)) { + !data_race(rt_mutex_waiter_is_top_waiter(lock, waiter))) { res = false; break; } @@ -1538,6 +1547,7 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, */ static void __sched remove_waiter(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter) + __must_hold(&lock->wait_lock) { bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); struct task_struct *owner = rt_mutex_owner(lock); @@ -1613,6 +1623,8 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, struct task_struct *owner; int ret = 0; + __assume_ctx_lock(&rtm->rtmutex.wait_lock); + lockevent_inc(rtmutex_slow_block); for (;;) { /* Try to acquire the lock: */ @@ -1658,6 +1670,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, struct rt_mutex_base *lock, struct rt_mutex_waiter *w) + __must_hold(&lock->wait_lock) { /* * If the result is not -EDEADLOCK or the caller requested @@ -1694,11 +1707,13 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, enum rtmutex_chainwalk chwalk, struct rt_mutex_waiter *waiter, struct wake_q_head *wake_q) + __must_hold(&lock->wait_lock) { struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); struct ww_mutex *ww = ww_container_of(rtm); int ret; + __assume_ctx_lock(&rtm->rtmutex.wait_lock); lockdep_assert_held(&lock->wait_lock); lockevent_inc(rtmutex_slowlock); @@ -1750,6 +1765,7 @@ static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock, struct 
ww_acquire_ctx *ww_ctx, unsigned int state, struct wake_q_head *wake_q) + __must_hold(&lock->wait_lock) { struct rt_mutex_waiter waiter; int ret; diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c index 59dbd29cb219..124219aea46e 100644 --- a/kernel/locking/rtmutex_api.c +++ b/kernel/locking/rtmutex_api.c @@ -526,6 +526,7 @@ static __always_inline int __mutex_lock_common(struct mutex *lock, unsigned int subclass, struct lockdep_map *nest_lock, unsigned long ip) + __acquires(lock) __no_context_analysis { int ret; @@ -647,6 +648,7 @@ EXPORT_SYMBOL(mutex_trylock); #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ void __sched mutex_unlock(struct mutex *lock) + __releases(lock) __no_context_analysis { mutex_release(&lock->dep_map, _RET_IP_); __rt_mutex_unlock(&lock->rtmutex); diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index cf6ddd1b23a2..c38b7bdea7b3 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -79,12 +79,18 @@ struct rt_wake_q_head { * PI-futex support (proxy locking functions, etc.): */ extern void rt_mutex_init_proxy_locked(struct rt_mutex_base *lock, - struct task_struct *proxy_owner); -extern void rt_mutex_proxy_unlock(struct rt_mutex_base *lock); + struct task_struct *proxy_owner) + __must_hold(&lock->wait_lock); + +extern void rt_mutex_proxy_unlock(struct rt_mutex_base *lock) + __must_hold(&lock->wait_lock); + extern int __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter, struct task_struct *task, - struct wake_q_head *); + struct wake_q_head *) + __must_hold(&lock->wait_lock); + extern int rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter, struct task_struct *task); @@ -94,8 +100,9 @@ extern int rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock, extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter); -extern int rt_mutex_futex_trylock(struct rt_mutex_base *l); -extern int __rt_mutex_futex_trylock(struct rt_mutex_base *l); +extern int rt_mutex_futex_trylock(struct rt_mutex_base *lock); +extern int __rt_mutex_futex_trylock(struct rt_mutex_base *lock) + __must_hold(&lock->wait_lock); extern void rt_mutex_futex_unlock(struct rt_mutex_base *lock); extern bool __rt_mutex_futex_unlock(struct rt_mutex_base *lock, @@ -109,6 +116,7 @@ extern void rt_mutex_postunlock(struct rt_wake_q_head *wqh); */ #ifdef CONFIG_RT_MUTEXES static inline int rt_mutex_has_waiters(struct rt_mutex_base *lock) + __must_hold(&lock->wait_lock) { return !RB_EMPTY_ROOT(&lock->waiters.rb_root); } @@ -120,6 +128,7 @@ static inline int rt_mutex_has_waiters(struct rt_mutex_base *lock) */ static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter) + __must_hold(&lock->wait_lock) { struct rb_node *leftmost = rb_first_cached(&lock->waiters); @@ -127,6 +136,7 @@ static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock, } static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock) + __must_hold(&lock->wait_lock) { struct rb_node *leftmost = rb_first_cached(&lock->waiters); struct rt_mutex_waiter *w = NULL; @@ -170,9 +180,10 @@ enum rtmutex_chainwalk { static inline void __rt_mutex_base_init(struct rt_mutex_base *lock) { - raw_spin_lock_init(&lock->wait_lock); - lock->waiters = RB_ROOT_CACHED; - lock->owner = NULL; + scoped_guard (raw_spinlock_init, &lock->wait_lock) { + lock->waiters = RB_ROOT_CACHED; + lock->owner = NULL; + } } /* Debug 
functions */ diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h index c50ea5dd3c44..b1834ab7e782 100644 --- a/kernel/locking/ww_mutex.h +++ b/kernel/locking/ww_mutex.h @@ -4,6 +4,7 @@ #define MUTEX mutex #define MUTEX_WAITER mutex_waiter +#define WAIT_LOCK wait_lock static inline struct mutex_waiter * __ww_waiter_first(struct mutex *lock) @@ -86,9 +87,11 @@ static inline void lockdep_assert_wait_lock_held(struct mutex *lock) #define MUTEX rt_mutex #define MUTEX_WAITER rt_mutex_waiter +#define WAIT_LOCK rtmutex.wait_lock static inline struct rt_mutex_waiter * __ww_waiter_first(struct rt_mutex *lock) + __must_hold(&lock->rtmutex.wait_lock) { struct rb_node *n = rb_first(&lock->rtmutex.waiters.rb_root); if (!n) @@ -116,6 +119,7 @@ __ww_waiter_prev(struct rt_mutex *lock, struct rt_mutex_waiter *w) static inline struct rt_mutex_waiter * __ww_waiter_last(struct rt_mutex *lock) + __must_hold(&lock->rtmutex.wait_lock) { struct rb_node *n = rb_last(&lock->rtmutex.waiters.rb_root); if (!n) @@ -137,21 +141,25 @@ __ww_mutex_owner(struct rt_mutex *lock) static inline bool __ww_mutex_has_waiters(struct rt_mutex *lock) + __must_hold(&lock->rtmutex.wait_lock) { return rt_mutex_has_waiters(&lock->rtmutex); } static inline void lock_wait_lock(struct rt_mutex *lock, unsigned long *flags) + __acquires(&lock->rtmutex.wait_lock) { raw_spin_lock_irqsave(&lock->rtmutex.wait_lock, *flags); } static inline void unlock_wait_lock(struct rt_mutex *lock, unsigned long *flags) + __releases(&lock->rtmutex.wait_lock) { raw_spin_unlock_irqrestore(&lock->rtmutex.wait_lock, *flags); } static inline void lockdep_assert_wait_lock_held(struct rt_mutex *lock) + __must_hold(&lock->rtmutex.wait_lock) { lockdep_assert_held(&lock->rtmutex.wait_lock); } @@ -304,7 +312,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx, struct ww_acquire_ctx *hold_ctx, struct wake_q_head *wake_q) - __must_hold(&lock->wait_lock) + __must_hold(&lock->WAIT_LOCK) { struct task_struct *owner = __ww_mutex_owner(lock); @@ -369,7 +377,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock, static void __ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx, struct wake_q_head *wake_q) - __must_hold(&lock->wait_lock) + __must_hold(&lock->WAIT_LOCK) { struct MUTEX_WAITER *cur; @@ -396,6 +404,7 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { DEFINE_WAKE_Q(wake_q); unsigned long flags; + bool has_waiters; ww_mutex_lock_acquired(lock, ctx); @@ -417,7 +426,8 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) * __ww_mutex_add_waiter() and makes sure we either observe ww->ctx * and/or !empty list. 
*/ - if (likely(!__ww_mutex_has_waiters(&lock->base))) + has_waiters = data_race(__ww_mutex_has_waiters(&lock->base)); + if (likely(!has_waiters)) return; /* @@ -463,7 +473,7 @@ __ww_mutex_kill(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx) static inline int __ww_mutex_check_kill(struct MUTEX *lock, struct MUTEX_WAITER *waiter, struct ww_acquire_ctx *ctx) - __must_hold(&lock->wait_lock) + __must_hold(&lock->WAIT_LOCK) { struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); @@ -514,7 +524,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter, struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx, struct wake_q_head *wake_q) - __must_hold(&lock->wait_lock) + __must_hold(&lock->WAIT_LOCK) { struct MUTEX_WAITER *cur, *pos = NULL; bool is_wait_die; diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c index c7196de838ed..e07fb3b96bc3 100644 --- a/kernel/locking/ww_rt_mutex.c +++ b/kernel/locking/ww_rt_mutex.c @@ -90,6 +90,7 @@ ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) EXPORT_SYMBOL(ww_mutex_lock_interruptible); void __sched ww_mutex_unlock(struct ww_mutex *lock) + __no_context_analysis { struct rt_mutex *rtm = &lock->base; diff --git a/scripts/context-analysis-suppression.txt b/scripts/context-analysis-suppression.txt index fd8951d06706..1c51b6153f08 100644 --- a/scripts/context-analysis-suppression.txt +++ b/scripts/context-analysis-suppression.txt @@ -24,6 +24,7 @@ src:*include/linux/mutex*.h=emit src:*include/linux/rcupdate.h=emit src:*include/linux/refcount.h=emit src:*include/linux/rhashtable.h=emit +src:*include/linux/rtmutex*.h=emit src:*include/linux/rwlock*.h=emit src:*include/linux/rwsem.h=emit src:*include/linux/sched*=emit -- cgit v1.2.3 From 739690915ce1f017223ef4e6f3cc966ccfa3c861 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 6 Mar 2026 10:43:56 +0100 Subject: locking/rwsem: Add context analysis Add compiler context analysis annotations. 
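For readers new to these annotations, the pattern they follow can be sketched as below; the structure and helper are made-up examples rather than code from this patch, but the annotation forms match the ones used in the diff: a field is marked __guarded_by() its lock, and functions that rely on the caller holding that lock are marked __must_hold().

	struct sketch_sem {
		raw_spinlock_t wait_lock;
		/* may only be accessed with wait_lock held */
		struct rwsem_waiter *first_waiter __guarded_by(&wait_lock);
	};

	/* The caller must already hold wait_lock; the analysis checks call sites. */
	static inline bool sketch_has_waiters(struct sketch_sem *sem)
		__must_hold(&sem->wait_lock)
	{
		return sem->first_waiter != NULL;
	}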
Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260306101417.GT1282955@noisy.programming.kicks-ass.net --- include/linux/rwsem.h | 4 ++-- kernel/locking/Makefile | 1 + kernel/locking/rwbase_rt.c | 1 + kernel/locking/rwsem.c | 27 ++++++++++++++++++++++++--- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index e7829531c4ba..6a1a7bae5f81 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -57,7 +57,7 @@ context_lock_struct(rw_semaphore) { struct optimistic_spin_queue osq; /* spinner MCS lock */ #endif raw_spinlock_t wait_lock; - struct rwsem_waiter *first_waiter; + struct rwsem_waiter *first_waiter __guarded_by(&wait_lock); #ifdef CONFIG_DEBUG_RWSEMS void *magic; #endif @@ -131,7 +131,7 @@ do { \ */ static inline bool rwsem_is_contended(struct rw_semaphore *sem) { - return sem->first_waiter != NULL; + return data_race(sem->first_waiter != NULL); } #if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER) diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 0c07de79388c..cee1901d4cff 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -6,6 +6,7 @@ KCOV_INSTRUMENT := n CONTEXT_ANALYSIS_mutex.o := y CONTEXT_ANALYSIS_rtmutex_api.o := y CONTEXT_ANALYSIS_ww_rt_mutex.o := y +CONTEXT_ANALYSIS_rwsem.o := y obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index 9f4322c07486..82e078c0665a 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -186,6 +186,7 @@ static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb, static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias, unsigned long flags) + __releases(&rwb->rtmutex.wait_lock) { struct rt_mutex_base *rtm = &rwb->rtmutex; diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index e66f37ebc6f6..ba4cb74de064 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -72,7 +72,7 @@ #c, atomic_long_read(&(sem)->count), \ (unsigned long) sem->magic, \ atomic_long_read(&(sem)->owner), (long)current, \ - (sem)->first_waiter ? "" : "not ")) \ + rwsem_is_contended(sem) ? 
"" : "not ")) \ debug_locks_off(); \ } while (0) #else @@ -320,9 +320,10 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, sem->magic = sem; #endif atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE); - raw_spin_lock_init(&sem->wait_lock); - sem->first_waiter = NULL; atomic_long_set(&sem->owner, 0L); + scoped_guard (raw_spinlock_init, &sem->wait_lock) { + sem->first_waiter = NULL; + } #ifdef CONFIG_RWSEM_SPIN_ON_OWNER osq_lock_init(&sem->osq); #endif @@ -365,6 +366,7 @@ enum rwsem_wake_type { static inline bool __rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) + __must_hold(&sem->wait_lock) { if (list_empty(&waiter->list)) { sem->first_waiter = NULL; @@ -401,6 +403,7 @@ rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) static inline struct rwsem_waiter *next_waiter(const struct rw_semaphore *sem, const struct rwsem_waiter *waiter) + __must_hold(&sem->wait_lock) { struct rwsem_waiter *next = list_first_entry(&waiter->list, struct rwsem_waiter, list); @@ -621,6 +624,7 @@ rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter, */ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, struct rwsem_waiter *waiter) + __must_hold(&sem->wait_lock) { struct rwsem_waiter *first = sem->first_waiter; long count, new; @@ -1558,6 +1562,7 @@ static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem) * lock for reading */ void __sched down_read(struct rw_semaphore *sem) + __no_context_analysis { might_sleep(); rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); @@ -1567,6 +1572,7 @@ void __sched down_read(struct rw_semaphore *sem) EXPORT_SYMBOL(down_read); int __sched down_read_interruptible(struct rw_semaphore *sem) + __no_context_analysis { might_sleep(); rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); @@ -1581,6 +1587,7 @@ int __sched down_read_interruptible(struct rw_semaphore *sem) EXPORT_SYMBOL(down_read_interruptible); int __sched down_read_killable(struct rw_semaphore *sem) + __no_context_analysis { might_sleep(); rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); @@ -1598,6 +1605,7 @@ EXPORT_SYMBOL(down_read_killable); * trylock for reading -- returns 1 if successful, 0 if contention */ int down_read_trylock(struct rw_semaphore *sem) + __no_context_analysis { int ret = __down_read_trylock(sem); @@ -1611,6 +1619,7 @@ EXPORT_SYMBOL(down_read_trylock); * lock for writing */ void __sched down_write(struct rw_semaphore *sem) + __no_context_analysis { might_sleep(); rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); @@ -1622,6 +1631,7 @@ EXPORT_SYMBOL(down_write); * lock for writing */ int __sched down_write_killable(struct rw_semaphore *sem) + __no_context_analysis { might_sleep(); rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); @@ -1640,6 +1650,7 @@ EXPORT_SYMBOL(down_write_killable); * trylock for writing -- returns 1 if successful, 0 if contention */ int down_write_trylock(struct rw_semaphore *sem) + __no_context_analysis { int ret = __down_write_trylock(sem); @@ -1654,6 +1665,7 @@ EXPORT_SYMBOL(down_write_trylock); * release a read lock */ void up_read(struct rw_semaphore *sem) + __no_context_analysis { rwsem_release(&sem->dep_map, _RET_IP_); __up_read(sem); @@ -1664,6 +1676,7 @@ EXPORT_SYMBOL(up_read); * release a write lock */ void up_write(struct rw_semaphore *sem) + __no_context_analysis { rwsem_release(&sem->dep_map, _RET_IP_); __up_write(sem); @@ -1674,6 +1687,7 @@ EXPORT_SYMBOL(up_write); * downgrade write lock to read lock */ void downgrade_write(struct rw_semaphore *sem) + __no_context_analysis 
{ lock_downgrade(&sem->dep_map, _RET_IP_); __downgrade_write(sem); @@ -1683,6 +1697,7 @@ EXPORT_SYMBOL(downgrade_write); #ifdef CONFIG_DEBUG_LOCK_ALLOC void down_read_nested(struct rw_semaphore *sem, int subclass) + __no_context_analysis { might_sleep(); rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); @@ -1691,6 +1706,7 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) EXPORT_SYMBOL(down_read_nested); int down_read_killable_nested(struct rw_semaphore *sem, int subclass) + __no_context_analysis { might_sleep(); rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); @@ -1705,6 +1721,7 @@ int down_read_killable_nested(struct rw_semaphore *sem, int subclass) EXPORT_SYMBOL(down_read_killable_nested); void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) + __no_context_analysis { might_sleep(); rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); @@ -1713,6 +1730,7 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) EXPORT_SYMBOL(_down_write_nest_lock); void down_read_non_owner(struct rw_semaphore *sem) + __no_context_analysis { might_sleep(); __down_read(sem); @@ -1727,6 +1745,7 @@ void down_read_non_owner(struct rw_semaphore *sem) EXPORT_SYMBOL(down_read_non_owner); void down_write_nested(struct rw_semaphore *sem, int subclass) + __no_context_analysis { might_sleep(); rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); @@ -1735,6 +1754,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) EXPORT_SYMBOL(down_write_nested); int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) + __no_context_analysis { might_sleep(); rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); @@ -1750,6 +1770,7 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) EXPORT_SYMBOL(down_write_killable_nested); void up_read_non_owner(struct rw_semaphore *sem) + __no_context_analysis { DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem); __up_read(sem); -- cgit v1.2.3 From 68bcd8b6e0b10d902f7fc8bf3f08f335f5d1640e Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 14 Mar 2026 18:26:07 +0000 Subject: locking/rwsem: Fix logic error in rwsem_del_waiter() Commit 1ea4b473504b ("locking/rwsem: Remove the list_head from struct rw_semaphore") introduced a logic error in rwsem_del_waiter(). The root cause of this issue is an inconsistency in the return values of __rwsem_del_waiter() and rwsem_del_waiter(). Specifically, __rwsem_del_waiter() returns true when the wait list becomes empty, whereas rwsem_del_waiter() is supposed to return true if the wait list is NOT empty. This caused a null pointer dereference in rwsem_mark_wake() because it was being called when sem->first_waiter was NULL. 
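To spell out the corrected contract (illustration only; the actual change is the two-line diff below): after this fix __rwsem_del_waiter() returns true exactly when other waiters remain on the list, and false when @waiter was the last one and sem->first_waiter has been cleared, so wake paths such as rwsem_mark_wake() are only reached while first_waiter is still valid.

	/* Hypothetical caller, illustrating the fixed return value: */
	if (__rwsem_del_waiter(sem, waiter)) {
		/* true: waiters remain, sem->first_waiter points at one of them */
	} else {
		/* false: @waiter was the last entry, sem->first_waiter is NULL */
	}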
Fixes: 1ea4b473504b ("locking/rwsem: Remove the list_head from struct rw_semaphore") Reported-by: syzbot+3d2ff92c67127d337463@syzkaller.appspotmail.com Signed-off-by: Andrei Vagin Signed-off-by: Peter Zijlstra (Intel) Tested-by: syzbot+3d2ff92c67127d337463@syzkaller.appspotmail.com Link: https://patch.msgid.link/20260314182607.3343346-1-avagin@google.com --- kernel/locking/rwsem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index ba4cb74de064..bf647097369c 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -370,7 +370,7 @@ bool __rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) { if (list_empty(&waiter->list)) { sem->first_waiter = NULL; - return true; + return false; } if (sem->first_waiter == waiter) { @@ -379,7 +379,7 @@ bool __rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) } list_del(&waiter->list); - return false; + return true; } /* -- cgit v1.2.3 From 16df04446e34a1e7dba57f657af6ad5f51199763 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Jan 2026 12:08:28 +0100 Subject: futex: Convert to compiler context analysis Convert the sparse annotations over to the new compiler context analysis stuff. Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260121111213.950376128@infradead.org --- kernel/futex/Makefile | 2 ++ kernel/futex/core.c | 9 ++++++--- kernel/futex/futex.h | 17 ++++++++++++++--- kernel/futex/pi.c | 9 +++++++++ kernel/futex/waitwake.c | 4 ++++ 5 files changed, 35 insertions(+), 6 deletions(-) diff --git a/kernel/futex/Makefile b/kernel/futex/Makefile index b77188d1fa07..dce70f8a322b 100644 --- a/kernel/futex/Makefile +++ b/kernel/futex/Makefile @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +CONTEXT_ANALYSIS := y + obj-y += core.o syscalls.o pi.o requeue.o waitwake.o diff --git a/kernel/futex/core.c b/kernel/futex/core.c index cf7e610eac42..4bacf5565368 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -864,7 +864,6 @@ void __futex_unqueue(struct futex_q *q) /* The key must be already stored in q->key. */ void futex_q_lock(struct futex_q *q, struct futex_hash_bucket *hb) - __acquires(&hb->lock) { /* * Increment the counter before taking the lock so that @@ -879,10 +878,10 @@ void futex_q_lock(struct futex_q *q, struct futex_hash_bucket *hb) q->lock_ptr = &hb->lock; spin_lock(&hb->lock); + __acquire(q->lock_ptr); } void futex_q_unlock(struct futex_hash_bucket *hb) - __releases(&hb->lock) { futex_hb_waiters_dec(hb); spin_unlock(&hb->lock); @@ -1443,12 +1442,15 @@ static void futex_cleanup(struct task_struct *tsk) void futex_exit_recursive(struct task_struct *tsk) { /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */ - if (tsk->futex_state == FUTEX_STATE_EXITING) + if (tsk->futex_state == FUTEX_STATE_EXITING) { + __assume_ctx_lock(&tsk->futex_exit_mutex); mutex_unlock(&tsk->futex_exit_mutex); + } tsk->futex_state = FUTEX_STATE_DEAD; } static void futex_cleanup_begin(struct task_struct *tsk) + __acquires(&tsk->futex_exit_mutex) { /* * Prevent various race issues against a concurrent incoming waiter @@ -1475,6 +1477,7 @@ static void futex_cleanup_begin(struct task_struct *tsk) } static void futex_cleanup_end(struct task_struct *tsk, int state) + __releases(&tsk->futex_exit_mutex) { /* * Lockless store. 
The only side effect is that an observer might diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 30c2afa03889..9f6bf6f585fc 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -217,7 +217,7 @@ enum futex_access { extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key, enum futex_access rw); -extern void futex_q_lockptr_lock(struct futex_q *q); +extern void futex_q_lockptr_lock(struct futex_q *q) __acquires(q->lock_ptr); extern struct hrtimer_sleeper * futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, int flags, u64 range_ns); @@ -311,9 +311,11 @@ extern int futex_unqueue(struct futex_q *q); static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, struct task_struct *task) __releases(&hb->lock) + __releases(q->lock_ptr) { __futex_queue(q, hb, task); spin_unlock(&hb->lock); + __release(q->lock_ptr); } extern void futex_unqueue_pi(struct futex_q *q); @@ -358,9 +360,12 @@ static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb) #endif } -extern void futex_q_lock(struct futex_q *q, struct futex_hash_bucket *hb); -extern void futex_q_unlock(struct futex_hash_bucket *hb); +extern void futex_q_lock(struct futex_q *q, struct futex_hash_bucket *hb) + __acquires(&hb->lock) + __acquires(q->lock_ptr); +extern void futex_q_unlock(struct futex_hash_bucket *hb) + __releases(&hb->lock); extern int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, union futex_key *key, @@ -379,6 +384,9 @@ extern int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked); */ static inline void double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) + __acquires(&hb1->lock) + __acquires(&hb2->lock) + __no_context_analysis { if (hb1 > hb2) swap(hb1, hb2); @@ -390,6 +398,9 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) static inline void double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) + __releases(&hb1->lock) + __releases(&hb2->lock) + __no_context_analysis { spin_unlock(&hb1->lock); if (hb1 != hb2) diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index bc1f7e83a37e..49ab5f40e86c 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -389,6 +389,7 @@ static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key, * Initialize the pi_mutex in locked state and make @p * the owner of it: */ + __assume_ctx_lock(&pi_state->pi_mutex.wait_lock); rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); /* Store the key for possible exit cleanups: */ @@ -614,6 +615,8 @@ int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state, struct rt_mutex_waiter *top_waiter) + __must_hold(&pi_state->pi_mutex.wait_lock) + __releases(&pi_state->pi_mutex.wait_lock) { struct task_struct *new_owner; bool postunlock = false; @@ -670,6 +673,8 @@ out_unlock: static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, struct task_struct *argowner) + __must_hold(&q->pi_state->pi_mutex.wait_lock) + __must_hold(q->lock_ptr) { struct futex_pi_state *pi_state = q->pi_state; struct task_struct *oldowner, *newowner; @@ -966,6 +971,7 @@ retry_private: * - EAGAIN: The user space value changed. */ futex_q_unlock(hb); + __release(q.lock_ptr); /* * Handle the case where the owner is in the middle of * exiting. Wait for the exit to complete otherwise @@ -1090,6 +1096,7 @@ no_block: if (res) ret = (res < 0) ? 
res : 0; + __release(&hb->lock); futex_unqueue_pi(&q); spin_unlock(q.lock_ptr); if (q.drop_hb_ref) { @@ -1101,10 +1108,12 @@ no_block: out_unlock_put_key: futex_q_unlock(hb); + __release(q.lock_ptr); goto out; uaddr_faulted: futex_q_unlock(hb); + __release(q.lock_ptr); ret = fault_in_user_writeable(uaddr); if (ret) diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index 1c2dd03f11ec..ceed9d879059 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -462,6 +462,7 @@ retry: } futex_q_unlock(hb); + __release(q->lock_ptr); } __set_current_state(TASK_RUNNING); @@ -628,6 +629,7 @@ retry_private: if (ret) { futex_q_unlock(hb); + __release(q->lock_ptr); ret = get_user(uval, uaddr); if (ret) @@ -641,11 +643,13 @@ retry_private: if (uval != val) { futex_q_unlock(hb); + __release(q->lock_ptr); return -EWOULDBLOCK; } if (key2 && futex_match(&q->key, key2)) { futex_q_unlock(hb); + __release(q->lock_ptr); return -EINVAL; } -- cgit v1.2.3 From 428c56525bf5dbc3bd5e30014df1f5213f8bd7c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 13 Mar 2026 09:22:18 +0100 Subject: jump_label: use ATOMIC_INIT() for initialization of .enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently ATOMIC_INIT() is not used because in the past that macro was provided by linux/atomic.h which is not usable from linux/jump_label.h. However since commit 7ca8cf5347f7 ("locking/atomic: Move ATOMIC_INIT into linux/types.h") the macro only requires linux/types.h. Remove the now unnecessary workaround and the associated assertions. Signed-off-by: Thomas Weißschuh Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260313-jump_label-cleanup-v2-1-35d3c0bde549@linutronix.de --- include/linux/jump_label.h | 11 ++--------- kernel/jump_label.c | 9 --------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index fdb79dd1ebd8..e494b360d36d 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -238,18 +238,11 @@ extern void static_key_enable_cpuslocked(struct static_key *key); extern void static_key_disable_cpuslocked(struct static_key *key); extern enum jump_label_type jump_label_init_type(struct jump_entry *entry); -/* - * We should be using ATOMIC_INIT() for initializing .enabled, but - * the inclusion of atomic.h is problematic for inclusion of jump_label.h - * in 'low-level' headers. Thus, we are initializing .enabled with a - * raw value, but have added a BUILD_BUG_ON() to catch any issues in - * jump_label_init() see: kernel/jump_label.c. - */ #define STATIC_KEY_INIT_TRUE \ - { .enabled = { 1 }, \ + { .enabled = ATOMIC_INIT(1), \ { .type = JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ - { .enabled = { 0 }, \ + { .enabled = ATOMIC_INIT(0), \ { .type = JUMP_TYPE_FALSE } } #else /* !CONFIG_JUMP_LABEL */ diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 7cb19e601426..e851e4b37d0e 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -529,15 +529,6 @@ void __init jump_label_init(void) struct static_key *key = NULL; struct jump_entry *iter; - /* - * Since we are initializing the static_key.enabled field with - * with the 'raw' int values (to avoid pulling in atomic.h) in - * jump_label.h, let's make sure that is safe. There are only two - * cases to check since we initialize to 0 or 1. 
- */ - BUILD_BUG_ON((int)ATOMIC_INIT(0) != 0); - BUILD_BUG_ON((int)ATOMIC_INIT(1) != 1); - if (static_key_initialized) return; -- cgit v1.2.3 From acb38872d4cbec5b6825345d9d757e21d2d9d953 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 13 Mar 2026 09:22:19 +0100 Subject: jump_label: remove workaround for old compilers in initializations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The extra braces for the initialization of the anonymous union members were added in commit cd8d860dcce9 ("jump_label: Fix anonymous union initialization") to compensate for limitations in gcc < 4.6. Versions of gcc this old are not supported anymore, so drop the workaround. Signed-off-by: Thomas Weißschuh Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260313-jump_label-cleanup-v2-2-35d3c0bde549@linutronix.de --- include/linux/jump_label.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index e494b360d36d..b9c7b0ebf7b9 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -87,13 +87,6 @@ struct static_key { atomic_t enabled; #ifdef CONFIG_JUMP_LABEL /* - * Note: - * To make anonymous unions work with old compilers, the static - * initialization of them requires brackets. This creates a dependency - * on the order of the struct with the initializers. If any fields - * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need - * to be modified. - * * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod @@ -240,10 +233,10 @@ extern enum jump_label_type jump_label_init_type(struct jump_entry *entry); #define STATIC_KEY_INIT_TRUE \ { .enabled = ATOMIC_INIT(1), \ - { .type = JUMP_TYPE_TRUE } } + .type = JUMP_TYPE_TRUE } #define STATIC_KEY_INIT_FALSE \ { .enabled = ATOMIC_INIT(0), \ - { .type = JUMP_TYPE_FALSE } } + .type = JUMP_TYPE_FALSE } #else /* !CONFIG_JUMP_LABEL */ -- cgit v1.2.3 From 2deccd5c862a0337a691bcfaa87919b4216e6103 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 9 Mar 2026 17:40:42 +0100 Subject: cleanup: Optimize guards Andrew reported that a guard() conversion of zone_lock increased the code size unnecessarily. It turns out the unconditional __GUARD_IS_ERR() is to blame. As explored earlier [1], __GUARD_IS_ERR(), similar to IS_ERR_OR_NULL(), generates somewhat sub-optimal code. However, looking at things again, it is possible to avoid doing the __GUARD_IS_ERR() unconditionally. Revert the normal destructors to a simple NULL test and only add the IS_ERR bit to COND guards. 
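Concretely, the distinction can be sketched as follows (hand-expanded and simplified from the macros in the diff below, not literal generated code):

	/* Plain guard destructor: back to a bare NULL test. */
	if (_T->lock) { _unlock; }

	/* Conditional (trylock-style) guard destructor, built via EXTEND_CLASS_COND():
	 * only these constructors may have stored ERR_PTR(_RET) on failure, so only
	 * here is the IS_ERR filtering kept. */
	if (__GUARD_IS_ERR(_T->lock))
		return;
	if (_T->lock) { _unlock; }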
This cures the reported overhead; as compiled by GCC-16: page_alloc.o: pre: Total: Before=45299, After=45371, chg +0.16% post: Total: Before=45299, After=45026, chg -0.60% [1] https://lkml.kernel.org/r/20250513085001.GC25891@noisy.programming.kicks-ass.net Reported-by: Andrew Morton Signed-off-by: Peter Zijlstra (Intel) Tested-by: Dan Williams Link: https://patch.msgid.link/20260309164516.GE606826@noisy.programming.kicks-ass.net --- include/linux/cleanup.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index dbc4162921e9..ea95ca4bc11c 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -286,15 +286,18 @@ static __always_inline _type class_##_name##_constructor(_init_args) \ __no_context_analysis \ { _type t = _init; return t; } -#define EXTEND_CLASS(_name, ext, _init, _init_args...) \ -typedef lock_##_name##_t lock_##_name##ext##_t; \ +#define EXTEND_CLASS_COND(_name, ext, _cond, _init, _init_args...) \ +typedef lock_##_name##_t lock_##_name##ext##_t; \ typedef class_##_name##_t class_##_name##ext##_t; \ -static __always_inline void class_##_name##ext##_destructor(class_##_name##_t *p) \ -{ class_##_name##_destructor(p); } \ +static __always_inline void class_##_name##ext##_destructor(class_##_name##_t *_T) \ +{ if (_cond) return; class_##_name##_destructor(_T); } \ static __always_inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ __no_context_analysis \ { class_##_name##_t t = _init; return t; } +#define EXTEND_CLASS(_name, ext, _init, _init_args...) \ + EXTEND_CLASS_COND(_name, ext, 0, _init, _init_args) + #define CLASS(_name, var) \ class_##_name##_t var __cleanup(class_##_name##_destructor) = \ class_##_name##_constructor @@ -394,12 +397,12 @@ static __maybe_unused const bool class_##_name##_is_conditional = _is_cond __DEFINE_GUARD_LOCK_PTR(_name, _T) #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ - DEFINE_CLASS(_name, _type, if (!__GUARD_IS_ERR(_T)) { _unlock; }, ({ _lock; _T; }), _type _T); \ + DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ DEFINE_CLASS_IS_GUARD(_name) #define DEFINE_GUARD_COND_4(_name, _ext, _lock, _cond) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ - EXTEND_CLASS(_name, _ext, \ + EXTEND_CLASS_COND(_name, _ext, __GUARD_IS_ERR(*_T), \ ({ void *_t = _T; int _RET = (_lock); if (_T && !(_cond)) _t = ERR_PTR(_RET); _t; }), \ class_##_name##_t _T) \ static __always_inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ @@ -488,7 +491,7 @@ typedef struct { \ static __always_inline void class_##_name##_destructor(class_##_name##_t *_T) \ __no_context_analysis \ { \ - if (!__GUARD_IS_ERR(_T->lock)) { _unlock; } \ + if (_T->lock) { _unlock; } \ } \ \ __DEFINE_GUARD_LOCK_PTR(_name, &_T->lock) @@ -565,7 +568,7 @@ __DEFINE_LOCK_GUARD_0(_name, _lock) #define DEFINE_LOCK_GUARD_1_COND_4(_name, _ext, _lock, _cond) \ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \ - EXTEND_CLASS(_name, _ext, \ + EXTEND_CLASS_COND(_name, _ext, __GUARD_IS_ERR(_T->lock), \ ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ int _RET = (_lock); \ if (_T->lock && !(_cond)) _T->lock = ERR_PTR(_RET);\ -- cgit v1.2.3 From 891626973b2faf468565a253ca55373e0b9675de Mon Sep 17 00:00:00 2001 From: Mikhail Gavrilov Date: Fri, 13 Mar 2026 22:10:02 +0500 Subject: lockdep: Raise default stack trace limits when KASAN is enabled KASAN-enabled kernels with LOCKDEP and PREEMPT_FULL hit "BUG: MAX_STACK_TRACE_ENTRIES too low!" 
within 9-23 hours of normal desktop use. The root cause is a feedback loop between KASAN slab tracking and lockdep: every KASAN-tracked slab allocation saves a stack trace via stack_trace_save() -> arch_stack_walk(). The unwinder calls is_bpf_text_address(), which under PREEMPT_FULL can trigger RCU deferred quiescent-state processing -> swake_up_one() -> lock_acquire() -> lockdep validate_chain() -> save_trace(). This means KASAN's own stack captures indirectly generate new lockdep dependency chains, consuming the buffer from both directions. /proc/lockdep_stats at the moment of overflow confirms that stack-trace entries is the sole exhausted resource: stack-trace entries: 524288 [max: 524288] <- 100% full number of stack traces: 22080 <- unique after dedup dependency chains: 164665 [max: 524288] <- only 31% used direct dependencies: 45270 [max: 65536] <- 69% lock-classes: 2811 [max: 8192] <- 34% 22080 genuinely unique traces averaging ~24 frames each fill the buffer in under a day. The hash-based deduplication (12593b7467f9) is working correctly -- the traces are simply all different due to the deep and varied call stacks from GPU + filesystem + Wine/Proton + KASAN instrumentation. Raise the LOCKDEP_STACK_TRACE_BITS default from 19 to 21 when KASAN is enabled (2M entries, +12MB). This is negligible compared to KASAN's own shadow memory overhead (~12.5% of total RAM). Scale LOCKDEP_STACK_TRACE_HASH_BITS accordingly to maintain dedup efficiency. Signed-off-by: Mikhail Gavrilov Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260313171118.1702954-2-mikhail.v.gavrilov@gmail.com --- lib/Kconfig.debug | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4e2dfbbd3d78..e51e3c5a6538 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1617,14 +1617,22 @@ config LOCKDEP_STACK_TRACE_BITS int "Size for MAX_STACK_TRACE_ENTRIES (as Nth power of 2)" depends on LOCKDEP && !LOCKDEP_SMALL range 10 26 + default 21 if KASAN default 19 help Try increasing this value if you hit "BUG: MAX_STACK_TRACE_ENTRIES too low!" message. + KASAN significantly increases stack trace consumption because its + slab tracking interacts with lockdep's dependency validation under + PREEMPT_FULL, creating a feedback loop. The higher default when + KASAN is enabled costs ~12MB extra, which is negligible compared to + KASAN's own shadow memory overhead. + config LOCKDEP_STACK_TRACE_HASH_BITS int "Size for STACK_TRACE_HASH_SIZE (as Nth power of 2)" depends on LOCKDEP && !LOCKDEP_SMALL range 10 26 + default 16 if KASAN default 14 help Try increasing this value if you need large STACK_TRACE_HASH_SIZE. -- cgit v1.2.3 From 756a0e011cfca0b45a48464aa25b05d9a9c2fb0b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 13 Mar 2026 10:15:07 -0700 Subject: locking: Fix rwlock support in Architecture support for rwlocks must be available whether or not CONFIG_DEBUG_SPINLOCK has been defined. Move the definitions of the arch_{read,write}_{lock,trylock,unlock}() macros such that these become visible if CONFIG_DEBUG_SPINLOCK=n. This patch prepares for converting do_raw_{read,write}_trylock() into inline functions. Without this patch that conversion triggers a build failure for UP architectures, e.g. arm-ep93xx. 
I used the following kernel configuration to build the kernel for that architecture: CONFIG_ARCH_MULTIPLATFORM=y CONFIG_ARCH_MULTI_V7=n CONFIG_ATAGS=y CONFIG_MMU=y CONFIG_ARCH_MULTI_V4T=y CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_ARCH_EP93XX=y Fixes: fb1c8f93d869 ("[PATCH] spinlock consolidation") Signed-off-by: Bart Van Assche Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260313171510.230998-2-bvanassche@acm.org --- include/linux/spinlock_up.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 1e84e71ca495..3a50976471d7 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -48,16 +48,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) lock->slock = 1; } -/* - * Read-write spinlocks. No debug version. - */ -#define arch_read_lock(lock) do { barrier(); (void)(lock); } while (0) -#define arch_write_lock(lock) do { barrier(); (void)(lock); } while (0) -#define arch_read_trylock(lock) ({ barrier(); (void)(lock); 1; }) -#define arch_write_trylock(lock) ({ barrier(); (void)(lock); 1; }) -#define arch_read_unlock(lock) do { barrier(); (void)(lock); } while (0) -#define arch_write_unlock(lock) do { barrier(); (void)(lock); } while (0) - #else /* DEBUG_SPINLOCK */ #define arch_spin_is_locked(lock) ((void)(lock), 0) /* for sched/core.c and kernel_lock.c: */ @@ -68,4 +58,14 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) #define arch_spin_is_contended(lock) (((void)(lock), 0)) +/* + * Read-write spinlocks. No debug version. + */ +#define arch_read_lock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_write_lock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_read_trylock(lock) ({ barrier(); (void)(lock); 1; }) +#define arch_write_trylock(lock) ({ barrier(); (void)(lock); 1; }) +#define arch_read_unlock(lock) do { barrier(); (void)(lock); } while (0) +#define arch_write_unlock(lock) do { barrier(); (void)(lock); } while (0) + #endif /* __LINUX_SPINLOCK_UP_H */ -- cgit v1.2.3 From c4d3b8c77d85082d32250c505beb1d9e46ee47ee Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 13 Mar 2026 10:15:08 -0700 Subject: locking: Add lock context support in do_raw_{read,write}_trylock() Convert do_raw_{read,write}_trylock() from macros into inline functions and annotate these inline functions with __cond_acquires_shared() or __cond_acquires() as appropriate. This change is necessary to build kernel drivers or subsystems that use rwlock synchronization objects with lock context analysis enabled. The return type 'int' matches the return type for CONFIG_DEBUG_SPINLOCK=y. 
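As a purely illustrative caller (made up for this description; real users normally go through the read_trylock()/write_trylock() wrappers rather than the do_raw_*() helpers), the __cond_acquires() annotation tells the analysis that the lock is held only when the function returned nonzero:

	static bool example_try_bump(rwlock_t *lock, int *counter)
	{
		if (!do_raw_write_trylock(lock))
			return false;		/* lock was not acquired */
		(*counter)++;			/* write-side critical section */
		do_raw_write_unlock(lock);	/* balances the conditional acquire */
		return true;
	}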
Signed-off-by: Bart Van Assche Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260313171510.230998-3-bvanassche@acm.org --- include/linux/rwlock.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 21ceefc4a49f..4e67cd934d8f 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -37,10 +37,20 @@ extern int do_raw_write_trylock(rwlock_t *lock) __cond_acquires(true, lock); extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock); #else # define do_raw_read_lock(rwlock) do {__acquire_shared(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0) -# define do_raw_read_trylock(rwlock) arch_read_trylock(&(rwlock)->raw_lock) +static inline int do_raw_read_trylock(rwlock_t *rwlock) + __cond_acquires_shared(true, rwlock) + __no_context_analysis +{ + return arch_read_trylock(&(rwlock)->raw_lock); +} # define do_raw_read_unlock(rwlock) do {arch_read_unlock(&(rwlock)->raw_lock); __release_shared(lock); } while (0) # define do_raw_write_lock(rwlock) do {__acquire(lock); arch_write_lock(&(rwlock)->raw_lock); } while (0) -# define do_raw_write_trylock(rwlock) arch_write_trylock(&(rwlock)->raw_lock) +static inline int do_raw_write_trylock(rwlock_t *rwlock) + __cond_acquires(true, rwlock) + __no_context_analysis +{ + return arch_write_trylock(&(rwlock)->raw_lock); +} # define do_raw_write_unlock(rwlock) do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0) #endif -- cgit v1.2.3 From b06e988c4c52ce8750616ea9b23c8bd3b611b931 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 13 Mar 2026 10:15:09 -0700 Subject: locking: Add lock context annotations in the spinlock implementation Make the spinlock implementation compatible with lock context analysis (CONTEXT_ANALYSIS := 1) by adding lock context annotations to the _raw_##op##_...() macros. Signed-off-by: Bart Van Assche Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260313171510.230998-4-bvanassche@acm.org --- kernel/locking/spinlock.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index 7685defd7c52..b42d293da38b 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -64,8 +64,9 @@ EXPORT_PER_CPU_SYMBOL(__mmiowb_state); * time (making _this_ CPU preemptible if possible), and we also signal * towards that other CPU that it should break the lock ASAP. 
*/ -#define BUILD_LOCK_OPS(op, locktype) \ +#define BUILD_LOCK_OPS(op, locktype, lock_ctx_op) \ static void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ + lock_ctx_op(lock) \ { \ for (;;) { \ preempt_disable(); \ @@ -78,6 +79,7 @@ static void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ } \ \ static unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ + lock_ctx_op(lock) \ { \ unsigned long flags; \ \ @@ -96,11 +98,13 @@ static unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ } \ \ static void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \ + lock_ctx_op(lock) \ { \ _raw_##op##_lock_irqsave(lock); \ } \ \ static void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ + lock_ctx_op(lock) \ { \ unsigned long flags; \ \ @@ -123,11 +127,11 @@ static void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ * __[spin|read|write]_lock_irqsave() * __[spin|read|write]_lock_bh() */ -BUILD_LOCK_OPS(spin, raw_spinlock); +BUILD_LOCK_OPS(spin, raw_spinlock, __acquires); #ifndef CONFIG_PREEMPT_RT -BUILD_LOCK_OPS(read, rwlock); -BUILD_LOCK_OPS(write, rwlock); +BUILD_LOCK_OPS(read, rwlock, __acquires_shared); +BUILD_LOCK_OPS(write, rwlock, __acquires); #endif #endif -- cgit v1.2.3 From a21c1e961de28b95099a9ca2c3774b2eee1a33bb Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 19 Mar 2026 14:52:38 +0100 Subject: compiler: Simplify generic RELOC_HIDE() When enabling Context Analysis (CONTEXT_ANALYSIS := y) in arch/x86/kvm code, Clang's Thread Safety Analysis failed to recognize that identical per_cpu() accesses refer to the same lock: | CC [M] arch/x86/kvm/vmx/posted_intr.o | arch/x86/kvm/vmx/posted_intr.c:186:2: error: releasing raw_spinlock '__ptr + __per_cpu_offset[vcpu->cpu]' that was not held [-Werror,-Wthread-safety-analysis] | 186 | raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); | | ^ | ./include/linux/spinlock.h:276:32: note: expanded from macro 'raw_spin_unlock' | 276 | #define raw_spin_unlock(lock) _raw_spin_unlock(lock) | | ^ | arch/x86/kvm/vmx/posted_intr.c:207:1: error: raw_spinlock '__ptr + __per_cpu_offset[vcpu->cpu]' is still held at the end of function [-Werror,-Wthread-safety-analysis] | 207 | } | | ^ | arch/x86/kvm/vmx/posted_intr.c:182:2: note: raw_spinlock acquired here | 182 | raw_spin_lock_nested(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu), | | ^ | ./include/linux/spinlock.h:235:2: note: expanded from macro 'raw_spin_lock_nested' | 235 | _raw_spin_lock(((void)(subclass), (lock))) | | ^ | 2 errors generated. This occurred because the default RELOC_HIDE() implementation (used by the per-CPU macros) is a statement expression containing an intermediate 'unsigned long' variable (this version appears to predate Git history). While the analysis strips away inner casts when resolving pointer aliases, it stops when encountering intermediate non-pointer variables (this is Thread Safety Analysis specific and irrelevant for codegen). This prevents the analysis from concluding that the pointers passed to e.g. raw_spin_lock() and raw_spin_unlock() were identical when per-CPU accessors are used. Simplify RELOC_HIDE() to a single expression. This preserves the intent of obfuscating UB-introducing out-of-bounds pointer calculations from the compiler via the 'unsigned long' cast, but allows the alias analysis to successfully resolve the pointers. 
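Schematically, the difference the analysis sees is roughly the following (hand-simplified, not the literal macro expansions):

	/* Old RELOC_HIDE(): a statement expression with an intermediate variable;
	 * two such expansions are not recognized as naming the same lock. */
	({ unsigned long __ptr = (unsigned long)(ptr); (typeof(ptr))(__ptr + (off)); })

	/* New RELOC_HIDE(): a single expression, so identical per_cpu() accesses
	 * resolve to the same pointer and lock/unlock pairs match up. */
	((typeof(ptr))((unsigned long)(ptr) + (off)))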
Using a recent Clang version, I observe that generated code remains the same for vmlinux; the intermediate variable was already being optimized away (for any respectable modern compiler, not doing so would be an optimizer bug). Note that GCC provides its own version of RELOC_HIDE(), so this change only affects Clang builds. Add a test case to lib/test_context-analysis.c to catch any regressions. Reported-by: Bart Van Assche Reported-by: Sean Christopherson Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/all/e3946223-4543-4a76-a328-9c6865e95192@acm.org/ Link: https://patch.msgid.link/20260319135245.1420780-1-elver@google.com --- include/linux/compiler.h | 5 +---- lib/test_context-analysis.c | 11 +++++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index af16624b29fd..cb2f6050bdf7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -149,10 +149,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif #ifndef RELOC_HIDE -# define RELOC_HIDE(ptr, off) \ - ({ unsigned long __ptr; \ - __ptr = (unsigned long) (ptr); \ - (typeof(ptr)) (__ptr + (off)); }) +# define RELOC_HIDE(ptr, off) ((typeof(ptr))((unsigned long)(ptr) + (off))) #endif #define absolute_pointer(val) RELOC_HIDE((void *)(val), 0) diff --git a/lib/test_context-analysis.c b/lib/test_context-analysis.c index 140efa8a9763..06b4a6a028e0 100644 --- a/lib/test_context-analysis.c +++ b/lib/test_context-analysis.c @@ -596,3 +596,14 @@ static void __used test_ww_mutex_lock_ctx(struct test_ww_mutex_data *d) ww_mutex_destroy(&d->mtx); } + +static DEFINE_PER_CPU(raw_spinlock_t, test_per_cpu_lock); + +static void __used test_per_cpu(int cpu) +{ + raw_spin_lock(&per_cpu(test_per_cpu_lock, cpu)); + raw_spin_unlock(&per_cpu(test_per_cpu_lock, cpu)); + + raw_spin_lock(per_cpu_ptr(&test_per_cpu_lock, cpu)); + raw_spin_unlock(per_cpu_ptr(&test_per_cpu_lock, cpu)); +} -- cgit v1.2.3