From 85be6d842447067ce76047a14d4258c96fd33b7b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 15 Aug 2023 12:52:04 +0200
Subject: cleanup: Make no_free_ptr() __must_check

A recent discussion brought about the realization that it makes sense for
no_free_ptr() to have __must_check semantics in order to avoid leaking
the resource.

Additionally, add a few comments to clarify why/how things work.

All credit to Linus on how to combine __must_check and the
stmt-expression.

Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Ingo Molnar
Link: https://lkml.kernel.org/r/20230816103102.GF980931@hirez.programming.kicks-ass.net
---
 include/linux/cleanup.h | 39 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index 53f1a7a932b0..9f1a9c455b68 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -7,8 +7,9 @@
 /*
  * DEFINE_FREE(name, type, free):
  *	simple helper macro that defines the required wrapper for a __free()
- *	based cleanup function. @free is an expression using '_T' to access
- *	the variable.
+ *	based cleanup function. @free is an expression using '_T' to access the
+ *	variable. @free should typically include a NULL test before calling a
+ *	function, see the example below.
  *
  * __free(name):
  *	variable attribute to add a scoped based cleanup to the variable.
@@ -17,6 +18,9 @@
  *	like a non-atomic xchg(var, NULL), such that the cleanup function will
  *	be inhibited -- provided it sanely deals with a NULL value.
  *
+ * NOTE: this has __must_check semantics so that it is harder to accidentally
+ * leak the resource.
+ *
  * return_ptr(p):
  *	returns p while inhibiting the __free().
  *
@@ -24,6 +28,8 @@
  *
  * DEFINE_FREE(kfree, void *, if (_T) kfree(_T))
  *
+ * void *alloc_obj(...)
+ * {
  *	struct obj *p __free(kfree) = kmalloc(...);
  *	if (!p)
  *		return NULL;
@@ -32,6 +38,24 @@
  *		return NULL;
  *
  *	return_ptr(p);
+ * }
+ *
+ * NOTE: the DEFINE_FREE()'s @free expression includes a NULL test even though
+ * kfree() is fine to be called with a NULL value. This is on purpose. This way
+ * the compiler sees the end of our alloc_obj() function as:
+ *
+ *	tmp = p;
+ *	p = NULL;
+ *	if (p)
+ *		kfree(p);
+ *	return tmp;
+ *
+ * And through the magic of value-propagation and dead-code-elimination, it
+ * eliminates the actual cleanup call and compiles into:
+ *
+ *	return p;
+ *
+ * Without the NULL test it turns into a mess and the compiler can't help us.
  */
 
 #define DEFINE_FREE(_name, _type, _free) \
@@ -39,8 +63,17 @@
 
 #define __free(_name)	__cleanup(__free_##_name)
 
+#define __get_and_null_ptr(p) \
+	({ __auto_type __ptr = &(p); \
+	   __auto_type __val = *__ptr; \
+	   *__ptr = NULL; __val; })
+
+static inline __must_check
+const volatile void * __must_check_fn(const volatile void *val)
+{ return val; }
+
 #define no_free_ptr(p) \
-	({ __auto_type __ptr = (p); (p) = NULL; __ptr; })
+	((typeof(p)) __must_check_fn(__get_and_null_ptr(p)))
 
 #define return_ptr(p)	return no_free_ptr(p)
--
cgit v1.2.3

From 6b596e62ed9f90c4a97e68ae1f7b1af5beeb3c05 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Fri, 8 Sep 2023 18:22:51 +0200
Subject: sched: Provide rt_mutex specific scheduler helpers

With PREEMPT_RT there is an rt_mutex recursion problem where
sched_submit_work() can use an rtlock (aka spinlock_t). More
specifically, what happens is:

  mutex_lock() /* really rt_mutex */
    ...
      __rt_mutex_slowlock_locked()
	task_blocks_on_rt_mutex()
	  // enqueue current task as waiter
	  // do PI chain walk
	rt_mutex_slowlock_block()
	  schedule()
	    sched_submit_work()
	      ...
	      spin_lock() /* really rtlock */
	      ...
		__rt_mutex_slowlock_locked()
		  task_blocks_on_rt_mutex()
		    // enqueue current task as waiter *AGAIN*
		    // *CONFUSION*

Fix this by making rt_mutex do the sched_submit_work() early, before it
enqueues itself as a waiter -- before it even knows *if* it will wait.

[[ basically Thomas' patch but with different naming and a few asserts
   added ]]

Originally-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/20230908162254.999499-5-bigeasy@linutronix.de
---
 include/linux/sched.h    | 3 +++
 include/linux/sched/rt.h | 4 ++++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 77f01ac385f7..67623ffd4a8e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -911,6 +911,9 @@ struct task_struct {
 	 * ->sched_remote_wakeup gets used, so it can be in this word.
 	 */
 	unsigned			sched_remote_wakeup:1;
+#ifdef CONFIG_RT_MUTEXES
+	unsigned			sched_rt_mutex:1;
+#endif
 
 	/* Bit to tell LSMs we're in execve(): */
 	unsigned			in_execve:1;
diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
index 994c25640e15..b2b9e6eb9683 100644
--- a/include/linux/sched/rt.h
+++ b/include/linux/sched/rt.h
@@ -30,6 +30,10 @@ static inline bool task_is_realtime(struct task_struct *tsk)
 }
 
 #ifdef CONFIG_RT_MUTEXES
+extern void rt_mutex_pre_schedule(void);
+extern void rt_mutex_schedule(void);
+extern void rt_mutex_post_schedule(void);
+
 /*
  * Must hold either p->pi_lock or task_rq(p)->lock.
  */
--
cgit v1.2.3

From c6f4a90022524d06f6d9de323b1757031dcf0c26 Mon Sep 17 00:00:00 2001
From: Guo Ren
Date: Fri, 8 Sep 2023 11:43:39 -0400
Subject: asm-generic: ticket-lock: Optimize arch_spin_value_unlocked()

The ticket-lock implementation of arch_spin_value_unlocked() caused the
compiler to generate inefficient asm code on the RISC-V architecture,
because of an unnecessary memory access to the contended value.

Before the patch:

  void lockref_get(struct lockref *lockref)
  {
    78:   fd010113   add   sp,sp,-48
    7c:   02813023   sd    s0,32(sp)
    80:   02113423   sd    ra,40(sp)
    84:   03010413   add   s0,sp,48

  0000000000000088 <.LBB296>:
          CMPXCHG_LOOP(
    88:   00053783   ld    a5,0(a0)

After the patch:

  void lockref_get(struct lockref *lockref)
  {
          CMPXCHG_LOOP(
    78:   00053783   ld    a5,0(a0)

After the patch, lockref_get() enters the fast path directly instead of
going through the function prologue first. This is because the ticket
lock's more complex logic limits what the compiler can do with the
spinlock fast path, while qspinlock's does not.

The caller of arch_spin_value_unlocked() could benefit from this change.
Currently, the only caller is lockref.
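[ note: a minimal user-space sketch -- not part of this patch -- of why
  operating on an already-loaded copy of the lock avoids the extra memory
  access; arch_spin_value_unlocked() mirrors the hunk below, while the
  lockref-style caller and its names are hypothetical: ]

  #include <stdint.h>
  #include <stdbool.h>

  typedef struct { uint32_t counter; } arch_spinlock_t;

  /* Unlocked when the "next" ticket (high 16 bits) equals "owner". */
  static bool arch_spin_value_unlocked(arch_spinlock_t lock)
  {
          uint32_t val = lock.counter;    /* no memory access here */

          return (val >> 16) == (val & 0xffff);
  }

  /* lockref-style fast path: one load, then work on the snapshot */
  static bool try_fast_get(arch_spinlock_t *lock, int *count)
  {
          arch_spinlock_t old = *lock;    /* the only load */

          if (!arch_spin_value_unlocked(old))
                  return false;           /* contended: take slow path */
          (*count)++;
          return true;
  }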
Signed-off-by: Guo Ren
Signed-off-by: Guo Ren
Signed-off-by: Ingo Molnar
Acked-by: Waiman Long
Acked-by: Will Deacon
Link: https://lore.kernel.org/r/20230908154339.3250567-1-guoren@kernel.org
---
 include/asm-generic/spinlock.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
index fdfebcb050f4..90803a826ba0 100644
--- a/include/asm-generic/spinlock.h
+++ b/include/asm-generic/spinlock.h
@@ -68,11 +68,18 @@ static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 	smp_store_release(ptr, (u16)val + 1);
 }
 
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	u32 val = lock.counter;
+
+	return ((val >> 16) == (val & 0xffff));
+}
+
 static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	u32 val = atomic_read(lock);
+	arch_spinlock_t val = READ_ONCE(*lock);
 
-	return ((val >> 16) != (val & 0xffff));
+	return !arch_spin_value_unlocked(val);
 }
 
 static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
@@ -82,11 +89,6 @@ static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
 	return (s16)((val >> 16) - (val & 0xffff)) > 1;
 }
 
-static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
-{
-	return !arch_spin_is_locked(&lock);
-}
-
 #include <asm/qrwlock.h>
 
 #endif /* __ASM_GENERIC_SPINLOCK_H */
--
cgit v1.2.3

From 4923954bbc4a760e0b2210e0cb5733726ac2e2e9 Mon Sep 17 00:00:00 2001
From: "peterz@infradead.org"
Date: Thu, 21 Sep 2023 12:45:06 +0200
Subject: futex: Clarify FUTEX2 flags

sys_futex_waitv() is part of the futex2 series of syscalls (the first
and, so far, only one) and has a flags field per futex (as opposed to
flags being encoded in the futex op).

This new flags field has a new namespace, which unfortunately isn't super
explicit. Notably, it currently takes FUTEX_32 and FUTEX_PRIVATE_FLAG.

Introduce the FUTEX2 namespace to clarify this.

Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Thomas Gleixner
Reviewed-by: André Almeida
Link: https://lore.kernel.org/r/20230921105247.507327749@noisy.programming.kicks-ass.net
---
 include/uapi/linux/futex.h | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index 71a5df8d2689..21d4eff41162 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -44,10 +44,20 @@ FUTEX_PRIVATE_FLAG)
 
 /*
- * Flags to specify the bit length of the futex word for futex2 syscalls.
- * Currently, only 32 is supported.
+ * Flags for futex2 syscalls.
  */
-#define FUTEX_32		2
+ /* 0x00 */
+ /* 0x01 */
+#define FUTEX2_SIZE_U32		0x02
+ /* 0x04 */
+ /* 0x08 */
+ /* 0x10 */
+ /* 0x20 */
+ /* 0x40 */
+#define FUTEX2_PRIVATE		FUTEX_PRIVATE_FLAG
+
+/* do not use */
+#define FUTEX_32		FUTEX2_SIZE_U32 /* historical accident :-( */
 
 /*
  * Max numbers of elements in a futex_waitv array
--
cgit v1.2.3

From d6d08d24790e82c69a46ef78ae44fe1b1ed30775 Mon Sep 17 00:00:00 2001
From: "peterz@infradead.org"
Date: Thu, 21 Sep 2023 12:45:07 +0200
Subject: futex: Extend the FUTEX2 flags

Add definitions for the missing but always intended extra sizes, and add
a NUMA flag for the planned NUMA extension.
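[ note: a small user-space sketch -- not part of this patch -- showing
  how the size field and the flag bits decompose; the FUTEX2_* values are
  taken from the hunk below, and FUTEX2_PRIVATE's numeric value (128)
  comes from the existing FUTEX_PRIVATE_FLAG definition: ]

  #include <stdio.h>

  #define FUTEX2_SIZE_U8      0x00
  #define FUTEX2_SIZE_U16     0x01
  #define FUTEX2_SIZE_U32     0x02
  #define FUTEX2_SIZE_U64     0x03
  #define FUTEX2_SIZE_MASK    0x03
  #define FUTEX2_NUMA         0x04
  #define FUTEX2_PRIVATE      128     /* FUTEX_PRIVATE_FLAG */

  int main(void)
  {
          unsigned int flags = FUTEX2_SIZE_U64 | FUTEX2_PRIVATE;

          /* The size is a 2-bit log2 field, not independent flag bits. */
          unsigned int bytes = 1u << (flags & FUTEX2_SIZE_MASK);

          printf("%u-byte futex word, %s, %s\n", bytes,
                 (flags & FUTEX2_NUMA) ? "NUMA" : "non-NUMA",
                 (flags & FUTEX2_PRIVATE) ? "private" : "shared");
          return 0;
  }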
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Thomas Gleixner
Reviewed-by: André Almeida
Link: https://lore.kernel.org/r/20230921105247.617057368@noisy.programming.kicks-ass.net
---
 include/uapi/linux/futex.h | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index 21d4eff41162..d2ee625ea189 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -45,17 +45,32 @@
 
 /*
  * Flags for futex2 syscalls.
+ *
+ * NOTE: these are not pure flags, they can also be seen as:
+ *
+ *   union {
+ *     u32 flags;
+ *     struct {
+ *       u32 size    : 2,
+ *           numa    : 1,
+ *                   : 4,
+ *           private : 1;
+ *     };
+ *   };
  */
- /* 0x00 */
- /* 0x01 */
+#define FUTEX2_SIZE_U8		0x00
+#define FUTEX2_SIZE_U16		0x01
 #define FUTEX2_SIZE_U32		0x02
- /* 0x04 */
+#define FUTEX2_SIZE_U64		0x03
+#define FUTEX2_NUMA		0x04
  /* 0x08 */
  /* 0x10 */
  /* 0x20 */
  /* 0x40 */
 #define FUTEX2_PRIVATE		FUTEX_PRIVATE_FLAG
 
+#define FUTEX2_SIZE_MASK	0x03
+
 /* do not use */
 #define FUTEX_32		FUTEX2_SIZE_U32 /* historical accident :-( */
--
cgit v1.2.3

From 9f6c532f59b20580acf8ede9409c9b8dce6e74e1 Mon Sep 17 00:00:00 2001
From: "peterz@infradead.org"
Date: Thu, 21 Sep 2023 12:45:10 +0200
Subject: futex: Add sys_futex_wake()

To complement sys_futex_waitv(), add sys_futex_wake(). This syscall
implements what was previously known as FUTEX_WAKE_BITSET, except that it
uses 'unsigned long' for the bitmask and takes FUTEX2 flags.

The 'unsigned long' allows FUTEX2_SIZE_U64 on 64-bit platforms.

Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Thomas Gleixner
Acked-by: Geert Uytterhoeven
Link: https://lore.kernel.org/r/20230921105247.936205525@noisy.programming.kicks-ass.net
---
 include/linux/syscalls.h          | 3 +++
 include/uapi/asm-generic/unistd.h | 4 +++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 22bc6bc147f8..e174ed86da1d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -549,6 +549,9 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
 asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
 				unsigned int nr_futexes, unsigned int flags,
 				struct __kernel_timespec __user *timeout, clockid_t clockid);
+
+asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags);
+
 asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
 			      struct __kernel_timespec __user *rmtp);
 asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index abe087c53b4b..f5454e6f4c6f 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -822,9 +822,11 @@ __SYSCALL(__NR_cachestat, sys_cachestat)
 #define __NR_fchmodat2 452
 __SYSCALL(__NR_fchmodat2, sys_fchmodat2)
 
+#define __NR_futex_wake 454
+__SYSCALL(__NR_futex_wake, sys_futex_wake)
 #undef __NR_syscalls
-#define __NR_syscalls 453
+#define __NR_syscalls 455
 
 /*
  * 32 bit systems traditionally used different
--
cgit v1.2.3

From cb8c4312afca1b2dc64107e7e7cea81911055612 Mon Sep 17 00:00:00 2001
From: "peterz@infradead.org"
Date: Thu, 21 Sep 2023 12:45:12 +0200
Subject: futex: Add sys_futex_wait()

To complement sys_futex_waitv()/wake(), add sys_futex_wait().
This syscall implements what was previously known as FUTEX_WAIT_BITSET,
except that it uses 'unsigned long' for the value and bitmask arguments,
takes timespec and clockid_t arguments for the absolute timeout, and uses
FUTEX2 flags.

The 'unsigned long' allows FUTEX2_SIZE_U64 on 64-bit platforms.

Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Thomas Gleixner
Acked-by: Geert Uytterhoeven
Link: https://lore.kernel.org/r/20230921105248.164324363@noisy.programming.kicks-ass.net
---
 include/linux/syscalls.h          | 4 ++++
 include/uapi/asm-generic/unistd.h | 4 +++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e174ed86da1d..11f3fdd1ee03 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -552,6 +552,10 @@ asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
 
 asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags);
 
+asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned long mask,
+			       unsigned int flags, struct __kernel_timespec __user *timespec,
+			       clockid_t clockid);
+
 asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
 			      struct __kernel_timespec __user *rmtp);
 asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index f5454e6f4c6f..f6553bd5d213 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -824,9 +824,11 @@ __SYSCALL(__NR_cachestat, sys_cachestat)
 __SYSCALL(__NR_fchmodat2, sys_fchmodat2)
 #define __NR_futex_wake 454
 __SYSCALL(__NR_futex_wake, sys_futex_wake)
+#define __NR_futex_wait 455
+__SYSCALL(__NR_futex_wait, sys_futex_wait)
 
 #undef __NR_syscalls
-#define __NR_syscalls 455
+#define __NR_syscalls 456
 
 /*
  * 32 bit systems traditionally used different
--
cgit v1.2.3

From 0f4b5f972216782a4acb1ae00dcb55173847c2ff Mon Sep 17 00:00:00 2001
From: "peterz@infradead.org"
Date: Thu, 21 Sep 2023 12:45:15 +0200
Subject: futex: Add sys_futex_requeue()

Finish off the 'simple' futex2 syscall group by adding
sys_futex_requeue(). Unlike sys_futex_{wait,wake}(), its arguments are
too numerous to fit into a regular syscall. As such, use struct
futex_waitv to pass the 'source' and 'destination' futexes to the
syscall.

This syscall implements what was previously known as FUTEX_CMP_REQUEUE
and uses {val, uaddr, flags} for source and {uaddr, flags} for
destination.

This design explicitly allows requeueing between different types of
futex by having a different flags word per uaddr.
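[ note: a hedged user-space sketch -- not part of this patch -- of the
  calling convention described above. The struct futex_waitv layout is
  the existing uapi one, re-declared here for clarity; the syscall number
  comes from the generic table below and may differ per architecture, and
  whether the top-level flags argument and the destination 'val' must be
  zero is up to the final implementation: ]

  #include <stdint.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  struct futex_waitv {              /* as in <linux/futex.h> */
          uint64_t val;
          uint64_t uaddr;
          uint32_t flags;
          uint32_t __reserved;
  };

  #define FUTEX2_SIZE_U32  0x02
  #define FUTEX2_PRIVATE   128

  static long futex_requeue(uint32_t *src, uint32_t src_val,
                            uint32_t *dst, int nr_wake, int nr_requeue)
  {
          /* waiters[0]: source {val, uaddr, flags};
             waiters[1]: destination {uaddr, flags} -- flags per uaddr */
          struct futex_waitv waiters[2] = {
                  { .val = src_val, .uaddr = (uintptr_t)src,
                    .flags = FUTEX2_SIZE_U32 | FUTEX2_PRIVATE },
                  { .val = 0, .uaddr = (uintptr_t)dst,
                    .flags = FUTEX2_SIZE_U32 | FUTEX2_PRIVATE },
          };

          return syscall(456 /* __NR_futex_requeue, generic table */,
                         waiters, 0, nr_wake, nr_requeue);
  }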
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Thomas Gleixner
Acked-by: Geert Uytterhoeven
Link: https://lore.kernel.org/r/20230921105248.511860556@noisy.programming.kicks-ass.net
---
 include/linux/syscalls.h          | 3 +++
 include/uapi/asm-generic/unistd.h | 4 +++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 11f3fdd1ee03..0901af60d971 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -556,6 +556,9 @@ asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned l
 			       unsigned int flags, struct __kernel_timespec __user *timespec,
 			       clockid_t clockid);
 
+asmlinkage long sys_futex_requeue(struct futex_waitv __user *waiters,
+				  unsigned int flags, int nr_wake, int nr_requeue);
+
 asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
 			      struct __kernel_timespec __user *rmtp);
 asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index f6553bd5d213..d9e9cd13e577 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -826,9 +826,11 @@ __SYSCALL(__NR_fchmodat2, sys_fchmodat2)
 __SYSCALL(__NR_futex_wake, sys_futex_wake)
 #define __NR_futex_wait 455
 __SYSCALL(__NR_futex_wait, sys_futex_wait)
+#define __NR_futex_requeue 456
+__SYSCALL(__NR_futex_requeue, sys_futex_requeue)
 
 #undef __NR_syscalls
-#define __NR_syscalls 456
+#define __NR_syscalls 457
 
 /*
  * 32 bit systems traditionally used different
--
cgit v1.2.3

From 0cff993e08a7578e2c1df93a95fc5059f447e7ae Mon Sep 17 00:00:00 2001
From: "pangzizhen001@208suo.com"
Date: Thu, 20 Jul 2023 23:45:39 +0800
Subject: locking/seqlock: Fix typo in comment

s/the the /the /

[ mingo: Cleaned up the changelog. ]

Signed-off-by: Zizhen Pang
Signed-off-by: Ingo Molnar
Link: https://lore.kernel.org/r/70293ecd5bb7a1cd370fd4d95c35f936@208suo.com
---
 include/linux/seqlock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 987a59d977c5..ea7a58258af6 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -864,7 +864,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
 }
 
 /*
- * For all seqlock_t write side functions, use the the internal
+ * For all seqlock_t write side functions, use the internal
  * do_write_seqcount_begin() instead of generic write_seqcount_begin().
  * This way, no redundant lockdep_assert_held() checks are added.
  */
--
cgit v1.2.3

From e01cc1e8c2ad73cebb980878ede5584e0f2688f7 Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Mon, 25 Sep 2023 16:50:23 +0200
Subject: locking/atomic: Add generic support for sync_try_cmpxchg() and its
 fallback

Provide the generic sync_try_cmpxchg() function from the raw_ prefixed
version, also adding explicit instrumentation.

The patch amends the existing scripts to generate the sync_try_cmpxchg()
locking primitive and its raw_sync_try_cmpxchg() fallback, while leaving
the existing macros from the try_cmpxchg() family unchanged.

The target can define its own arch_sync_try_cmpxchg() to override the
generic version of raw_sync_try_cmpxchg(). This allows the target to
generate more optimal assembly than the generic version.

Additionally, the patch renames two scripts to better reflect what they
really do.
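[ note: a standalone sketch -- not kernel code -- of the try_cmpxchg
  pattern the fallback below implements on top of a plain cmpxchg: on
  failure, the observed value is written back through *oldp, so callers
  can loop without an extra re-read; all names here are illustrative: ]

  #include <stdbool.h>
  #include <stdint.h>

  /* stand-in for raw_sync_cmpxchg(): returns the value found at *ptr */
  static uint32_t sync_cmpxchg(uint32_t *ptr, uint32_t old, uint32_t new)
  {
          __atomic_compare_exchange_n(ptr, &old, new, false,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
          return old;     /* updated to the observed value on failure */
  }

  static bool sync_try_cmpxchg(uint32_t *ptr, uint32_t *oldp, uint32_t new)
  {
          uint32_t old = *oldp;
          uint32_t ret = sync_cmpxchg(ptr, old, new);

          if (ret != old)
                  *oldp = ret;    /* refresh the caller's copy */
          return ret == old;
  }

  /* typical caller: a failed attempt refreshes 'val' automatically */
  static void inc(uint32_t *ptr)
  {
          uint32_t val = *ptr;

          while (!sync_try_cmpxchg(ptr, &val, val + 1))
                  ;
  }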
Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: Will Deacon
Cc: Peter Zijlstra
Cc: Boqun Feng
Cc: Mark Rutland
Cc: Linus Torvalds
Cc: linux-kernel@vger.kernel.org
---
 include/linux/atomic/atomic-arch-fallback.h | 15 ++++++++++++++-
 include/linux/atomic/atomic-instrumented.h  | 10 +++++++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h
index b83ef19da13d..5e95faa959c4 100644
--- a/include/linux/atomic/atomic-arch-fallback.h
+++ b/include/linux/atomic/atomic-arch-fallback.h
@@ -428,6 +428,19 @@ extern void raw_cmpxchg128_relaxed_not_implemented(void);
 
 #define raw_sync_cmpxchg arch_sync_cmpxchg
 
+#ifdef arch_sync_try_cmpxchg
+#define raw_sync_try_cmpxchg arch_sync_try_cmpxchg
+#else
+#define raw_sync_try_cmpxchg(_ptr, _oldp, _new) \
+({ \
+	typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+	___r = raw_sync_cmpxchg((_ptr), ___o, (_new)); \
+	if (unlikely(___r != ___o)) \
+		*___op = ___r; \
+	likely(___r == ___o); \
+})
+#endif
+
 /**
  * raw_atomic_read() - atomic load with relaxed ordering
  * @v: pointer to atomic_t
@@ -4649,4 +4662,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v)
 }
 
 #endif /* _LINUX_ATOMIC_FALLBACK_H */
-// 2fdd6702823fa842f9cea57a002e6e4476ae780c
+// eec048affea735b8464f58e6d96992101f8f85f1
diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h
index d401b406ef7c..54d7bbe0aeaa 100644
--- a/include/linux/atomic/atomic-instrumented.h
+++ b/include/linux/atomic/atomic-instrumented.h
@@ -4998,6 +4998,14 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 	raw_try_cmpxchg128_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \
 })
 
+#define sync_try_cmpxchg(ptr, ...) \
+({ \
+	typeof(ptr) __ai_ptr = (ptr); \
+	kcsan_mb(); \
+	instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \
+	raw_sync_try_cmpxchg(__ai_ptr, __VA_ARGS__); \
+})
+
 #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
-// 1568f875fef72097413caab8339120c065a39aa4
+// 2cc4bc990fef44d3836ec108f11b610f3f438184
--
cgit v1.2.3

From f995443f01b4dbcce723539b99050ce69b319e58 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Thu, 12 Oct 2023 16:31:58 +0200
Subject: locking/seqlock: Simplify SEQCOUNT_LOCKNAME()

1. Kill the "lockmember" argument. It is always s->lock, and
   __seqprop_##lockname##_sequence() already uses s->lock while ignoring
   "lockmember".

2. Kill the "lock_acquire" argument. __seqprop_##lockname##_sequence()
   can use the same "lockbase" prefix for _lock and _unlock.

Apart from line numbers, gcc -E outputs the same code.

Signed-off-by: Oleg Nesterov
Signed-off-by: Ingo Molnar
Cc: Peter Zijlstra
Cc: Waiman Long
Cc: Will Deacon
Cc: Thomas Gleixner
Cc: Linus Torvalds
Cc: Paul E. McKenney
Link: https://lore.kernel.org/r/20231012143158.GA16133@redhat.com
---
 include/linux/seqlock.h | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index af518e4d0c6b..7e7109dbc3f5 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -191,11 +191,9 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
  * @lockname:		"LOCKNAME" part of seqcount_LOCKNAME_t
  * @locktype:		LOCKNAME canonical C data type
  * @preemptible:	preemptibility of above locktype
- * @lockmember:		argument for lockdep_assert_held()
- * @lockbase:		associated lock release function (prefix only)
- * @lock_acquire:	associated lock acquisition function (full call)
+ * @lockbase:		prefix for associated lock/unlock
  */
-#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \
+#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase)	\
 typedef struct seqcount_##lockname {					\
 	seqcount_t		seqcount;				\
 	__SEQ_LOCK(locktype	*lock);					\
@@ -216,7 +214,7 @@ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s)	\
 		return seq;						\
 									\
 	if (preemptible && unlikely(seq & 1)) {				\
-		__SEQ_LOCK(lock_acquire);				\
+		__SEQ_LOCK(lockbase##_lock(s->lock));			\
 		__SEQ_LOCK(lockbase##_unlock(s->lock));			\
 									\
 		/*							\
@@ -242,7 +240,7 @@ __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s)	\
 static __always_inline void						\
 __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s)		\
 {									\
-	__SEQ_LOCK(lockdep_assert_held(lockmember));			\
+	__SEQ_LOCK(lockdep_assert_held(s->lock));			\
 }
 
 /*
@@ -271,10 +269,10 @@
 
 #define __SEQ_RT	IS_ENABLED(CONFIG_PREEMPT_RT)
 
-SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock))
-SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock))
-SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock))
-SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock))
+SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin)
+SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin)
+SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read)
+SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
 
 /*
  * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
--
cgit v1.2.3

From e6115c6f7a0ce3388cc60b69a284facf78b5dbfd Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Thu, 12 Oct 2023 16:32:27 +0200
Subject: locking/seqlock: Change __seqprop() to return the function pointer

This simplifies the macro and makes it easy to add new seqprops with two
or more args. Plus, this way we do not lose the type info, and the
(void *) type cast is no longer needed.

And the latter reveals the problem: a lot of seqcount_t helpers pass the
"const seqcount_t *s" argument to __seqprop_ptr(seqcount_t *s), but
(before this patch) "(void *)(s)" masked the problem.

So this patch changes __seqprop_ptr() and __seqprop_##lockname##_ptr()
to accept the "const LOCKNAME *s" argument. This is not nice either:
they need to drop the constness on return, because these helpers are
used by both readers and writers, but at least it is clear what's going
on.

Signed-off-by: Oleg Nesterov
Signed-off-by: Ingo Molnar
Cc: Peter Zijlstra
Cc: Waiman Long
Cc: Will Deacon
Cc: Thomas Gleixner
Cc: Linus Torvalds
Cc: Paul E. McKenney
Link: https://lore.kernel.org/r/20231012143227.GA16143@redhat.com
---
 include/linux/seqlock.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 7e7109dbc3f5..4b8dcd3a0d93 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -200,9 +200,9 @@ typedef struct seqcount_##lockname {				\
 } seqcount_##lockname##_t;						\
 									\
 static __always_inline seqcount_t *					\
-__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s)			\
+__seqprop_##lockname##_ptr(const seqcount_##lockname##_t *s)		\
 {									\
-	return &s->seqcount;						\
+	return (void *)&s->seqcount;	/* drop const */		\
 }									\
 									\
 static __always_inline unsigned					\
@@ -247,9 +247,9 @@
  * __seqprop() for seqcount_t
  */
 
-static inline seqcount_t *__seqprop_ptr(seqcount_t *s)
+static inline seqcount_t *__seqprop_ptr(const seqcount_t *s)
 {
-	return s;
+	return (void *)s;	/* drop const */
 }
 
 static inline unsigned __seqprop_sequence(const seqcount_t *s)
@@ -292,19 +292,19 @@
 #define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock)
 
 #define __seqprop_case(s, lockname, prop)				\
-	seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s))
+	seqcount_##lockname##_t: __seqprop_##lockname##_##prop
 
 #define __seqprop(s, prop) _Generic(*(s),				\
-	seqcount_t:		__seqprop_##prop((void *)(s)),		\
+	seqcount_t:		__seqprop_##prop,			\
 	__seqprop_case((s),	raw_spinlock,	prop),			\
 	__seqprop_case((s),	spinlock,	prop),			\
 	__seqprop_case((s),	rwlock,		prop),			\
 	__seqprop_case((s),	mutex,		prop))
 
-#define seqprop_ptr(s)			__seqprop(s, ptr)
-#define seqprop_sequence(s)		__seqprop(s, sequence)
-#define seqprop_preemptible(s)		__seqprop(s, preemptible)
-#define seqprop_assert(s)		__seqprop(s, assert)
+#define seqprop_ptr(s)			__seqprop(s, ptr)(s)
+#define seqprop_sequence(s)		__seqprop(s, sequence)(s)
+#define seqprop_preemptible(s)		__seqprop(s, preemptible)(s)
+#define seqprop_assert(s)		__seqprop(s, assert)(s)
 
 /**
  * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
--
cgit v1.2.3

From 886ee55eabac0d46faf8bc0b22207ca2740847ba Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Fri, 13 Oct 2023 10:15:46 +0200
Subject: locking/seqlock: Propagate 'const' pointers within read-only
 methods, remove forced type casts

Currently __seqprop_ptr() is an inline function that must choose to
either use 'const' or non-const seqcount related pointers - but this
results in the undesirable loss of 'const' propagation, via a forced
type cast.

The easiest solution would be to turn the pointer wrappers into macros
that pass through whatever type is passed to them - but the clever maze
of seqlock API instantiation macros relies on the GCC CPP '##' macro
extension, which isn't recursive, so inline functions must be used here.

So create two wrapper variants instead: 'ptr' and 'const_ptr', and pick
the right one for the codepaths that are const: read_seqcount_begin()
and read_seqcount_retry().

This cleans up type handling and allows the removal of all type forcing.

No change in functionality.

Signed-off-by: Ingo Molnar
Reviewed-by: Oleg Nesterov
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Waiman Long
Cc: Will Deacon
Cc: Thomas Gleixner
Cc: Paul E. McKenney
---
 include/linux/seqlock.h | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 4b8dcd3a0d93..80f21d2ca2aa 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -200,9 +200,15 @@ typedef struct seqcount_##lockname {				\
 } seqcount_##lockname##_t;						\
 									\
 static __always_inline seqcount_t *					\
-__seqprop_##lockname##_ptr(const seqcount_##lockname##_t *s)		\
+__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s)			\
 {									\
-	return (void *)&s->seqcount;	/* drop const */		\
+	return &s->seqcount;						\
+}									\
+									\
+static __always_inline const seqcount_t *				\
+__seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s)	\
+{									\
+	return &s->seqcount;						\
 }									\
 									\
 static __always_inline unsigned					\
@@ -247,9 +253,14 @@
  * __seqprop() for seqcount_t
  */
 
-static inline seqcount_t *__seqprop_ptr(const seqcount_t *s)
+static inline seqcount_t *__seqprop_ptr(seqcount_t *s)
+{
+	return s;
+}
+
+static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s)
 {
-	return (void *)s;	/* drop const */
+	return s;
 }
 
 static inline unsigned __seqprop_sequence(const seqcount_t *s)
@@ -302,6 +313,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
 	__seqprop_case((s),	mutex,		prop))
 
 #define seqprop_ptr(s)			__seqprop(s, ptr)(s)
+#define seqprop_const_ptr(s)		__seqprop(s, const_ptr)(s)
 #define seqprop_sequence(s)		__seqprop(s, sequence)(s)
 #define seqprop_preemptible(s)		__seqprop(s, preemptible)(s)
 #define seqprop_assert(s)		__seqprop(s, assert)(s)
@@ -353,7 +365,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex)
  */
 #define read_seqcount_begin(s)						\
 ({									\
-	seqcount_lockdep_reader_access(seqprop_ptr(s));			\
+	seqcount_lockdep_reader_access(seqprop_const_ptr(s));		\
 	raw_read_seqcount_begin(s);					\
 })
 
@@ -419,7 +431,7 @@
  * Return: true if a read section retry is required, else false
 */
 #define __read_seqcount_retry(s, start)					\
-	do___read_seqcount_retry(seqprop_ptr(s), start)
+	do___read_seqcount_retry(seqprop_const_ptr(s), start)
 
 static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
@@ -439,7 +451,7 @@
  * Return: true if a read section retry is required, else false
 */
 #define read_seqcount_retry(s, start)					\
-	do_read_seqcount_retry(seqprop_ptr(s), start)
+	do_read_seqcount_retry(seqprop_const_ptr(s), start)
 
 static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start)
 {
--
cgit v1.2.3

From 184fdf9fc7ae6ae7155768faa48fc609d1a24b7e Mon Sep 17 00:00:00 2001
From: Cuda-Chen
Date: Tue, 17 Oct 2023 13:37:03 +0800
Subject: locking/seqlock: Fix grammar in comment

The "neither writes before and after ..." in the description of
do_write_seqcount_end() should be "neither writes before nor after".
Signed-off-by: Cuda-Chen
Signed-off-by: Ingo Molnar
Link: https://lore.kernel.org/r/20231017053703.11312-1-clh960524@gmail.com
---
 include/linux/seqlock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 80f21d2ca2aa..e92f9d5577ba 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -584,7 +584,7 @@ static inline void do_write_seqcount_end(seqcount_t *s)
  * via WRITE_ONCE): a) to ensure the writes become visible to other threads
  * atomically, avoiding compiler optimizations; b) to document which writes are
  * meant to propagate to the reader critical section. This is necessary because
- * neither writes before and after the barrier are enclosed in a seq-writer
+ * neither writes before nor after the barrier are enclosed in a seq-writer
  * critical section that would ensure readers are aware of ongoing writes::
  *
  *	seqcount_t seq;
--
cgit v1.2.3