From 2202e15b2b1a946ce760d96748cd7477589701ab Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 4 Nov 2021 13:27:06 +0100 Subject: kernel/locking: Use a pointer in ww_mutex_trylock(). mutex_acquire_nest() expects a pointer, pass the pointer. Fixes: 12235da8c80a1 ("kernel/locking: Add context to ww_mutex_trylock()") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20211104122706.frk52zxbjorso2kv@linutronix.de --- kernel/locking/ww_rt_mutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c index 0e00205cf467..d1473c624105 100644 --- a/kernel/locking/ww_rt_mutex.c +++ b/kernel/locking/ww_rt_mutex.c @@ -26,7 +26,7 @@ int ww_mutex_trylock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) if (__rt_mutex_trylock(&rtm->rtmutex)) { ww_mutex_set_context_fastpath(lock, ww_ctx); - mutex_acquire_nest(&rtm->dep_map, 0, 1, ww_ctx->dep_map, _RET_IP_); + mutex_acquire_nest(&rtm->dep_map, 0, 1, &ww_ctx->dep_map, _RET_IP_); return 1; } -- cgit v1.2.3 From 3297481d688a5cc2973ea58bd78e66b8639748b1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 26 Oct 2021 12:03:48 +0200 Subject: futex: Remove futex_cmpxchg detection Now that all architectures have a working futex implementation in any configuration, remove the runtime detection code. Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Reviewed-by: Russell King (Oracle) Acked-by: Vineet Gupta Acked-by: Max Filippov Acked-by: Christian Borntraeger Link: https://lore.kernel.org/r/20211026100432.1730393-2-arnd@kernel.org --- kernel/futex/core.c | 35 ----------------------------------- kernel/futex/futex.h | 6 ------ kernel/futex/syscalls.c | 22 ---------------------- 3 files changed, 63 deletions(-) (limited to 'kernel') diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 25d8a88b32e5..926c2bb752bc 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -41,11 +41,6 @@ #include "futex.h" #include "../locking/rtmutex_common.h" -#ifndef CONFIG_HAVE_FUTEX_CMPXCHG -int __read_mostly futex_cmpxchg_enabled; -#endif - - /* * The base of the bucket array and its size are always used together * (after initialization only in futex_hash()), so ensure that they @@ -776,9 +771,6 @@ static void exit_robust_list(struct task_struct *curr) unsigned long futex_offset; int rc; - if (!futex_cmpxchg_enabled) - return; - /* * Fetch the list head (which was registered earlier, via * sys_set_robust_list()): @@ -874,9 +866,6 @@ static void compat_exit_robust_list(struct task_struct *curr) compat_long_t futex_offset; int rc; - if (!futex_cmpxchg_enabled) - return; - /* * Fetch the list head (which was registered earlier, via * sys_set_robust_list()): @@ -950,8 +939,6 @@ static void exit_pi_state_list(struct task_struct *curr) struct futex_hash_bucket *hb; union futex_key key = FUTEX_KEY_INIT; - if (!futex_cmpxchg_enabled) - return; /* * We are a ZOMBIE and nobody can enqueue itself on * pi_state_list anymore, but we have to be careful @@ -1125,26 +1112,6 @@ void futex_exit_release(struct task_struct *tsk) futex_cleanup_end(tsk, FUTEX_STATE_DEAD); } -static void __init futex_detect_cmpxchg(void) -{ -#ifndef CONFIG_HAVE_FUTEX_CMPXCHG - u32 curval; - - /* - * This will fail and we want it. Some arch implementations do - * runtime detection of the futex_atomic_cmpxchg_inatomic() - * functionality. We want to know that before we call in any - * of the complex code paths. Also we want to prevent - * registration of robust lists in that case. NULL is - * guaranteed to fault and we get -EFAULT on functional - * implementation, the non-functional ones will return - * -ENOSYS. - */ - if (futex_cmpxchg_value_locked(&curval, NULL, 0, 0) == -EFAULT) - futex_cmpxchg_enabled = 1; -#endif -} - static int __init futex_init(void) { unsigned int futex_shift; @@ -1163,8 +1130,6 @@ static int __init futex_init(void) futex_hashsize, futex_hashsize); futex_hashsize = 1UL << futex_shift; - futex_detect_cmpxchg(); - for (i = 0; i < futex_hashsize; i++) { atomic_set(&futex_queues[i].waiters, 0); plist_head_init(&futex_queues[i].chain); diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 040ae4277cb0..c264cbeab71c 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -27,12 +27,6 @@ #define FLAGS_CLOCKRT 0x02 #define FLAGS_HAS_TIMEOUT 0x04 -#ifdef CONFIG_HAVE_FUTEX_CMPXCHG -#define futex_cmpxchg_enabled 1 -#else -extern int __read_mostly futex_cmpxchg_enabled; -#endif - #ifdef CONFIG_FAIL_FUTEX extern bool should_fail_futex(bool fshared); #else diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 6f91a07a6a83..086a22d1adb7 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -29,8 +29,6 @@ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, size_t, len) { - if (!futex_cmpxchg_enabled) - return -ENOSYS; /* * The kernel knows only one size for now: */ @@ -56,9 +54,6 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, unsigned long ret; struct task_struct *p; - if (!futex_cmpxchg_enabled) - return -ENOSYS; - rcu_read_lock(); ret = -ESRCH; @@ -103,17 +98,6 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, return -ENOSYS; } - switch (cmd) { - case FUTEX_LOCK_PI: - case FUTEX_LOCK_PI2: - case FUTEX_UNLOCK_PI: - case FUTEX_TRYLOCK_PI: - case FUTEX_WAIT_REQUEUE_PI: - case FUTEX_CMP_REQUEUE_PI: - if (!futex_cmpxchg_enabled) - return -ENOSYS; - } - switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; @@ -323,9 +307,6 @@ COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, compat_size_t, len) { - if (!futex_cmpxchg_enabled) - return -ENOSYS; - if (unlikely(len != sizeof(*head))) return -EINVAL; @@ -342,9 +323,6 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, unsigned long ret; struct task_struct *p; - if (!futex_cmpxchg_enabled) - return -ENOSYS; - rcu_read_lock(); ret = -ESRCH; -- cgit v1.2.3 From 9d0df37797453f168afdb2e6fd0353c73718ae9a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 29 Nov 2021 18:46:44 +0100 Subject: sched: Trigger warning if ->migration_disabled counter underflows. If migrate_enable() is used more often than its counter part then it remains undetected and rq::nr_pinned will underflow, too. Add a warning if migrate_enable() is attempted if without a matching a migrate_disable(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211129174654.668506-2-bigeasy@linutronix.de --- kernel/sched/core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3c9b0fda64ac..300218ad98a2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2173,6 +2173,9 @@ void migrate_enable(void) return; } + if (WARN_ON_ONCE(!p->migration_disabled)) + return; + /* * Ensure stop_task runs either before or after this, and that * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). -- cgit v1.2.3 From e08f343be00c3fe8f9f6ac58085c81bcdd231fab Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 29 Nov 2021 18:46:45 +0100 Subject: locking: Remove rt_rwlock_is_contended(). rt_rwlock_is_contended() has no users. It makes no sense to use it as rwlock_is_contended() because it is a sleeping lock on RT and preemption is possible. It reports always != 0 if used by a writer and even if there is a waiter then the lock might not be handed over if the current owner has the highest priority. Remove rt_rwlock_is_contended(). Reported-by: kernel test robot Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211129174654.668506-3-bigeasy@linutronix.de --- kernel/locking/spinlock_rt.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c index b2e553f9255b..9e396a09fe0f 100644 --- a/kernel/locking/spinlock_rt.c +++ b/kernel/locking/spinlock_rt.c @@ -257,12 +257,6 @@ void __sched rt_write_unlock(rwlock_t *rwlock) } EXPORT_SYMBOL(rt_write_unlock); -int __sched rt_rwlock_is_contended(rwlock_t *rwlock) -{ - return rw_base_is_contended(&rwlock->rwbase); -} -EXPORT_SYMBOL(rt_rwlock_is_contended); - #ifdef CONFIG_DEBUG_LOCK_ALLOC void __rt_rwlock_init(rwlock_t *rwlock, const char *name, struct lock_class_key *key) -- cgit v1.2.3 From 02ea9fc96fe976e7f7e067f38b12202f126e3f2f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 29 Nov 2021 18:46:46 +0100 Subject: locking/rtmutex: Squash self-deadlock check for ww_rt_mutex. Similar to the issues in commits: 6467822b8cc9 ("locking/rtmutex: Prevent spurious EDEADLK return caused by ww_mutexes") a055fcc132d4 ("locking/rtmutex: Return success on deadlock for ww_mutex waiters") ww_rt_mutex_lock() should not return EDEADLK without first going through the __ww_mutex logic to set the required state. In fact, the chain-walk can deal with the spurious cycles (per the above commits) this check warns about and is trying to avoid. Therefore ignore this test for ww_rt_mutex and simply let things fall in place. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211129174654.668506-4-bigeasy@linutronix.de --- kernel/locking/rtmutex.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 0c6a48dfcecb..f89620852774 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1103,8 +1103,11 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, * the other will detect the deadlock and return -EDEADLOCK, * which is wrong, as the other waiter is not in a deadlock * situation. + * + * Except for ww_mutex, in that case the chain walk must already deal + * with spurious cycles, see the comments at [3] and [6]. */ - if (owner == task) + if (owner == task && !(build_ww_mutex() && ww_ctx)) return -EDEADLK; raw_spin_lock(&task->pi_lock); -- cgit v1.2.3 From a3642021923b26d86bb27d88c826494827612c06 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 29 Nov 2021 18:46:47 +0100 Subject: locking/rtmutex: Add rt_mutex_lock_nest_lock() and rt_mutex_lock_killable(). The locking selftest for ww-mutex expects to operate directly on the base-mutex which becomes a rtmutex on PREEMPT_RT. Add a rtmutex based implementation of mutex_lock_nest_lock() and mutex_lock_killable() named rt_mutex_lock_nest_lock() abd rt_mutex_lock_killable(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211129174654.668506-5-bigeasy@linutronix.de --- kernel/locking/rtmutex_api.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c index 5c9299aaabae..900220941caa 100644 --- a/kernel/locking/rtmutex_api.c +++ b/kernel/locking/rtmutex_api.c @@ -21,12 +21,13 @@ int max_lock_depth = 1024; */ static __always_inline int __rt_mutex_lock_common(struct rt_mutex *lock, unsigned int state, + struct lockdep_map *nest_lock, unsigned int subclass) { int ret; might_sleep(); - mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, _RET_IP_); ret = __rt_mutex_lock(&lock->rtmutex, state); if (ret) mutex_release(&lock->dep_map, _RET_IP_); @@ -48,10 +49,16 @@ EXPORT_SYMBOL(rt_mutex_base_init); */ void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) { - __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass); + __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, subclass); } EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); +void __sched _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock) +{ + __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, nest_lock, 0); +} +EXPORT_SYMBOL_GPL(_rt_mutex_lock_nest_lock); + #else /* !CONFIG_DEBUG_LOCK_ALLOC */ /** @@ -61,7 +68,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); */ void __sched rt_mutex_lock(struct rt_mutex *lock) { - __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0); + __rt_mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, NULL, 0); } EXPORT_SYMBOL_GPL(rt_mutex_lock); #endif @@ -77,10 +84,25 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); */ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) { - return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0); + return __rt_mutex_lock_common(lock, TASK_INTERRUPTIBLE, NULL, 0); } EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); +/** + * rt_mutex_lock_killable - lock a rt_mutex killable + * + * @lock: the rt_mutex to be locked + * + * Returns: + * 0 on success + * -EINTR when interrupted by a signal + */ +int __sched rt_mutex_lock_killable(struct rt_mutex *lock) +{ + return __rt_mutex_lock_common(lock, TASK_KILLABLE, NULL, 0); +} +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); + /** * rt_mutex_trylock - try to lock a rt_mutex * -- cgit v1.2.3 From 0c1d7a2c2d32fac7ff4a644724b2d52a64184645 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 29 Nov 2021 18:46:48 +0100 Subject: lockdep: Remove softirq accounting on PREEMPT_RT. There is not really a softirq context on PREEMPT_RT. Softirqs on PREEMPT_RT are always invoked within the context of a threaded interrupt handler or within ksoftirqd. The "in-softirq" context is preemptible and is protected by a per-CPU lock to ensure mutual exclusion. There is no difference on PREEMPT_RT between spin_lock_irq() and spin_lock() because the former does not disable interrupts. Therefore if a lock is used in_softirq() and locked once with spin_lock_irq() then lockdep will report this with "inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage". Teach lockdep that we don't really do softirqs on -RT. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211129174654.668506-6-bigeasy@linutronix.de --- kernel/locking/lockdep.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 2270ec68f10a..4a882f83aeb9 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -5485,6 +5485,7 @@ static noinstr void check_flags(unsigned long flags) } } +#ifndef CONFIG_PREEMPT_RT /* * We dont accurately track softirq state in e.g. * hardirq contexts (such as on 4KSTACKS), so only @@ -5499,6 +5500,7 @@ static noinstr void check_flags(unsigned long flags) DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); } } +#endif if (!debug_locks) print_irqtrace_events(current); -- cgit v1.2.3 From c0bed69daf4b67809b58cc7cd81a8fa4f45bc161 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 3 Dec 2021 15:59:34 +0800 Subject: locking: Make owner_on_cpu() into Move the owner_on_cpu() from kernel/locking/rwsem.c into include/linux/sched.h with under CONFIG_SMP, then use it in the mutex/rwsem/rtmutex to simplify the code. Signed-off-by: Kefeng Wang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211203075935.136808-2-wangkefeng.wang@huawei.com --- kernel/locking/mutex.c | 11 ++--------- kernel/locking/rtmutex.c | 5 ++--- kernel/locking/rwsem.c | 9 --------- 3 files changed, 4 insertions(+), 21 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index db1913611192..5e3585950ec8 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -367,8 +367,7 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner, /* * Use vcpu_is_preempted to detect lock holder preemption issue. */ - if (!owner->on_cpu || need_resched() || - vcpu_is_preempted(task_cpu(owner))) { + if (!owner_on_cpu(owner) || need_resched()) { ret = false; break; } @@ -403,14 +402,8 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) * structure won't go away during the spinning period. */ owner = __mutex_owner(lock); - - /* - * As lock holder preemption issue, we both skip spinning if task is not - * on cpu or its cpu is preempted - */ - if (owner) - retval = owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); + retval = owner_on_cpu(owner); /* * If lock->owner is not set, the mutex has been released. Return true diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index f89620852774..0c1f2e3f019a 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1382,9 +1382,8 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock, * for CONFIG_PREEMPT_RCU=y) * - the VCPU on which owner runs is preempted */ - if (!owner->on_cpu || need_resched() || - rt_mutex_waiter_is_top_waiter(lock, waiter) || - vcpu_is_preempted(task_cpu(owner))) { + if (!owner_on_cpu(owner) || need_resched() || + rt_mutex_waiter_is_top_waiter(lock, waiter)) { res = false; break; } diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index c51387a43265..b92d0a830568 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -613,15 +613,6 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) return false; } -static inline bool owner_on_cpu(struct task_struct *owner) -{ - /* - * As lock holder preemption issue, we both skip spinning if - * task is not on cpu or its cpu is preempted - */ - return owner->on_cpu && !vcpu_is_preempted(task_cpu(owner)); -} - static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) { struct task_struct *owner; -- cgit v1.2.3