diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-13 10:27:28 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-13 10:27:28 -0700 |
commit | cc76ee75a9d3201eeacc576d17fbc1511f673010 (patch) | |
tree | 9505405c270718c491840ae7d0da0521386ce939 | |
parent | 9c65e12a55fea2da50f4069ec0dc47c50b7bd2bb (diff) | |
parent | 58995a9a5b292458f94a2356b8c878230fa56fe0 (diff) |
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core locking changes from Ingo Molnar:
"Main changes:
- jump label asm preparatory work for PowerPC (Anton Blanchard)
- rwsem optimizations and cleanups (Davidlohr Bueso)
- mutex optimizations and cleanups (Jason Low)
- futex fix (Oleg Nesterov)
- remove broken atomicity checks from {READ,WRITE}_ONCE() (Peter
Zijlstra)"
* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
powerpc, jump_label: Include linux/jump_label.h to get HAVE_JUMP_LABEL define
jump_label: Allow jump labels to be used in assembly
jump_label: Allow asm/jump_label.h to be included in assembly
locking/mutex: Further simplify mutex_spin_on_owner()
locking: Remove atomicy checks from {READ,WRITE}_ONCE
locking/rtmutex: Rename argument in the rt_mutex_adjust_prio_chain() documentation as well
locking/rwsem: Fix lock optimistic spinning when owner is not running
locking: Remove ACCESS_ONCE() usage
locking/rwsem: Check for active lock before bailing on spinning
locking/rwsem: Avoid deceiving lock spinners
locking/rwsem: Set lock ownership ASAP
locking/rwsem: Document barrier need when waking tasks
locking/futex: Check PF_KTHREAD rather than !p->mm to filter out kthreads
locking/mutex: Refactor mutex_spin_on_owner()
locking/mutex: In mutex_spin_on_owner(), return true when owner changes
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | arch/arm/include/asm/jump_label.h | 5 | ||||
-rw-r--r-- | arch/arm64/include/asm/jump_label.h | 8 | ||||
-rw-r--r-- | arch/mips/include/asm/jump_label.h | 7 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/opal-wrappers.S | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/hvCall.S | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/lpar.c | 2 | ||||
-rw-r--r-- | arch/s390/include/asm/jump_label.h | 3 | ||||
-rw-r--r-- | arch/sparc/include/asm/jump_label.h | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/jump_label.h | 5 | ||||
-rw-r--r-- | include/linux/compiler.h | 16 | ||||
-rw-r--r-- | include/linux/jump_label.h | 21 | ||||
-rw-r--r-- | include/linux/seqlock.h | 6 | ||||
-rw-r--r-- | kernel/futex.c | 2 | ||||
-rw-r--r-- | kernel/locking/mcs_spinlock.h | 6 | ||||
-rw-r--r-- | kernel/locking/mutex.c | 51 | ||||
-rw-r--r-- | kernel/locking/osq_lock.c | 14 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 2 | ||||
-rw-r--r-- | kernel/locking/rwsem-spinlock.c | 7 | ||||
-rw-r--r-- | kernel/locking/rwsem-xadd.c | 98 | ||||
-rw-r--r-- | kernel/locking/rwsem.c | 22 | ||||
-rw-r--r-- | kernel/locking/rwsem.h | 20 | ||||
-rw-r--r-- | lib/lockref.c | 2 |
23 files changed, 160 insertions, 147 deletions
@@ -779,6 +779,7 @@ KBUILD_ARFLAGS := $(call ar-option,D) # check for 'asm goto' ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO + KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO endif include $(srctree)/scripts/Makefile.kasan diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index 70f9b9bfb1f9..5f337dc5c108 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -1,7 +1,7 @@ #ifndef _ASM_ARM_JUMP_LABEL_H #define _ASM_ARM_JUMP_LABEL_H -#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ #include <linux/types.h> @@ -27,8 +27,6 @@ l_yes: return true; } -#endif /* __KERNEL__ */ - typedef u32 jump_label_t; struct jump_entry { @@ -37,4 +35,5 @@ struct jump_entry { jump_label_t key; }; +#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 076a1c714049..c0e5165c2f76 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -18,11 +18,12 @@ */ #ifndef __ASM_JUMP_LABEL_H #define __ASM_JUMP_LABEL_H + +#ifndef __ASSEMBLY__ + #include <linux/types.h> #include <asm/insn.h> -#ifdef __KERNEL__ - #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE static __always_inline bool arch_static_branch(struct static_key *key) @@ -39,8 +40,6 @@ l_yes: return true; } -#endif /* __KERNEL__ */ - typedef u64 jump_label_t; struct jump_entry { @@ -49,4 +48,5 @@ struct jump_entry { jump_label_t key; }; +#endif /* __ASSEMBLY__ */ #endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index fdbff44e5482..608aa57799c8 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -8,9 +8,9 @@ #ifndef _ASM_MIPS_JUMP_LABEL_H #define _ASM_MIPS_JUMP_LABEL_H -#include <linux/types.h> +#ifndef __ASSEMBLY__ -#ifdef __KERNEL__ +#include <linux/types.h> #define JUMP_LABEL_NOP_SIZE 4 @@ -39,8 +39,6 @@ l_yes: return true; } -#endif /* __KERNEL__ */ - #ifdef CONFIG_64BIT typedef u64 jump_label_t; #else @@ -53,4 +51,5 @@ struct jump_entry { jump_label_t key; }; +#endif /* __ASSEMBLY__ */ #endif /* _ASM_MIPS_JUMP_LABEL_H */ diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 0509bca5e830..fcbe899fe299 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -9,11 +9,11 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/jump_label.h> #include <asm/ppc_asm.h> #include <asm/hvcall.h> #include <asm/asm-offsets.h> #include <asm/opal.h> -#include <asm/jump_label.h> .section ".text" diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index ccd53f91e8aa..74b5b8e239c8 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -7,12 +7,12 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include <linux/jump_label.h> #include <asm/hvcall.h> #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> -#include <asm/jump_label.h> .section ".text" diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index b5682fd6c984..b7a67e3d2201 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -26,7 +26,7 @@ #include <linux/dma-mapping.h> #include <linux/console.h> #include <linux/export.h> -#include <linux/static_key.h> +#include <linux/jump_label.h> #include <asm/processor.h> #include <asm/mmu.h> #include <asm/page.h> diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 58642fd29c87..2b77e235b5fb 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -1,6 +1,8 @@ #ifndef _ASM_S390_JUMP_LABEL_H #define _ASM_S390_JUMP_LABEL_H +#ifndef __ASSEMBLY__ + #include <linux/types.h> #define JUMP_LABEL_NOP_SIZE 6 @@ -39,4 +41,5 @@ struct jump_entry { jump_label_t key; }; +#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index ec2e2e2aba7d..cc9b04a2b11b 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -1,7 +1,7 @@ #ifndef _ASM_SPARC_JUMP_LABEL_H #define _ASM_SPARC_JUMP_LABEL_H -#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ #include <linux/types.h> @@ -22,8 +22,6 @@ l_yes: return true; } -#endif /* __KERNEL__ */ - typedef u32 jump_label_t; struct jump_entry { @@ -32,4 +30,5 @@ struct jump_entry { jump_label_t key; }; +#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 6a2cefb4395a..a4c1cf7e93f8 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_JUMP_LABEL_H #define _ASM_X86_JUMP_LABEL_H -#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ #include <linux/stringify.h> #include <linux/types.h> @@ -30,8 +30,6 @@ l_yes: return true; } -#endif /* __KERNEL__ */ - #ifdef CONFIG_X86_64 typedef u64 jump_label_t; #else @@ -44,4 +42,5 @@ struct jump_entry { jump_label_t key; }; +#endif /* __ASSEMBLY__ */ #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 1b45e4a0519b..0e41ca0e5927 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -192,29 +192,16 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #include <uapi/linux/types.h> -static __always_inline void data_access_exceeds_word_size(void) -#ifdef __compiletime_warning -__compiletime_warning("data access exceeds word size and won't be atomic") -#endif -; - -static __always_inline void data_access_exceeds_word_size(void) -{ -} - static __always_inline void __read_once_size(const volatile void *p, void *res, int size) { switch (size) { case 1: *(__u8 *)res = *(volatile __u8 *)p; break; case 2: *(__u16 *)res = *(volatile __u16 *)p; break; case 4: *(__u32 *)res = *(volatile __u32 *)p; break; -#ifdef CONFIG_64BIT case 8: *(__u64 *)res = *(volatile __u64 *)p; break; -#endif default: barrier(); __builtin_memcpy((void *)res, (const void *)p, size); - data_access_exceeds_word_size(); barrier(); } } @@ -225,13 +212,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s case 1: *(volatile __u8 *)p = *(__u8 *)res; break; case 2: *(volatile __u16 *)p = *(__u16 *)res; break; case 4: *(volatile __u32 *)p = *(__u32 *)res; break; -#ifdef CONFIG_64BIT case 8: *(volatile __u64 *)p = *(__u64 *)res; break; -#endif default: barrier(); __builtin_memcpy((void *)p, (const void *)res, size); - data_access_exceeds_word_size(); barrier(); } } diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 98f923b6a0ea..f4de473f226b 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -45,6 +45,12 @@ * same as using STATIC_KEY_INIT_FALSE. */ +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) +# define HAVE_JUMP_LABEL +#endif + +#ifndef __ASSEMBLY__ + #include <linux/types.h> #include <linux/compiler.h> #include <linux/bug.h> @@ -55,7 +61,7 @@ extern bool static_key_initialized; "%s used before call to jump_label_init", \ __func__) -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) +#ifdef HAVE_JUMP_LABEL struct static_key { atomic_t enabled; @@ -66,13 +72,18 @@ struct static_key { #endif }; -# include <asm/jump_label.h> -# define HAVE_JUMP_LABEL #else struct static_key { atomic_t enabled; }; -#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */ +#endif /* HAVE_JUMP_LABEL */ +#endif /* __ASSEMBLY__ */ + +#ifdef HAVE_JUMP_LABEL +#include <asm/jump_label.h> +#endif + +#ifndef __ASSEMBLY__ enum jump_label_type { JUMP_LABEL_DISABLE = 0, @@ -203,3 +214,5 @@ static inline bool static_key_enabled(struct static_key *key) } #endif /* _LINUX_JUMP_LABEL_H */ + +#endif /* __ASSEMBLY__ */ diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index f5df8f687b4d..5f68d0a391ce 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -108,7 +108,7 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s) unsigned ret; repeat: - ret = ACCESS_ONCE(s->sequence); + ret = READ_ONCE(s->sequence); if (unlikely(ret & 1)) { cpu_relax(); goto repeat; @@ -127,7 +127,7 @@ repeat: */ static inline unsigned raw_read_seqcount(const seqcount_t *s) { - unsigned ret = ACCESS_ONCE(s->sequence); + unsigned ret = READ_ONCE(s->sequence); smp_rmb(); return ret; } @@ -179,7 +179,7 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) */ static inline unsigned raw_seqcount_begin(const seqcount_t *s) { - unsigned ret = ACCESS_ONCE(s->sequence); + unsigned ret = READ_ONCE(s->sequence); smp_rmb(); return ret & ~1; } diff --git a/kernel/futex.c b/kernel/futex.c index 2a5e3830e953..2579e407ff67 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -900,7 +900,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, if (!p) return -ESRCH; - if (!p->mm) { + if (unlikely(p->flags & PF_KTHREAD)) { put_task_struct(p); return -EPERM; } diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index d1fe2ba5bac9..75e114bdf3f2 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -78,7 +78,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) */ return; } - ACCESS_ONCE(prev->next) = node; + WRITE_ONCE(prev->next, node); /* Wait until the lock holder passes the lock down. */ arch_mcs_spin_lock_contended(&node->locked); @@ -91,7 +91,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) static inline void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { - struct mcs_spinlock *next = ACCESS_ONCE(node->next); + struct mcs_spinlock *next = READ_ONCE(node->next); if (likely(!next)) { /* @@ -100,7 +100,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) if (likely(cmpxchg(lock, node, NULL) == node)) return; /* Wait until the next pointer is set */ - while (!(next = ACCESS_ONCE(node->next))) + while (!(next = READ_ONCE(node->next))) cpu_relax_lowlatency(); } diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 94674e5919cb..4cccea6b8934 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -25,7 +25,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/debug_locks.h> -#include "mcs_spinlock.h" +#include <linux/osq_lock.h> /* * In the DEBUG case we are using the "NULL fastpath" for mutexes, @@ -217,44 +217,35 @@ ww_mutex_set_context_slowpath(struct ww_mutex *lock, } #ifdef CONFIG_MUTEX_SPIN_ON_OWNER -static inline bool owner_running(struct mutex *lock, struct task_struct *owner) -{ - if (lock->owner != owner) - return false; - - /* - * Ensure we emit the owner->on_cpu, dereference _after_ checking - * lock->owner still matches owner, if that fails, owner might - * point to free()d memory, if it still matches, the rcu_read_lock() - * ensures the memory stays valid. - */ - barrier(); - - return owner->on_cpu; -} - /* * Look out! "owner" is an entirely speculative pointer * access and not reliable. */ static noinline -int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) +bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) { + bool ret = true; + rcu_read_lock(); - while (owner_running(lock, owner)) { - if (need_resched()) + while (lock->owner == owner) { + /* + * Ensure we emit the owner->on_cpu, dereference _after_ + * checking lock->owner still matches owner. If that fails, + * owner might point to freed memory. If it still matches, + * the rcu_read_lock() ensures the memory stays valid. + */ + barrier(); + + if (!owner->on_cpu || need_resched()) { + ret = false; break; + } cpu_relax_lowlatency(); } rcu_read_unlock(); - /* - * We break out the loop above on need_resched() and when the - * owner changed, which is a sign for heavy contention. Return - * success only when lock->owner is NULL. - */ - return lock->owner == NULL; + return ret; } /* @@ -269,7 +260,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) return 0; rcu_read_lock(); - owner = ACCESS_ONCE(lock->owner); + owner = READ_ONCE(lock->owner); if (owner) retval = owner->on_cpu; rcu_read_unlock(); @@ -343,7 +334,7 @@ static bool mutex_optimistic_spin(struct mutex *lock, * As such, when deadlock detection needs to be * performed the optimistic spinning cannot be done. */ - if (ACCESS_ONCE(ww->ctx)) + if (READ_ONCE(ww->ctx)) break; } @@ -351,7 +342,7 @@ static bool mutex_optimistic_spin(struct mutex *lock, * If there's an owner, wait for it to either * release the lock or go to sleep. */ - owner = ACCESS_ONCE(lock->owner); + owner = READ_ONCE(lock->owner); if (owner && !mutex_spin_on_owner(lock, owner)) break; @@ -490,7 +481,7 @@ static inline int __sched __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) { struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); - struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); + struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); if (!hold_ctx) return 0; diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index c112d00341b0..dc85ee23a26f 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -98,7 +98,7 @@ bool osq_lock(struct optimistic_spin_queue *lock) prev = decode_cpu(old); node->prev = prev; - ACCESS_ONCE(prev->next) = node; + WRITE_ONCE(prev->next, node); /* * Normally @prev is untouchable after the above store; because at that @@ -109,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock) * cmpxchg in an attempt to undo our queueing. */ - while (!ACCESS_ONCE(node->locked)) { + while (!READ_ONCE(node->locked)) { /* * If we need to reschedule bail... so we can block. */ @@ -148,7 +148,7 @@ unqueue: * Or we race against a concurrent unqueue()'s step-B, in which * case its step-C will write us a new @node->prev pointer. */ - prev = ACCESS_ONCE(node->prev); + prev = READ_ONCE(node->prev); } /* @@ -170,8 +170,8 @@ unqueue: * it will wait in Step-A. */ - ACCESS_ONCE(next->prev) = prev; - ACCESS_ONCE(prev->next) = next; + WRITE_ONCE(next->prev, prev); + WRITE_ONCE(prev->next, next); return false; } @@ -193,11 +193,11 @@ void osq_unlock(struct optimistic_spin_queue *lock) node = this_cpu_ptr(&osq_node); next = xchg(&node->next, NULL); if (next) { - ACCESS_ONCE(next->locked) = 1; + WRITE_ONCE(next->locked, 1); return; } next = osq_wait_next(lock, node, NULL); if (next) - ACCESS_ONCE(next->locked) = 1; + WRITE_ONCE(next->locked, 1); } diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 6357265a31ad..b73279367087 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -349,7 +349,7 @@ static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) * * @task: the task owning the mutex (owner) for which a chain walk is * probably needed - * @deadlock_detect: do we have to carry out deadlock detection? + * @chwalk: do we have to carry out deadlock detection? * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck * things for a task that has just got its priority adjusted, and * is waiting on a mutex) diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 2555ae15ec14..3a5048572065 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -85,6 +85,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) list_del(&waiter->list); tsk = waiter->task; + /* + * Make sure we do not wakeup the next reader before + * setting the nil condition to grant the next reader; + * otherwise we could miss the wakeup on the other + * side and end up sleeping again. See the pairing + * in rwsem_down_read_failed(). + */ smp_mb(); waiter->task = NULL; wake_up_process(tsk); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 2f7cc4076f50..3417d0172a5d 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -14,8 +14,9 @@ #include <linux/init.h> #include <linux/export.h> #include <linux/sched/rt.h> +#include <linux/osq_lock.h> -#include "mcs_spinlock.h" +#include "rwsem.h" /* * Guide to the rw_semaphore's count field for common values. @@ -186,6 +187,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) waiter = list_entry(next, struct rwsem_waiter, list); next = waiter->list.next; tsk = waiter->task; + /* + * Make sure we do not wakeup the next reader before + * setting the nil condition to grant the next reader; + * otherwise we could miss the wakeup on the other + * side and end up sleeping again. See the pairing + * in rwsem_down_read_failed(). + */ smp_mb(); waiter->task = NULL; wake_up_process(tsk); @@ -258,6 +266,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) { if (!list_is_singular(&sem->wait_list)) rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); + rwsem_set_owner(sem); return true; } @@ -270,15 +279,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) */ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) { - long old, count = ACCESS_ONCE(sem->count); + long old, count = READ_ONCE(sem->count); while (true) { if (!(count == 0 || count == RWSEM_WAITING_BIAS)) return false; old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS); - if (old == count) + if (old == count) { + rwsem_set_owner(sem); return true; + } count = old; } @@ -287,60 +298,67 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) { struct task_struct *owner; - bool on_cpu = false; + bool ret = true; if (need_resched()) return false; rcu_read_lock(); - owner = ACCESS_ONCE(sem->owner); - if (owner) - on_cpu = owner->on_cpu; - rcu_read_unlock(); - - /* - * If sem->owner is not set, yet we have just recently entered the - * slowpath, then there is a possibility reader(s) may have the lock. - * To be safe, avoid spinning in these situations. - */ - return on_cpu; -} - -static inline bool owner_running(struct rw_semaphore *sem, - struct task_struct *owner) -{ - if (sem->owner != owner) - return false; - - /* - * Ensure we emit the owner->on_cpu, dereference _after_ checking - * sem->owner still matches owner, if that fails, owner might - * point to free()d memory, if it still matches, the rcu_read_lock() - * ensures the memory stays valid. - */ - barrier(); + owner = READ_ONCE(sem->owner); + if (!owner) { + long count = READ_ONCE(sem->count); + /* + * If sem->owner is not set, yet we have just recently entered the + * slowpath with the lock being active, then there is a possibility + * reader(s) may have the lock. To be safe, bail spinning in these + * situations. + */ + if (count & RWSEM_ACTIVE_MASK) + ret = false; + goto done; + } - return owner->on_cpu; + ret = owner->on_cpu; +done: + rcu_read_unlock(); + return ret; } static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) { + long count; + rcu_read_lock(); - while (owner_running(sem, owner)) { - if (need_resched()) - break; + while (sem->owner == owner) { + /* + * Ensure we emit the owner->on_cpu, dereference _after_ + * checking sem->owner still matches owner, if that fails, + * owner might point to free()d memory, if it still matches, + * the rcu_read_lock() ensures the memory stays valid. + */ + barrier(); + + /* abort spinning when need_resched or owner is not running */ + if (!owner->on_cpu || need_resched()) { + rcu_read_unlock(); + return false; + } cpu_relax_lowlatency(); } rcu_read_unlock(); + if (READ_ONCE(sem->owner)) + return true; /* new owner, continue spinning */ + /* - * We break out the loop above on need_resched() or when the - * owner changed, which is a sign for heavy contention. Return - * success only when sem->owner is NULL. + * When the owner is not set, the lock could be free or + * held by readers. Check the counter to verify the + * state. */ - return sem->owner == NULL; + count = READ_ONCE(sem->count); + return (count == 0 || count == RWSEM_WAITING_BIAS); } static bool rwsem_optimistic_spin(struct rw_semaphore *sem) @@ -358,7 +376,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) goto done; while (true) { - owner = ACCESS_ONCE(sem->owner); + owner = READ_ONCE(sem->owner); if (owner && !rwsem_spin_on_owner(sem, owner)) break; @@ -432,7 +450,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) /* we're now waiting on the lock, but no longer actively locking */ if (waiting) { - count = ACCESS_ONCE(sem->count); + count = READ_ONCE(sem->count); /* * If there were already threads queued before us and there are diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index e2d3bc7f03b4..205be0ce34de 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -9,29 +9,9 @@ #include <linux/sched.h> #include <linux/export.h> #include <linux/rwsem.h> - #include <linux/atomic.h> -#ifdef CONFIG_RWSEM_SPIN_ON_OWNER -static inline void rwsem_set_owner(struct rw_semaphore *sem) -{ - sem->owner = current; -} - -static inline void rwsem_clear_owner(struct rw_semaphore *sem) -{ - sem->owner = NULL; -} - -#else -static inline void rwsem_set_owner(struct rw_semaphore *sem) -{ -} - -static inline void rwsem_clear_owner(struct rw_semaphore *sem) -{ -} -#endif +#include "rwsem.h" /* * lock for reading diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h new file mode 100644 index 000000000000..870ed9a5b426 --- /dev/null +++ b/kernel/locking/rwsem.h @@ -0,0 +1,20 @@ +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER +static inline void rwsem_set_owner(struct rw_semaphore *sem) +{ + sem->owner = current; +} + +static inline void rwsem_clear_owner(struct rw_semaphore *sem) +{ + sem->owner = NULL; +} + +#else +static inline void rwsem_set_owner(struct rw_semaphore *sem) +{ +} + +static inline void rwsem_clear_owner(struct rw_semaphore *sem) +{ +} +#endif diff --git a/lib/lockref.c b/lib/lockref.c index ecb9a665ec19..494994bf17c8 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -18,7 +18,7 @@ #define CMPXCHG_LOOP(CODE, SUCCESS) do { \ struct lockref old; \ BUILD_BUG_ON(sizeof(old) != 8); \ - old.lock_count = ACCESS_ONCE(lockref->lock_count); \ + old.lock_count = READ_ONCE(lockref->lock_count); \ while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \ struct lockref new = old, prev = old; \ CODE \ |