diff options
Diffstat (limited to 'kernel/mutex.c')
-rw-r--r-- | kernel/mutex.c | 151 |
1 files changed, 147 insertions, 4 deletions
diff --git a/kernel/mutex.c b/kernel/mutex.c index 52f23011b6e0..ad53a664f113 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -37,6 +37,12 @@ # include <asm/mutex.h> #endif +/* + * A negative mutex count indicates that waiters are sleeping waiting for the + * mutex. + */ +#define MUTEX_SHOW_NO_WAITER(mutex) (atomic_read(&(mutex)->count) >= 0) + void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) { @@ -44,6 +50,9 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) spin_lock_init(&lock->wait_lock); INIT_LIST_HEAD(&lock->wait_list); mutex_clear_owner(lock); +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER + lock->spin_mlock = NULL; +#endif debug_mutex_init(lock, name, key); } @@ -95,6 +104,124 @@ void __sched mutex_lock(struct mutex *lock) EXPORT_SYMBOL(mutex_lock); #endif +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER +/* + * In order to avoid a stampede of mutex spinners from acquiring the mutex + * more or less simultaneously, the spinners need to acquire a MCS lock + * first before spinning on the owner field. + * + * We don't inline mspin_lock() so that perf can correctly account for the + * time spent in this lock function. + */ +struct mspin_node { + struct mspin_node *next ; + int locked; /* 1 if lock acquired */ +}; +#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) + +static noinline +void mspin_lock(struct mspin_node **lock, struct mspin_node *node) +{ + struct mspin_node *prev; + + /* Init node */ + node->locked = 0; + node->next = NULL; + + prev = xchg(lock, node); + if (likely(prev == NULL)) { + /* Lock acquired */ + node->locked = 1; + return; + } + ACCESS_ONCE(prev->next) = node; + smp_wmb(); + /* Wait until the lock holder passes the lock down */ + while (!ACCESS_ONCE(node->locked)) + arch_mutex_cpu_relax(); +} + +static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) +{ + struct mspin_node *next = ACCESS_ONCE(node->next); + + if (likely(!next)) { + /* + * Release the lock by setting it to NULL + */ + if (cmpxchg(lock, node, NULL) == node) + return; + /* Wait until the next pointer is set */ + while (!(next = ACCESS_ONCE(node->next))) + arch_mutex_cpu_relax(); + } + ACCESS_ONCE(next->locked) = 1; + smp_wmb(); +} + +/* + * Mutex spinning code migrated from kernel/sched/core.c + */ + +static inline bool owner_running(struct mutex *lock, struct task_struct *owner) +{ + if (lock->owner != owner) + return false; + + /* + * Ensure we emit the owner->on_cpu, dereference _after_ checking + * lock->owner still matches owner, if that fails, owner might + * point to free()d memory, if it still matches, the rcu_read_lock() + * ensures the memory stays valid. + */ + barrier(); + + return owner->on_cpu; +} + +/* + * Look out! "owner" is an entirely speculative pointer + * access and not reliable. + */ +static noinline +int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) +{ + rcu_read_lock(); + while (owner_running(lock, owner)) { + if (need_resched()) + break; + + arch_mutex_cpu_relax(); + } + rcu_read_unlock(); + + /* + * We break out the loop above on need_resched() and when the + * owner changed, which is a sign for heavy contention. Return + * success only when lock->owner is NULL. + */ + return lock->owner == NULL; +} + +/* + * Initial check for entering the mutex spinning loop + */ +static inline int mutex_can_spin_on_owner(struct mutex *lock) +{ + int retval = 1; + + rcu_read_lock(); + if (lock->owner) + retval = lock->owner->on_cpu; + rcu_read_unlock(); + /* + * if lock->owner is not set, the mutex owner may have just acquired + * it and not set the owner yet or the mutex has been released. + */ + return retval; +} +#endif + static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); /** @@ -158,25 +285,39 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * * We can't do this for DEBUG_MUTEXES because that relies on wait_lock * to serialize everything. + * + * The mutex spinners are queued up using MCS lock so that only one + * spinner can compete for the mutex. However, if mutex spinning isn't + * going to happen, there is no point in going through the lock/unlock + * overhead. */ + if (!mutex_can_spin_on_owner(lock)) + goto slowpath; for (;;) { struct task_struct *owner; + struct mspin_node node; /* * If there's an owner, wait for it to either * release the lock or go to sleep. */ + mspin_lock(MLOCK(lock), &node); owner = ACCESS_ONCE(lock->owner); - if (owner && !mutex_spin_on_owner(lock, owner)) + if (owner && !mutex_spin_on_owner(lock, owner)) { + mspin_unlock(MLOCK(lock), &node); break; + } - if (atomic_cmpxchg(&lock->count, 1, 0) == 1) { + if ((atomic_read(&lock->count) == 1) && + (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { lock_acquired(&lock->dep_map, ip); mutex_set_owner(lock); + mspin_unlock(MLOCK(lock), &node); preempt_enable(); return 0; } + mspin_unlock(MLOCK(lock), &node); /* * When there's no owner, we might have preempted between the @@ -195,6 +336,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, */ arch_mutex_cpu_relax(); } +slowpath: #endif spin_lock_mutex(&lock->wait_lock, flags); @@ -205,7 +347,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, list_add_tail(&waiter.list, &lock->wait_list); waiter.task = task; - if (atomic_xchg(&lock->count, -1) == 1) + if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, -1) == 1)) goto done; lock_contended(&lock->dep_map, ip); @@ -220,7 +362,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * that when we release the lock, we properly wake up the * other waiters: */ - if (atomic_xchg(&lock->count, -1) == 1) + if (MUTEX_SHOW_NO_WAITER(lock) && + (atomic_xchg(&lock->count, -1) == 1)) break; /* |