diff options
author | Manfred Spraul <manfred@colorfullife.com> | 2016-10-11 13:54:50 -0700 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2016-10-28 03:01:32 -0400 |
commit | f6031d95320dc2930f1b813e98533a01c92f3dc0 (patch) | |
tree | 68741eceb18f0e17330775b423851b2f0239e217 /ipc | |
parent | b52b7b5a5cafe1b415291b971259cee319345d18 (diff) |
ipc/sem.c: fix complex_count vs. simple op race
commit 5864a2fd3088db73d47942370d0f7210a807b9bc upstream.
Commit 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()") introduced a
race:
sem_lock has a fast path that allows parallel simple operations.
There are two reasons why a simple operation cannot run in parallel:
- a non-simple operations is ongoing (sma->sem_perm.lock held)
- a complex operation is sleeping (sma->complex_count != 0)
As both facts are stored independently, a thread can bypass the current
checks by sleeping in the right positions. See below for more details
(or kernel bugzilla 105651).
The patch fixes that by creating one variable (complex_mode)
that tracks both reasons why parallel operations are not possible.
The patch also updates stale documentation regarding the locking.
With regards to stable kernels:
The patch is required for all kernels that include the
commit 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()") (3.10?)
The alternative is to revert the patch that introduced the race.
The patch is safe for backporting, i.e. it makes no assumptions
about memory barriers in spin_unlock_wait().
Background:
Here is the race of the current implementation:
Thread A: (simple op)
- does the first "sma->complex_count == 0" test
Thread B: (complex op)
- does sem_lock(): This includes an array scan. But the scan can't
find Thread A, because Thread A does not own sem->lock yet.
- the thread does the operation, increases complex_count,
drops sem_lock, sleeps
Thread A:
- spin_lock(&sem->lock), spin_is_locked(sma->sem_perm.lock)
- sleeps before the complex_count test
Thread C: (complex op)
- does sem_lock (no array scan, complex_count==1)
- wakes up Thread B.
- decrements complex_count
Thread A:
- does the complex_count test
Bug:
Now both thread A and thread C operate on the same array, without
any synchronization.
Fixes: 6d07b68ce16a ("ipc/sem.c: optimize sem_lock()")
Link: http://lkml.kernel.org/r/1469123695-5661-1-git-send-email-manfred@colorfullife.com
Reported-by: <felixh@informatik.uni-bremen.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: <1vier1@web.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/sem.c | 130 |
1 files changed, 75 insertions, 55 deletions
diff --git a/ipc/sem.c b/ipc/sem.c index 20d07008ad5e..9862c3d1c26d 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -155,14 +155,21 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it); /* * Locking: + * a) global sem_lock() for read/write * sem_undo.id_next, * sem_array.complex_count, - * sem_array.pending{_alter,_cont}, - * sem_array.sem_undo: global sem_lock() for read/write - * sem_undo.proc_next: only "current" is allowed to read/write that field. + * sem_array.complex_mode + * sem_array.pending{_alter,_const}, + * sem_array.sem_undo * + * b) global or semaphore sem_lock() for read/write: * sem_array.sem_base[i].pending_{const,alter}: - * global or semaphore sem_lock() for read/write + * sem_array.complex_mode (for read) + * + * c) special: + * sem_undo_list.list_proc: + * * undo_list->lock for write + * * rcu for read */ #define sc_semmsl sem_ctls[0] @@ -263,24 +270,25 @@ static void sem_rcu_free(struct rcu_head *head) #define ipc_smp_acquire__after_spin_is_unlocked() smp_rmb() /* - * Wait until all currently ongoing simple ops have completed. + * Enter the mode suitable for non-simple operations: * Caller must own sem_perm.lock. - * New simple ops cannot start, because simple ops first check - * that sem_perm.lock is free. - * that a) sem_perm.lock is free and b) complex_count is 0. */ -static void sem_wait_array(struct sem_array *sma) +static void complexmode_enter(struct sem_array *sma) { int i; struct sem *sem; - if (sma->complex_count) { - /* The thread that increased sma->complex_count waited on - * all sem->lock locks. Thus we don't need to wait again. - */ + if (sma->complex_mode) { + /* We are already in complex_mode. Nothing to do */ return; } + /* We need a full barrier after seting complex_mode: + * The write to complex_mode must be visible + * before we read the first sem->lock spinlock state. + */ + smp_store_mb(sma->complex_mode, true); + for (i = 0; i < sma->sem_nsems; i++) { sem = sma->sem_base + i; spin_unlock_wait(&sem->lock); @@ -289,6 +297,28 @@ static void sem_wait_array(struct sem_array *sma) } /* + * Try to leave the mode that disallows simple operations: + * Caller must own sem_perm.lock. + */ +static void complexmode_tryleave(struct sem_array *sma) +{ + if (sma->complex_count) { + /* Complex ops are sleeping. + * We must stay in complex mode + */ + return; + } + /* + * Immediately after setting complex_mode to false, + * a simple op can start. Thus: all memory writes + * performed by the current operation must be visible + * before we set complex_mode to false. + */ + smp_store_release(&sma->complex_mode, false); +} + +#define SEM_GLOBAL_LOCK (-1) +/* * If the request contains only one semaphore operation, and there are * no complex transactions pending, lock only the semaphore involved. * Otherwise, lock the entire semaphore array, since we either have @@ -304,56 +334,42 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, /* Complex operation - acquire a full lock */ ipc_lock_object(&sma->sem_perm); - /* And wait until all simple ops that are processed - * right now have dropped their locks. - */ - sem_wait_array(sma); - return -1; + /* Prevent parallel simple ops */ + complexmode_enter(sma); + return SEM_GLOBAL_LOCK; } /* * Only one semaphore affected - try to optimize locking. - * The rules are: - * - optimized locking is possible if no complex operation - * is either enqueued or processed right now. - * - The test for enqueued complex ops is simple: - * sma->complex_count != 0 - * - Testing for complex ops that are processed right now is - * a bit more difficult. Complex ops acquire the full lock - * and first wait that the running simple ops have completed. - * (see above) - * Thus: If we own a simple lock and the global lock is free - * and complex_count is now 0, then it will stay 0 and - * thus just locking sem->lock is sufficient. + * Optimized locking is possible if no complex operation + * is either enqueued or processed right now. + * + * Both facts are tracked by complex_mode. */ sem = sma->sem_base + sops->sem_num; - if (sma->complex_count == 0) { + /* + * Initial check for complex_mode. Just an optimization, + * no locking, no memory barrier. + */ + if (!sma->complex_mode) { /* * It appears that no complex operation is around. * Acquire the per-semaphore lock. */ spin_lock(&sem->lock); - /* Then check that the global lock is free */ - if (!spin_is_locked(&sma->sem_perm.lock)) { - /* - * We need a memory barrier with acquire semantics, - * otherwise we can race with another thread that does: - * complex_count++; - * spin_unlock(sem_perm.lock); - */ - ipc_smp_acquire__after_spin_is_unlocked(); + /* + * See 51d7d5205d33 + * ("powerpc: Add smp_mb() to arch_spin_is_locked()"): + * A full barrier is required: the write of sem->lock + * must be visible before the read is executed + */ + smp_mb(); - /* - * Now repeat the test of complex_count: - * It can't change anymore until we drop sem->lock. - * Thus: if is now 0, then it will stay 0. - */ - if (sma->complex_count == 0) { - /* fast path successful! */ - return sops->sem_num; - } + if (!smp_load_acquire(&sma->complex_mode)) { + /* fast path successful! */ + return sops->sem_num; } spin_unlock(&sem->lock); } @@ -373,15 +389,16 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops, /* Not a false alarm, thus complete the sequence for a * full lock. */ - sem_wait_array(sma); - return -1; + complexmode_enter(sma); + return SEM_GLOBAL_LOCK; } } static inline void sem_unlock(struct sem_array *sma, int locknum) { - if (locknum == -1) { + if (locknum == SEM_GLOBAL_LOCK) { unmerge_queues(sma); + complexmode_tryleave(sma); ipc_unlock_object(&sma->sem_perm); } else { struct sem *sem = sma->sem_base + locknum; @@ -533,6 +550,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) } sma->complex_count = 0; + sma->complex_mode = true; /* dropped by sem_unlock below */ INIT_LIST_HEAD(&sma->pending_alter); INIT_LIST_HEAD(&sma->pending_const); INIT_LIST_HEAD(&sma->list_id); @@ -2186,10 +2204,10 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) /* * The proc interface isn't aware of sem_lock(), it calls * ipc_lock_object() directly (in sysvipc_find_ipc). - * In order to stay compatible with sem_lock(), we must wait until - * all simple semop() calls have left their critical regions. + * In order to stay compatible with sem_lock(), we must + * enter / leave complex_mode. */ - sem_wait_array(sma); + complexmode_enter(sma); sem_otime = get_semotime(sma); @@ -2206,6 +2224,8 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it) sem_otime, sma->sem_ctime); + complexmode_tryleave(sma); + return 0; } #endif |