From 53dad6d3a8e5ac1af8bacc6ac2134ae1a8b085f1 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <davidlohr@hp.com>
Date: Mon, 23 Sep 2013 17:04:45 -0700
Subject: ipc: fix race with LSMs

Currently, IPC mechanisms do security and auditing related checks under
RCU.  However, since security modules can free the security structure,
for example, through selinux_[sem,msg_queue,shm]_free_security(), we can
race if the structure is freed before other tasks are done with it,
creating a use-after-free condition.  Manfred illustrates this nicely,
for instance with shared mem and selinux:

 -> do_shmat calls rcu_read_lock()
 -> do_shmat calls shm_object_check().
     Checks that the object is still valid - but doesn't acquire any locks.
     Then it returns.
 -> do_shmat calls security_shm_shmat (e.g. selinux_shm_shmat)
 -> selinux_shm_shmat calls ipc_has_perm()
 -> ipc_has_perm accesses ipc_perms->security

shm_close()
 -> shm_close acquires rw_mutex & shm_lock
 -> shm_close calls shm_destroy
 -> shm_destroy calls security_shm_free (e.g. selinux_shm_free_security)
 -> selinux_shm_free_security calls ipc_free_security(&shp->shm_perm)
 -> ipc_free_security calls kfree(ipc_perms->security)

This patch delays the freeing of the security structures after all RCU
readers are done.  Furthermore it aligns the security life cycle with
that of the rest of IPC - freeing them based on the reference counter.
For situations where we need not free security, the current behavior is
kept.  Linus states:

 "... the old behavior was suspect for another reason too: having the
  security blob go away from under a user sounds like it could cause
  various other problems anyway, so I think the old code was at least
  _prone_ to bugs even if it didn't have catastrophic behavior."

I have tested this patch with IPC testcases from LTP on both my
quad-core laptop and on a 64 core NUMA server.  In both cases selinux is
enabled, and tests pass for both voluntary and forced preemption models.
While the mentioned races are theoretical (at least no one as reported
them), I wanted to make sure that this new logic doesn't break anything
we weren't aware of.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
Acked-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/msg.c  | 19 +++++++++++++------
 ipc/sem.c  | 34 ++++++++++++++++++----------------
 ipc/shm.c  | 17 ++++++++++++-----
 ipc/util.c | 32 ++++++++++++--------------------
 ipc/util.h | 10 +++++++++-
 5 files changed, 64 insertions(+), 48 deletions(-)

(limited to 'ipc')

diff --git a/ipc/msg.c b/ipc/msg.c
index b0d541d42677..9e4310c546ae 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -165,6 +165,15 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
 	ipc_rmid(&msg_ids(ns), &s->q_perm);
 }
 
+static void msg_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct msg_queue *msq = ipc_rcu_to_struct(p);
+
+	security_msg_queue_free(msq);
+	ipc_rcu_free(head);
+}
+
 /**
  * newque - Create a new msg queue
  * @ns: namespace
@@ -189,15 +198,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	msq->q_perm.security = NULL;
 	retval = security_msg_queue_alloc(msq);
 	if (retval) {
-		ipc_rcu_putref(msq);
+		ipc_rcu_putref(msq, ipc_rcu_free);
 		return retval;
 	}
 
 	/* ipc_addid() locks msq upon success. */
 	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
 	if (id < 0) {
-		security_msg_queue_free(msq);
-		ipc_rcu_putref(msq);
+		ipc_rcu_putref(msq, msg_rcu_free);
 		return id;
 	}
 
@@ -276,8 +284,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 		free_msg(msg);
 	}
 	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
-	security_msg_queue_free(msq);
-	ipc_rcu_putref(msq);
+	ipc_rcu_putref(msq, msg_rcu_free);
 }
 
 /*
@@ -717,7 +724,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		rcu_read_lock();
 		ipc_lock_object(&msq->q_perm);
 
-		ipc_rcu_putref(msq);
+		ipc_rcu_putref(msq, ipc_rcu_free);
 		if (msq->q_perm.deleted) {
 			err = -EIDRM;
 			goto out_unlock0;
diff --git a/ipc/sem.c b/ipc/sem.c
index 69b6a21f3844..19c8b980d1fe 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -243,6 +243,15 @@ static void merge_queues(struct sem_array *sma)
 	}
 }
 
+static void sem_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct sem_array *sma = ipc_rcu_to_struct(p);
+
+	security_sem_free(sma);
+	ipc_rcu_free(head);
+}
+
 /*
  * If the request contains only one semaphore operation, and there are
  * no complex transactions pending, lock only the semaphore involved.
@@ -374,12 +383,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
 static inline void sem_lock_and_putref(struct sem_array *sma)
 {
 	sem_lock(sma, NULL, -1);
-	ipc_rcu_putref(sma);
-}
-
-static inline void sem_putref(struct sem_array *sma)
-{
-	ipc_rcu_putref(sma);
+	ipc_rcu_putref(sma, ipc_rcu_free);
 }
 
 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -458,14 +462,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 	sma->sem_perm.security = NULL;
 	retval = security_sem_alloc(sma);
 	if (retval) {
-		ipc_rcu_putref(sma);
+		ipc_rcu_putref(sma, ipc_rcu_free);
 		return retval;
 	}
 
 	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
 	if (id < 0) {
-		security_sem_free(sma);
-		ipc_rcu_putref(sma);
+		ipc_rcu_putref(sma, sem_rcu_free);
 		return id;
 	}
 	ns->used_sems += nsems;
@@ -1047,8 +1050,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 
 	wake_up_sem_queue_do(&tasks);
 	ns->used_sems -= sma->sem_nsems;
-	security_sem_free(sma);
-	ipc_rcu_putref(sma);
+	ipc_rcu_putref(sma, sem_rcu_free);
 }
 
 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
@@ -1292,7 +1294,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 			rcu_read_unlock();
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if(sem_io == NULL) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				return -ENOMEM;
 			}
 
@@ -1328,20 +1330,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		if(nsems > SEMMSL_FAST) {
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if(sem_io == NULL) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				return -ENOMEM;
 			}
 		}
 
 		if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
-			sem_putref(sma);
+			ipc_rcu_putref(sma, ipc_rcu_free);
 			err = -EFAULT;
 			goto out_free;
 		}
 
 		for (i = 0; i < nsems; i++) {
 			if (sem_io[i] > SEMVMX) {
-				sem_putref(sma);
+				ipc_rcu_putref(sma, ipc_rcu_free);
 				err = -ERANGE;
 				goto out_free;
 			}
@@ -1629,7 +1631,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	/* step 2: allocate new undo structure */
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
 	if (!new) {
-		sem_putref(sma);
+		ipc_rcu_putref(sma, ipc_rcu_free);
 		return ERR_PTR(-ENOMEM);
 	}
 
diff --git a/ipc/shm.c b/ipc/shm.c
index 2821cdf93adb..d69739610fd4 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -167,6 +167,15 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
 	ipc_lock_object(&ipcp->shm_perm);
 }
 
+static void shm_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+	struct shmid_kernel *shp = ipc_rcu_to_struct(p);
+
+	security_shm_free(shp);
+	ipc_rcu_free(head);
+}
+
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 {
 	ipc_rmid(&shm_ids(ns), &s->shm_perm);
@@ -208,8 +217,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 		user_shm_unlock(file_inode(shp->shm_file)->i_size,
 						shp->mlock_user);
 	fput (shp->shm_file);
-	security_shm_free(shp);
-	ipc_rcu_putref(shp);
+	ipc_rcu_putref(shp, shm_rcu_free);
 }
 
 /*
@@ -497,7 +505,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	shp->shm_perm.security = NULL;
 	error = security_shm_alloc(shp);
 	if (error) {
-		ipc_rcu_putref(shp);
+		ipc_rcu_putref(shp, ipc_rcu_free);
 		return error;
 	}
 
@@ -566,8 +574,7 @@ no_id:
 		user_shm_unlock(size, shp->mlock_user);
 	fput(file);
 no_file:
-	security_shm_free(shp);
-	ipc_rcu_putref(shp);
+	ipc_rcu_putref(shp, shm_rcu_free);
 	return error;
 }
 
diff --git a/ipc/util.c b/ipc/util.c
index e829da9ed01f..fdb8ae740775 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -474,11 +474,6 @@ void ipc_free(void* ptr, int size)
 		kfree(ptr);
 }
 
-struct ipc_rcu {
-	struct rcu_head rcu;
-	atomic_t refcount;
-} ____cacheline_aligned_in_smp;
-
 /**
  *	ipc_rcu_alloc	-	allocate ipc and rcu space 
  *	@size: size desired
@@ -505,27 +500,24 @@ int ipc_rcu_getref(void *ptr)
 	return atomic_inc_not_zero(&p->refcount);
 }
 
-/**
- * ipc_schedule_free - free ipc + rcu space
- * @head: RCU callback structure for queued work
- */
-static void ipc_schedule_free(struct rcu_head *head)
-{
-	vfree(container_of(head, struct ipc_rcu, rcu));
-}
-
-void ipc_rcu_putref(void *ptr)
+void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
 {
 	struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
 
 	if (!atomic_dec_and_test(&p->refcount))
 		return;
 
-	if (is_vmalloc_addr(ptr)) {
-		call_rcu(&p->rcu, ipc_schedule_free);
-	} else {
-		kfree_rcu(p, rcu);
-	}
+	call_rcu(&p->rcu, func);
+}
+
+void ipc_rcu_free(struct rcu_head *head)
+{
+	struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
+
+	if (is_vmalloc_addr(p))
+		vfree(p);
+	else
+		kfree(p);
 }
 
 /**
diff --git a/ipc/util.h b/ipc/util.h
index c5f3338ba1fa..f2f5036f2eed 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { }
 static inline void shm_exit_ns(struct ipc_namespace *ns) { }
 #endif
 
+struct ipc_rcu {
+	struct rcu_head rcu;
+	atomic_t refcount;
+} ____cacheline_aligned_in_smp;
+
+#define ipc_rcu_to_struct(p)  ((void *)(p+1))
+
 /*
  * Structure that holds the parameters needed by the ipc operations
  * (see after)
@@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size);
  */
 void* ipc_rcu_alloc(int size);
 int ipc_rcu_getref(void *ptr);
-void ipc_rcu_putref(void *ptr);
+void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
+void ipc_rcu_free(struct rcu_head *head);
 
 struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);
-- 
cgit v1.2.3


From 5e9d527591421ccdb16acb8c23662231135d8686 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Mon, 30 Sep 2013 13:45:04 -0700
Subject: ipc/sem.c: fix race in sem_lock()

The exclusion of complex operations in sem_lock() is insufficient: after
acquiring the per-semaphore lock, a simple op must first check that
sem_perm.lock is not locked and only after that test check
complex_count.  The current code does it the other way around - and that
creates a race.  Details are below.

The patch is a complete rewrite of sem_lock(), based in part on the code
from Mike Galbraith.  It removes all gotos and all loops and thus the
risk of livelocks.

I have tested the patch (together with the next one) on my i3 laptop and
it didn't cause any problems.

The bug is probably also present in 3.10 and 3.11, but for these kernels
it might be simpler just to move the test of sma->complex_count after
the spin_is_locked() test.

Details of the bug:

Assume:
 - sma->complex_count = 0.
 - Thread 1: semtimedop(complex op that must sleep)
 - Thread 2: semtimedop(simple op).

Pseudo-Trace:

Thread 1: sem_lock(): acquire sem_perm.lock
Thread 1: sem_lock(): check for ongoing simple ops
			Nothing ongoing, thread 2 is still before sem_lock().
Thread 1: try_atomic_semop()
	<<< preempted.

Thread 2: sem_lock():
        static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
                                      int nsops)
        {
                int locknum;
         again:
                if (nsops == 1 && !sma->complex_count) {
                        struct sem *sem = sma->sem_base + sops->sem_num;

                        /* Lock just the semaphore we are interested in. */
                        spin_lock(&sem->lock);

                        /*
                         * If sma->complex_count was set while we were spinning,
                         * we may need to look at things we did not lock here.
                         */
                        if (unlikely(sma->complex_count)) {
                                spin_unlock(&sem->lock);
                                goto lock_array;
                        }
        <<<<<<<<<
	<<< complex_count is still 0.
	<<<
        <<< Here it is preempted
        <<<<<<<<<

Thread 1: try_atomic_semop() returns, notices that it must sleep.
Thread 1: increases sma->complex_count.
Thread 1: drops sem_perm.lock
Thread 2:
                /*
                 * Another process is holding the global lock on the
                 * sem_array; we cannot enter our critical section,
                 * but have to wait for the global lock to be released.
                 */
                if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
                        spin_unlock(&sem->lock);
                        spin_unlock_wait(&sma->sem_perm.lock);
                        goto again;
                }
	<<< sem_perm.lock already dropped, thus no "goto again;"

                locknum = sops->sem_num;

Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Mike Galbraith <bitbucket@online.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
Cc: <stable@vger.kernel.org>	[3.10+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/sem.c | 122 +++++++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 78 insertions(+), 44 deletions(-)

(limited to 'ipc')

diff --git a/ipc/sem.c b/ipc/sem.c
index 19c8b980d1fe..4a92c0447ad6 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -252,71 +252,105 @@ static void sem_rcu_free(struct rcu_head *head)
 	ipc_rcu_free(head);
 }
 
+/*
+ * Wait until all currently ongoing simple ops have completed.
+ * Caller must own sem_perm.lock.
+ * New simple ops cannot start, because simple ops first check
+ * that sem_perm.lock is free.
+ */
+static void sem_wait_array(struct sem_array *sma)
+{
+	int i;
+	struct sem *sem;
+
+	for (i = 0; i < sma->sem_nsems; i++) {
+		sem = sma->sem_base + i;
+		spin_unlock_wait(&sem->lock);
+	}
+}
+
 /*
  * If the request contains only one semaphore operation, and there are
  * no complex transactions pending, lock only the semaphore involved.
  * Otherwise, lock the entire semaphore array, since we either have
  * multiple semaphores in our own semops, or we need to look at
  * semaphores from other pending complex operations.
- *
- * Carefully guard against sma->complex_count changing between zero
- * and non-zero while we are spinning for the lock. The value of
- * sma->complex_count cannot change while we are holding the lock,
- * so sem_unlock should be fine.
- *
- * The global lock path checks that all the local locks have been released,
- * checking each local lock once. This means that the local lock paths
- * cannot start their critical sections while the global lock is held.
  */
 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
 			      int nsops)
 {
-	int locknum;
- again:
-	if (nsops == 1 && !sma->complex_count) {
-		struct sem *sem = sma->sem_base + sops->sem_num;
+	struct sem *sem;
 
-		/* Lock just the semaphore we are interested in. */
-		spin_lock(&sem->lock);
+	if (nsops != 1) {
+		/* Complex operation - acquire a full lock */
+		ipc_lock_object(&sma->sem_perm);
 
-		/*
-		 * If sma->complex_count was set while we were spinning,
-		 * we may need to look at things we did not lock here.
+		/* And wait until all simple ops that are processed
+		 * right now have dropped their locks.
 		 */
-		if (unlikely(sma->complex_count)) {
-			spin_unlock(&sem->lock);
-			goto lock_array;
-		}
+		sem_wait_array(sma);
+		return -1;
+	}
+
+	/*
+	 * Only one semaphore affected - try to optimize locking.
+	 * The rules are:
+	 * - optimized locking is possible if no complex operation
+	 *   is either enqueued or processed right now.
+	 * - The test for enqueued complex ops is simple:
+	 *      sma->complex_count != 0
+	 * - Testing for complex ops that are processed right now is
+	 *   a bit more difficult. Complex ops acquire the full lock
+	 *   and first wait that the running simple ops have completed.
+	 *   (see above)
+	 *   Thus: If we own a simple lock and the global lock is free
+	 *	and complex_count is now 0, then it will stay 0 and
+	 *	thus just locking sem->lock is sufficient.
+	 */
+	sem = sma->sem_base + sops->sem_num;
 
+	if (sma->complex_count == 0) {
 		/*
-		 * Another process is holding the global lock on the
-		 * sem_array; we cannot enter our critical section,
-		 * but have to wait for the global lock to be released.
+		 * It appears that no complex operation is around.
+		 * Acquire the per-semaphore lock.
 		 */
-		if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
-			spin_unlock(&sem->lock);
-			spin_unlock_wait(&sma->sem_perm.lock);
-			goto again;
+		spin_lock(&sem->lock);
+
+		/* Then check that the global lock is free */
+		if (!spin_is_locked(&sma->sem_perm.lock)) {
+			/* spin_is_locked() is not a memory barrier */
+			smp_mb();
+
+			/* Now repeat the test of complex_count:
+			 * It can't change anymore until we drop sem->lock.
+			 * Thus: if is now 0, then it will stay 0.
+			 */
+			if (sma->complex_count == 0) {
+				/* fast path successful! */
+				return sops->sem_num;
+			}
 		}
+		spin_unlock(&sem->lock);
+	}
 
-		locknum = sops->sem_num;
+	/* slow path: acquire the full lock */
+	ipc_lock_object(&sma->sem_perm);
+
+	if (sma->complex_count == 0) {
+		/* False alarm:
+		 * There is no complex operation, thus we can switch
+		 * back to the fast path.
+		 */
+		spin_lock(&sem->lock);
+		ipc_unlock_object(&sma->sem_perm);
+		return sops->sem_num;
 	} else {
-		int i;
-		/*
-		 * Lock the semaphore array, and wait for all of the
-		 * individual semaphore locks to go away.  The code
-		 * above ensures no new single-lock holders will enter
-		 * their critical section while the array lock is held.
+		/* Not a false alarm, thus complete the sequence for a
+		 * full lock.
 		 */
- lock_array:
-		ipc_lock_object(&sma->sem_perm);
-		for (i = 0; i < sma->sem_nsems; i++) {
-			struct sem *sem = sma->sem_base + i;
-			spin_unlock_wait(&sem->lock);
-		}
-		locknum = -1;
+		sem_wait_array(sma);
+		return -1;
 	}
-	return locknum;
 }
 
 static inline void sem_unlock(struct sem_array *sma, int locknum)
-- 
cgit v1.2.3


From 6d07b68ce16ae9535955ba2059dedba5309c3ca1 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Mon, 30 Sep 2013 13:45:06 -0700
Subject: ipc/sem.c: optimize sem_lock()

Operations that need access to the whole array must guarantee that there
are no simple operations ongoing.  Right now this is achieved by
spin_unlock_wait(sem->lock) on all semaphores.

If complex_count is nonzero, then this spin_unlock_wait() is not
necessary, because it was already performed in the past by the thread
that increased complex_count and even though sem_perm.lock was dropped
inbetween, no simple operation could have started, because simple
operations cannot start when complex_count is non-zero.

Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Mike Galbraith <bitbucket@online.de>
Cc: Rik van Riel <riel@redhat.com>
Reviewed-by: Davidlohr Bueso <davidlohr@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/sem.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'ipc')

diff --git a/ipc/sem.c b/ipc/sem.c
index 4a92c0447ad6..e20658d76bb5 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -257,12 +257,20 @@ static void sem_rcu_free(struct rcu_head *head)
  * Caller must own sem_perm.lock.
  * New simple ops cannot start, because simple ops first check
  * that sem_perm.lock is free.
+ * that a) sem_perm.lock is free and b) complex_count is 0.
  */
 static void sem_wait_array(struct sem_array *sma)
 {
 	int i;
 	struct sem *sem;
 
+	if (sma->complex_count)  {
+		/* The thread that increased sma->complex_count waited on
+		 * all sem->lock locks. Thus we don't need to wait again.
+		 */
+		return;
+	}
+
 	for (i = 0; i < sma->sem_nsems; i++) {
 		sem = sma->sem_base + i;
 		spin_unlock_wait(&sem->lock);
-- 
cgit v1.2.3


From d8c633766ad88527f25d9f81a5c2f083d78a2b39 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Mon, 30 Sep 2013 13:45:07 -0700
Subject: ipc/sem.c: synchronize the proc interface

The proc interface is not aware of sem_lock(), it instead calls
ipc_lock_object() directly.  This means that simple semop() operations
can run in parallel with the proc interface.  Right now, this is
uncritical, because the implementation doesn't do anything that requires
a proper synchronization.

But it is dangerous and therefore should be fixed.

Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/sem.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'ipc')

diff --git a/ipc/sem.c b/ipc/sem.c
index e20658d76bb5..cd6a733011a2 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -2103,6 +2103,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
 	struct sem_array *sma = it;
 	time_t sem_otime;
 
+	/*
+	 * The proc interface isn't aware of sem_lock(), it calls
+	 * ipc_lock_object() directly (in sysvipc_find_ipc).
+	 * In order to stay compatible with sem_lock(), we must wait until
+	 * all simple semop() calls have left their critical regions.
+	 */
+	sem_wait_array(sma);
+
 	sem_otime = get_semotime(sma);
 
 	return seq_printf(s,
-- 
cgit v1.2.3


From 0e8c665699e953fa58dc1b0b0d09e5dce7343cc7 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Mon, 30 Sep 2013 13:45:25 -0700
Subject: ipc/sem.c: update sem_otime for all operations

In commit 0a2b9d4c7967 ("ipc/sem.c: move wake_up_process out of the
spinlock section"), the update of semaphore's sem_otime(last semop time)
was moved to one central position (do_smart_update).

But since do_smart_update() is only called for operations that modify
the array, this means that wait-for-zero semops do not update sem_otime
anymore.

The fix is simple:
Non-alter operations must update sem_otime.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Reported-by: Jia He <jiakernel@gmail.com>
Tested-by: Jia He <jiakernel@gmail.com>
Cc: Davidlohr Bueso <davidlohr.bueso@hp.com>
Cc: Mike Galbraith <efault@gmx.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/sem.c | 42 +++++++++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 13 deletions(-)

(limited to 'ipc')

diff --git a/ipc/sem.c b/ipc/sem.c
index cd6a733011a2..8c4f59b0204a 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -917,6 +917,24 @@ again:
 	return semop_completed;
 }
 
+/**
+ * set_semotime(sma, sops) - set sem_otime
+ * @sma: semaphore array
+ * @sops: operations that modified the array, may be NULL
+ *
+ * sem_otime is replicated to avoid cache line trashing.
+ * This function sets one instance to the current time.
+ */
+static void set_semotime(struct sem_array *sma, struct sembuf *sops)
+{
+	if (sops == NULL) {
+		sma->sem_base[0].sem_otime = get_seconds();
+	} else {
+		sma->sem_base[sops[0].sem_num].sem_otime =
+							get_seconds();
+	}
+}
+
 /**
  * do_smart_update(sma, sops, nsops, otime, pt) - optimized update_queue
  * @sma: semaphore array
@@ -967,17 +985,10 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
 			}
 		}
 	}
-	if (otime) {
-		if (sops == NULL) {
-			sma->sem_base[0].sem_otime = get_seconds();
-		} else {
-			sma->sem_base[sops[0].sem_num].sem_otime =
-								get_seconds();
-		}
-	}
+	if (otime)
+		set_semotime(sma, sops);
 }
 
-
 /* The following counts are associated to each semaphore:
  *   semncnt        number of tasks waiting on semval being nonzero
  *   semzcnt        number of tasks waiting on semval being zero
@@ -1839,12 +1850,17 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 
 	error = perform_atomic_semop(sma, sops, nsops, un,
 					task_tgid_vnr(current));
-	if (error <= 0) {
-		if (alter && error == 0)
+	if (error == 0) {
+		/* If the operation was successful, then do
+		 * the required updates.
+		 */
+		if (alter)
 			do_smart_update(sma, sops, nsops, 1, &tasks);
-
-		goto out_unlock_free;
+		else
+			set_semotime(sma, sops);
 	}
+	if (error <= 0)
+		goto out_unlock_free;
 
 	/* We need to sleep on this operation, so we put the current
 	 * task into the pending queue and go to sleep.
-- 
cgit v1.2.3


From 4271b05a227dc6175b66c3d9941aeab09048aeb2 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <davidlohr@hp.com>
Date: Mon, 30 Sep 2013 13:45:26 -0700
Subject: ipc,msg: prevent race with rmid in msgsnd,msgrcv

This fixes a race in both msgrcv() and msgsnd() between finding the msg
and actually dealing with the queue, as another thread can delete shmid
underneath us if we are preempted before acquiring the
kern_ipc_perm.lock.

Manfred illustrates this nicely:

Assume a preemptible kernel that is preempted just after

    msq = msq_obtain_object_check(ns, msqid)

in do_msgrcv().  The only lock that is held is rcu_read_lock().

Now the other thread processes IPC_RMID.  When the first task is
resumed, then it will happily wait for messages on a deleted queue.

Fix this by checking for if the queue has been deleted after taking the
lock.

Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
Reported-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: <stable@vger.kernel.org> 	[3.11]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/msg.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'ipc')

diff --git a/ipc/msg.c b/ipc/msg.c
index 9e4310c546ae..558aa91186b6 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -695,6 +695,12 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		if (ipcperms(ns, &msq->q_perm, S_IWUGO))
 			goto out_unlock0;
 
+		/* raced with RMID? */
+		if (msq->q_perm.deleted) {
+			err = -EIDRM;
+			goto out_unlock0;
+		}
+
 		err = security_msg_queue_msgsnd(msq, msg, msgflg);
 		if (err)
 			goto out_unlock0;
@@ -901,6 +907,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl
 			goto out_unlock1;
 
 		ipc_lock_object(&msq->q_perm);
+
+		/* raced with RMID? */
+		if (msq->q_perm.deleted) {
+			msg = ERR_PTR(-EIDRM);
+			goto out_unlock0;
+		}
+
 		msg = find_msg(msq, &msgtyp, mode);
 		if (!IS_ERR(msg)) {
 			/*
-- 
cgit v1.2.3