| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-01 08:17:51 -0700 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-01 08:17:51 -0700 |
| commit | 823e75f723aa3fefd5d2eecbf8636184ca4790fc (patch) | |
| tree | 3a481d7077a27ba86006f431697b5cac2e59ab58 /ipc/sem.c | |
| parent | 149b306089b88e186942a8d6647028ae6683aaf9 (diff) | |
| parent | d69f3bad4675ac519d41ca2b11e1c00ca115cecd (diff) | |
Merge branch 'ipc-scalability'
Merge IPC cleanup and scalability patches from Andrew Morton.
This cleans up many of the oddities in the IPC code, uses the list
iterator helpers, splits out locking and adds per-semaphore locks for
greater scalability of the IPC semaphore code.
Most normal user-level locking by now uses futexes (i.e. pthreads, but
also a lot of specialized locks), but SysV IPC semaphores are apparently
still used in some big applications, either for portability reasons, or
because they offer tracking and undo (and you don't need to have a
special shared memory area for them).
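The undo behaviour is easy to demonstrate from userspace. Below is a minimal sketch (not part of this merge): a process takes a binary semaphore with SEM_UNDO set, and if it exits or crashes before releasing it, the kernel reverses the decrement automatically. The key `0x5ea4`, the permissions, and the binary-semaphore convention are arbitrary choices for illustration.

```c
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>

/* The caller must define union semun itself (see semctl(2)). */
union semun {
	int val;
	struct semid_ds *buf;
	unsigned short *array;
};

int main(void)
{
	/* One-semaphore set; 0x5ea4 is an arbitrary example key. */
	int semid = semget(0x5ea4, 1, IPC_CREAT | 0666);
	if (semid < 0) {
		perror("semget");
		return EXIT_FAILURE;
	}

	/* Initialise the semaphore to 1, i.e. a free binary "lock". */
	union semun arg = { .val = 1 };
	if (semctl(semid, 0, SETVAL, arg) < 0) {
		perror("semctl");
		return EXIT_FAILURE;
	}

	/* P operation with SEM_UNDO: the kernel records an undo
	 * adjustment and reverses the decrement if we exit early. */
	struct sembuf down = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };
	if (semop(semid, &down, 1) < 0) {
		perror("semop");
		return EXIT_FAILURE;
	}

	/* ... critical section: dying here does not leak the lock ... */

	/* V operation: the explicit release on the normal path. */
	struct sembuf up = { .sem_num = 0, .sem_op = 1, .sem_flg = SEM_UNDO };
	if (semop(semid, &up, 1) < 0) {
		perror("semop");
		return EXIT_FAILURE;
	}
	return EXIT_SUCCESS;
}
```

Note that no shared memory segment is needed: the semaphore lives in the kernel and is addressed by key, which is the portability point made above.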
Our IPC semaphore scalability was pitiful. We used to lock much too big
ranges, and we used to have a single ipc lock per ipc semaphore array.
Most loads never cared, but some do. There are some numbers in the
individual commits.
* ipc-scalability:
ipc: sysv shared memory limited to 8TiB
ipc/msg.c: use list_for_each_entry_[safe] for list traversing
ipc,sem: fine grained locking for semtimedop
ipc,sem: have only one list in struct sem_queue
ipc,sem: open code and rename sem_lock
ipc,sem: do not hold ipc lock more than necessary
ipc: introduce lockless pre_down ipcctl
ipc: introduce obtaining a lockless ipc object
ipc: remove bogus lock comment for ipc_checkid
ipc/msgutil.c: use linux/uaccess.h
ipc: refactor msg list search into separate function
ipc: simplify msg list search
ipc: implement MSG_COPY as a new receive mode
ipc: remove msg handling from queue scan
ipc: set EFAULT as default error in load_msg()
ipc: tighten msg copy loops
ipc: separate msg allocation from userspace copy
ipc: clamp with min()
Diffstat (limited to 'ipc/sem.c')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | ipc/sem.c | 474 |

1 file changed, 315 insertions, 159 deletions
```diff
diff --git a/ipc/sem.c b/ipc/sem.c
index 5b167d00efa6..e78ee3186d1f 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -94,12 +94,12 @@ struct sem {
 	int	semval;		/* current value */
 	int	sempid;	/* pid of last operation */
+	spinlock_t	lock;	/* spinlock for fine-grained semtimedop */
 	struct list_head sem_pending; /* pending single-sop operations */
 };
 
 /* One queue for each sleeping process in the system. */
 struct sem_queue {
-	struct list_head	simple_list; /* queue of pending operations */
 	struct list_head	list;	 /* queue of pending operations */
 	struct task_struct	*sleeper; /* this process */
 	struct sem_undo		*undo;	 /* undo structure */
@@ -138,7 +138,6 @@ struct sem_undo_list {
 
 #define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])
 
-#define sem_unlock(sma)	ipc_unlock(&(sma)->sem_perm)
 #define sem_checkid(sma, semid)	ipc_checkid(&sma->sem_perm, semid)
 
 static int newary(struct ipc_namespace *, struct ipc_params *);
@@ -191,47 +190,164 @@ void __init sem_init (void)
 }
 
 /*
+ * If the request contains only one semaphore operation, and there are
+ * no complex transactions pending, lock only the semaphore involved.
+ * Otherwise, lock the entire semaphore array, since we either have
+ * multiple semaphores in our own semops, or we need to look at
+ * semaphores from other pending complex operations.
+ *
+ * Carefully guard against sma->complex_count changing between zero
+ * and non-zero while we are spinning for the lock. The value of
+ * sma->complex_count cannot change while we are holding the lock,
+ * so sem_unlock should be fine.
+ *
+ * The global lock path checks that all the local locks have been released,
+ * checking each local lock once. This means that the local lock paths
+ * cannot start their critical sections while the global lock is held.
+ */
+static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
+			      int nsops)
+{
+	int locknum;
+ again:
+	if (nsops == 1 && !sma->complex_count) {
+		struct sem *sem = sma->sem_base + sops->sem_num;
+
+		/* Lock just the semaphore we are interested in. */
+		spin_lock(&sem->lock);
+
+		/*
+		 * If sma->complex_count was set while we were spinning,
+		 * we may need to look at things we did not lock here.
+		 */
+		if (unlikely(sma->complex_count)) {
+			spin_unlock(&sem->lock);
+			goto lock_array;
+		}
+
+		/*
+		 * Another process is holding the global lock on the
+		 * sem_array; we cannot enter our critical section,
+		 * but have to wait for the global lock to be released.
+		 */
+		if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
+			spin_unlock(&sem->lock);
+			spin_unlock_wait(&sma->sem_perm.lock);
+			goto again;
+		}
+
+		locknum = sops->sem_num;
+	} else {
+		int i;
+		/*
+		 * Lock the semaphore array, and wait for all of the
+		 * individual semaphore locks to go away.  The code
+		 * above ensures no new single-lock holders will enter
+		 * their critical section while the array lock is held.
+		 */
+ lock_array:
+		spin_lock(&sma->sem_perm.lock);
+		for (i = 0; i < sma->sem_nsems; i++) {
+			struct sem *sem = sma->sem_base + i;
+			spin_unlock_wait(&sem->lock);
+		}
+		locknum = -1;
+	}
+	return locknum;
+}
+
+static inline void sem_unlock(struct sem_array *sma, int locknum)
+{
+	if (locknum == -1) {
+		spin_unlock(&sma->sem_perm.lock);
+	} else {
+		struct sem *sem = sma->sem_base + locknum;
+		spin_unlock(&sem->lock);
+	}
+	rcu_read_unlock();
+}
+
+/*
  * sem_lock_(check_) routines are called in the paths where the rw_mutex
  * is not held.
  */
-static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id)
+static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns,
+			int id, struct sembuf *sops, int nsops, int *locknum)
+{
+	struct kern_ipc_perm *ipcp;
+	struct sem_array *sma;
+
+	rcu_read_lock();
+	ipcp = ipc_obtain_object(&sem_ids(ns), id);
+	if (IS_ERR(ipcp)) {
+		sma = ERR_CAST(ipcp);
+		goto err;
+	}
+
+	sma = container_of(ipcp, struct sem_array, sem_perm);
+	*locknum = sem_lock(sma, sops, nsops);
+
+	/* ipc_rmid() may have already freed the ID while sem_lock
+	 * was spinning: verify that the structure is still valid
+	 */
+	if (!ipcp->deleted)
+		return container_of(ipcp, struct sem_array, sem_perm);
+
+	sem_unlock(sma, *locknum);
+	sma = ERR_PTR(-EINVAL);
+err:
+	rcu_read_unlock();
+	return sma;
+}
+
+static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
 {
-	struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id);
+	struct kern_ipc_perm *ipcp = ipc_obtain_object(&sem_ids(ns), id);
 
 	if (IS_ERR(ipcp))
-		return (struct sem_array *)ipcp;
+		return ERR_CAST(ipcp);
 
 	return container_of(ipcp, struct sem_array, sem_perm);
 }
 
-static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
-						int id)
+static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
+							int id)
 {
-	struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id);
+	struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);
 
 	if (IS_ERR(ipcp))
-		return (struct sem_array *)ipcp;
+		return ERR_CAST(ipcp);
 
 	return container_of(ipcp, struct sem_array, sem_perm);
 }
 
 static inline void sem_lock_and_putref(struct sem_array *sma)
 {
-	ipc_lock_by_ptr(&sma->sem_perm);
+	rcu_read_lock();
+	sem_lock(sma, NULL, -1);
 	ipc_rcu_putref(sma);
 }
 
 static inline void sem_getref_and_unlock(struct sem_array *sma)
 {
-	ipc_rcu_getref(sma);
-	ipc_unlock(&(sma)->sem_perm);
+	WARN_ON_ONCE(!ipc_rcu_getref(sma));
+	sem_unlock(sma, -1);
 }
 
 static inline void sem_putref(struct sem_array *sma)
 {
-	ipc_lock_by_ptr(&sma->sem_perm);
-	ipc_rcu_putref(sma);
-	ipc_unlock(&(sma)->sem_perm);
+	sem_lock_and_putref(sma);
+	sem_unlock(sma, -1);
+}
+
+/*
+ * Call inside the rcu read section.
+ */
+static inline void sem_getref(struct sem_array *sma)
+{
+	sem_lock(sma, NULL, -1);
+	WARN_ON_ONCE(!ipc_rcu_getref(sma));
+	sem_unlock(sma, -1);
 }
 
 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -324,15 +440,17 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 
 	sma->sem_base = (struct sem *) &sma[1];
 
-	for (i = 0; i < nsems; i++)
+	for (i = 0; i < nsems; i++) {
 		INIT_LIST_HEAD(&sma->sem_base[i].sem_pending);
+		spin_lock_init(&sma->sem_base[i].lock);
+	}
 
 	sma->complex_count = 0;
 	INIT_LIST_HEAD(&sma->sem_pending);
 	INIT_LIST_HEAD(&sma->list_id);
 	sma->sem_nsems = nsems;
 	sma->sem_ctime = get_seconds();
-	sem_unlock(sma);
+	sem_unlock(sma, -1);
 
 	return sma->sem_perm.id;
 }
@@ -471,7 +589,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt,
 	q->status = IN_WAKEUP;
 	q->pid = error;
 
-	list_add_tail(&q->simple_list, pt);
+	list_add_tail(&q->list, pt);
 }
 
 /**
@@ -489,7 +607,7 @@ static void wake_up_sem_queue_do(struct list_head *pt)
 	int did_something;
 
 	did_something = !list_empty(pt);
-	list_for_each_entry_safe(q, t, pt, simple_list) {
+	list_for_each_entry_safe(q, t, pt, list) {
 		wake_up_process(q->sleeper);
 		/* q can disappear immediately after writing q->status. */
 		smp_wmb();
@@ -502,9 +620,7 @@ static void wake_up_sem_queue_do(struct list_head *pt)
 static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
 {
 	list_del(&q->list);
-	if (q->nsops == 1)
-		list_del(&q->simple_list);
-	else
+	if (q->nsops > 1)
 		sma->complex_count--;
 }
 
@@ -557,9 +673,9 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q)
 	}
 	/*
 	 * semval is 0. Check if there are wait-for-zero semops.
-	 * They must be the first entries in the per-semaphore simple queue
+	 * They must be the first entries in the per-semaphore queue
 	 */
-	h = list_first_entry(&curr->sem_pending, struct sem_queue, simple_list);
+	h = list_first_entry(&curr->sem_pending, struct sem_queue, list);
 	BUG_ON(h->nsops != 1);
 	BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num);
 
@@ -579,8 +695,9 @@ static int check_restart(struct sem_array *sma, struct sem_queue *q)
  * @pt: list head for the tasks that must be woken up.
  *
  * update_queue must be called after a semaphore in a semaphore array
- * was modified. If multiple semaphore were modified, then @semnum
- * must be set to -1.
+ * was modified. If multiple semaphores were modified, update_queue must
+ * be called with semnum = -1, as well as with the number of each modified
+ * semaphore.
  * The tasks that must be woken up are added to @pt. The return code
  * is stored in q->pid.
  * The function return 1 if at least one semop was completed successfully.
@@ -590,30 +707,19 @@ static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt
 	struct sem_queue *q;
 	struct list_head *walk;
 	struct list_head *pending_list;
-	int offset;
 	int semop_completed = 0;
 
-	/* if there are complex operations around, then knowing the semaphore
-	 * that was modified doesn't help us. Assume that multiple semaphores
-	 * were modified.
-	 */
-	if (sma->complex_count)
-		semnum = -1;
-
-	if (semnum == -1) {
+	if (semnum == -1)
 		pending_list = &sma->sem_pending;
-		offset = offsetof(struct sem_queue, list);
-	} else {
+	else
 		pending_list = &sma->sem_base[semnum].sem_pending;
-		offset = offsetof(struct sem_queue, simple_list);
-	}
 
 again:
 	walk = pending_list->next;
 	while (walk != pending_list) {
 		int error, restart;
 
-		q = (struct sem_queue *)((char *)walk - offset);
+		q = container_of(walk, struct sem_queue, list);
 		walk = walk->next;
 
 		/* If we are scanning the single sop, per-semaphore list of
@@ -672,9 +778,18 @@ static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsop
 	if (sma->complex_count || sops == NULL) {
 		if (update_queue(sma, -1, pt))
 			otime = 1;
+	}
+
+	if (!sops) {
+		/* No semops; something special is going on. */
+		for (i = 0; i < sma->sem_nsems; i++) {
+			if (update_queue(sma, i, pt))
+				otime = 1;
+		}
 		goto done;
 	}
 
+	/* Check the semaphores that were modified. */
 	for (i = 0; i < nsops; i++) {
 		if (sops[i].sem_op > 0 ||
 			(sops[i].sem_op < 0 &&
@@ -745,6 +860,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	struct sem_queue *q, *tq;
 	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
 	struct list_head tasks;
+	int i;
 
 	/* Free the existing undo structures for this semaphore set.  */
 	assert_spin_locked(&sma->sem_perm.lock);
@@ -763,10 +879,17 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 		unlink_queue(sma, q);
 		wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
 	}
+	for (i = 0; i < sma->sem_nsems; i++) {
+		struct sem *sem = sma->sem_base + i;
+		list_for_each_entry_safe(q, tq, &sem->sem_pending, list) {
+			unlink_queue(sma, q);
+			wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
+		}
+	}
 
 	/* Remove the semaphore set from the IDR */
 	sem_rmid(ns, sma);
-	sem_unlock(sma);
+	sem_unlock(sma, -1);
 
 	wake_up_sem_queue_do(&tasks);
 	ns->used_sems -= sma->sem_nsems;
@@ -842,18 +965,25 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
 	case SEM_STAT:
 	{
 		struct semid64_ds tbuf;
-		int id;
+		int id = 0;
+
+		memset(&tbuf, 0, sizeof(tbuf));
 
 		if (cmd == SEM_STAT) {
-			sma = sem_lock(ns, semid);
-			if (IS_ERR(sma))
-				return PTR_ERR(sma);
+			rcu_read_lock();
+			sma = sem_obtain_object(ns, semid);
+			if (IS_ERR(sma)) {
+				err = PTR_ERR(sma);
+				goto out_unlock;
+			}
 			id = sma->sem_perm.id;
 		} else {
-			sma = sem_lock_check(ns, semid);
-			if (IS_ERR(sma))
-				return PTR_ERR(sma);
-			id = 0;
+			rcu_read_lock();
+			sma = sem_obtain_object_check(ns, semid);
+			if (IS_ERR(sma)) {
+				err = PTR_ERR(sma);
+				goto out_unlock;
+			}
 		}
 
 		err = -EACCES;
@@ -864,13 +994,11 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
 		if (err)
 			goto out_unlock;
 
-		memset(&tbuf, 0, sizeof(tbuf));
-
 		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
 		tbuf.sem_otime  = sma->sem_otime;
 		tbuf.sem_ctime  = sma->sem_ctime;
 		tbuf.sem_nsems  = sma->sem_nsems;
-		sem_unlock(sma);
+		rcu_read_unlock();
 		if (copy_semid_to_user(p, &tbuf, version))
 			return -EFAULT;
 		return id;
@@ -879,7 +1007,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
 		return -EINVAL;
 	}
 out_unlock:
-	sem_unlock(sma);
+	rcu_read_unlock();
 	return err;
 }
 
@@ -890,7 +1018,6 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 	struct sem_array *sma;
 	struct sem* curr;
 	int err;
-	int nsems;
 	struct list_head tasks;
 	int val;
 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
@@ -901,31 +1028,39 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 	val = arg;
 #endif
 
-	sma = sem_lock_check(ns, semid);
-	if (IS_ERR(sma))
-		return PTR_ERR(sma);
+	if (val > SEMVMX || val < 0)
+		return -ERANGE;
 
 	INIT_LIST_HEAD(&tasks);
-	nsems = sma->sem_nsems;
 
-	err = -EACCES;
-	if (ipcperms(ns, &sma->sem_perm, S_IWUGO))
-		goto out_unlock;
+	rcu_read_lock();
+	sma = sem_obtain_object_check(ns, semid);
+	if (IS_ERR(sma)) {
+		rcu_read_unlock();
+		return PTR_ERR(sma);
+	}
+
+	if (semnum < 0 || semnum >= sma->sem_nsems) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
+
+	if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
+		rcu_read_unlock();
+		return -EACCES;
+	}
 
 	err = security_sem_semctl(sma, SETVAL);
-	if (err)
-		goto out_unlock;
+	if (err) {
+		rcu_read_unlock();
+		return -EACCES;
+	}
 
-	err = -EINVAL;
-	if(semnum < 0 || semnum >= nsems)
-		goto out_unlock;
+	sem_lock(sma, NULL, -1);
 
 	curr = &sma->sem_base[semnum];
 
-	err = -ERANGE;
-	if (val > SEMVMX || val < 0)
-		goto out_unlock;
-
 	assert_spin_locked(&sma->sem_perm.lock);
 	list_for_each_entry(un, &sma->list_id, list_id)
 		un->semadj[semnum] = 0;
@@ -935,11 +1070,9 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
 	sma->sem_ctime = get_seconds();
 	/* maybe some queued-up processes were waiting for this */
 	do_smart_update(sma, NULL, 0, 0, &tasks);
-	err = 0;
-out_unlock:
-	sem_unlock(sma);
+	sem_unlock(sma, -1);
 	wake_up_sem_queue_do(&tasks);
-	return err;
+	return 0;
 }
 
 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
@@ -947,27 +1080,34 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 {
 	struct sem_array *sma;
 	struct sem* curr;
-	int err;
+	int err, nsems;
 	ushort fast_sem_io[SEMMSL_FAST];
 	ushort* sem_io = fast_sem_io;
-	int nsems;
 	struct list_head tasks;
 
-	sma = sem_lock_check(ns, semid);
-	if (IS_ERR(sma))
+	INIT_LIST_HEAD(&tasks);
+
+	rcu_read_lock();
+	sma = sem_obtain_object_check(ns, semid);
+	if (IS_ERR(sma)) {
+		rcu_read_unlock();
 		return PTR_ERR(sma);
+	}
 
-	INIT_LIST_HEAD(&tasks);
 	nsems = sma->sem_nsems;
 
 	err = -EACCES;
 	if (ipcperms(ns, &sma->sem_perm,
-			cmd == SETALL ? S_IWUGO : S_IRUGO))
-		goto out_unlock;
+			cmd == SETALL ? S_IWUGO : S_IRUGO)) {
+		rcu_read_unlock();
+		goto out_wakeup;
+	}
 
 	err = security_sem_semctl(sma, cmd);
-	if (err)
-		goto out_unlock;
+	if (err) {
+		rcu_read_unlock();
+		goto out_wakeup;
+	}
 
 	err = -EACCES;
 	switch (cmd) {
@@ -977,7 +1117,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		int i;
 
 		if(nsems > SEMMSL_FAST) {
-			sem_getref_and_unlock(sma);
+			sem_getref(sma);
 
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
 			if(sem_io == NULL) {
@@ -987,15 +1127,16 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 
 			sem_lock_and_putref(sma);
 			if (sma->sem_perm.deleted) {
-				sem_unlock(sma);
+				sem_unlock(sma, -1);
 				err = -EIDRM;
 				goto out_free;
 			}
-		}
+		} else
+			sem_lock(sma, NULL, -1);
 
 		for (i = 0; i < sma->sem_nsems; i++)
 			sem_io[i] = sma->sem_base[i].semval;
-		sem_unlock(sma);
+		sem_unlock(sma, -1);
 		err = 0;
 		if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
 			err = -EFAULT;
@@ -1006,7 +1147,11 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		int i;
 		struct sem_undo *un;
 
-		sem_getref_and_unlock(sma);
+		if (!ipc_rcu_getref(sma)) {
+			rcu_read_unlock();
+			return -EIDRM;
+		}
+		rcu_read_unlock();
 
 		if(nsems > SEMMSL_FAST) {
 			sem_io = ipc_alloc(sizeof(ushort)*nsems);
@@ -1031,7 +1176,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		}
 		sem_lock_and_putref(sma);
 		if (sma->sem_perm.deleted) {
-			sem_unlock(sma);
+			sem_unlock(sma, -1);
 			err = -EIDRM;
 			goto out_free;
 		}
@@ -1053,9 +1198,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 	/* GETVAL, GETPID, GETNCTN, GETZCNT: fall-through */
 	}
 	err = -EINVAL;
-	if(semnum < 0 || semnum >= nsems)
-		goto out_unlock;
+	if (semnum < 0 || semnum >= nsems) {
+		rcu_read_unlock();
+		goto out_wakeup;
+	}
 
+	sem_lock(sma, NULL, -1);
 	curr = &sma->sem_base[semnum];
 
 	switch (cmd) {
@@ -1072,10 +1220,11 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		err = count_semzcnt(sma,semnum);
 		goto out_unlock;
 	}
+
 out_unlock:
-	sem_unlock(sma);
+	sem_unlock(sma, -1);
+out_wakeup:
 	wake_up_sem_queue_do(&tasks);
-
 out_free:
 	if(sem_io != fast_sem_io)
 		ipc_free(sem_io, sizeof(ushort)*nsems);
@@ -1126,33 +1275,39 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
 			return -EFAULT;
 	}
 
-	ipcp = ipcctl_pre_down(ns, &sem_ids(ns), semid, cmd,
-			       &semid64.sem_perm, 0);
+	ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
+				      &semid64.sem_perm, 0);
 	if (IS_ERR(ipcp))
		return PTR_ERR(ipcp);
 
 	sma = container_of(ipcp, struct sem_array, sem_perm);
 
 	err = security_sem_semctl(sma, cmd);
-	if (err)
+	if (err) {
+		rcu_read_unlock();
 		goto out_unlock;
+	}
 
 	switch(cmd){
 	case IPC_RMID:
+		sem_lock(sma, NULL, -1);
 		freeary(ns, ipcp);
 		goto out_up;
 	case IPC_SET:
+		sem_lock(sma, NULL, -1);
 		err = ipc_update_perm(&semid64.sem_perm, ipcp);
 		if (err)
 			goto out_unlock;
 		sma->sem_ctime = get_seconds();
 		break;
 	default:
+		rcu_read_unlock();
 		err = -EINVAL;
+		goto out_up;
 	}
 
out_unlock:
-	sem_unlock(sma);
+	sem_unlock(sma, -1);
out_up:
 	up_write(&sem_ids(ns).rw_mutex);
 	return err;
@@ -1264,8 +1419,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	struct sem_array *sma;
 	struct sem_undo_list *ulp;
 	struct sem_undo *un, *new;
-	int nsems;
-	int error;
+	int nsems, error;
 
 	error = get_undo_list(&ulp);
 	if (error)
@@ -1277,16 +1431,22 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	spin_unlock(&ulp->lock);
 	if (likely(un!=NULL))
 		goto out;
-	rcu_read_unlock();
 
 	/* no undo structure around - allocate one. */
 	/* step 1: figure out the size of the semaphore array */
-	sma = sem_lock_check(ns, semid);
-	if (IS_ERR(sma))
+	sma = sem_obtain_object_check(ns, semid);
+	if (IS_ERR(sma)) {
+		rcu_read_unlock();
 		return ERR_CAST(sma);
+	}
 
 	nsems = sma->sem_nsems;
-	sem_getref_and_unlock(sma);
+	if (!ipc_rcu_getref(sma)) {
+		rcu_read_unlock();
+		un = ERR_PTR(-EIDRM);
+		goto out;
+	}
+	rcu_read_unlock();
 
 	/* step 2: allocate new undo structure */
 	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
@@ -1298,7 +1458,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	/* step 3: Acquire the lock on semaphore array */
 	sem_lock_and_putref(sma);
 	if (sma->sem_perm.deleted) {
-		sem_unlock(sma);
+		sem_unlock(sma, -1);
 		kfree(new);
 		un = ERR_PTR(-EIDRM);
 		goto out;
@@ -1326,7 +1486,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 success:
 	spin_unlock(&ulp->lock);
 	rcu_read_lock();
-	sem_unlock(sma);
+	sem_unlock(sma, -1);
 out:
 	return un;
 }
@@ -1366,7 +1526,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	struct sembuf fast_sops[SEMOPM_FAST];
 	struct sembuf* sops = fast_sops, *sop;
 	struct sem_undo *un;
-	int undos = 0, alter = 0, max;
+	int undos = 0, alter = 0, max, locknum;
 	struct sem_queue queue;
 	unsigned long jiffies_left = 0;
 	struct ipc_namespace *ns;
@@ -1410,25 +1570,45 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 			alter = 1;
 	}
 
+	INIT_LIST_HEAD(&tasks);
+
 	if (undos) {
+		/* On success, find_alloc_undo takes the rcu_read_lock */
 		un = find_alloc_undo(ns, semid);
 		if (IS_ERR(un)) {
 			error = PTR_ERR(un);
 			goto out_free;
 		}
-	} else
+	} else {
 		un = NULL;
+		rcu_read_lock();
+	}
 
-	INIT_LIST_HEAD(&tasks);
-
-	sma = sem_lock_check(ns, semid);
+	sma = sem_obtain_object_check(ns, semid);
 	if (IS_ERR(sma)) {
-		if (un)
-			rcu_read_unlock();
+		rcu_read_unlock();
 		error = PTR_ERR(sma);
 		goto out_free;
 	}
 
+	error = -EFBIG;
+	if (max >= sma->sem_nsems) {
+		rcu_read_unlock();
+		goto out_wakeup;
+	}
+
+	error = -EACCES;
+	if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
+		rcu_read_unlock();
+		goto out_wakeup;
+	}
+
+	error = security_sem_semop(sma, sops, nsops, alter);
+	if (error) {
+		rcu_read_unlock();
+		goto out_wakeup;
+	}
+
 	/*
 	 * semid identifiers are not unique - find_alloc_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
@@ -1437,33 +1617,8 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	 * "un" itself is guaranteed by rcu.
 	 */
 	error = -EIDRM;
-	if (un) {
-		if (un->semid == -1) {
-			rcu_read_unlock();
-			goto out_unlock_free;
-		} else {
-			/*
-			 * rcu lock can be released, "un" cannot disappear:
-			 * - sem_lock is acquired, thus IPC_RMID is
-			 * impossible.
-			 * - exit_sem is impossible, it always operates on
-			 * current (or a dead task).
-			 */
-
-			rcu_read_unlock();
-		}
-	}
-
-	error = -EFBIG;
-	if (max >= sma->sem_nsems)
-		goto out_unlock_free;
-
-	error = -EACCES;
-	if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
-		goto out_unlock_free;
-
-	error = security_sem_semop(sma, sops, nsops, alter);
-	if (error)
+	locknum = sem_lock(sma, sops, nsops);
+	if (un && un->semid == -1)
 		goto out_unlock_free;
 
 	error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
@@ -1483,21 +1638,20 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 	queue.undo = un;
 	queue.pid = task_tgid_vnr(current);
 	queue.alter = alter;
-	if (alter)
-		list_add_tail(&queue.list, &sma->sem_pending);
-	else
-		list_add(&queue.list, &sma->sem_pending);
 
 	if (nsops == 1) {
 		struct sem *curr;
 		curr = &sma->sem_base[sops->sem_num];
 
 		if (alter)
-			list_add_tail(&queue.simple_list, &curr->sem_pending);
+			list_add_tail(&queue.list, &curr->sem_pending);
 		else
-			list_add(&queue.simple_list, &curr->sem_pending);
+			list_add(&queue.list, &curr->sem_pending);
 	} else {
-		INIT_LIST_HEAD(&queue.simple_list);
+		if (alter)
+			list_add_tail(&queue.list, &sma->sem_pending);
+		else
+			list_add(&queue.list, &sma->sem_pending);
 		sma->complex_count++;
 	}
@@ -1506,7 +1660,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 
 sleep_again:
 	current->state = TASK_INTERRUPTIBLE;
-	sem_unlock(sma);
+	sem_unlock(sma, locknum);
 
 	if (timeout)
 		jiffies_left = schedule_timeout(jiffies_left);
@@ -1528,7 +1682,7 @@ sleep_again:
 		goto out_free;
 	}
 
-	sma = sem_lock(ns, semid);
+	sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum);
 
 	/*
 	 * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing.
@@ -1567,8 +1721,8 @@ sleep_again:
 	unlink_queue(sma, &queue);
 
 out_unlock_free:
-	sem_unlock(sma);
-
+	sem_unlock(sma, locknum);
+out_wakeup:
 	wake_up_sem_queue_do(&tasks);
 out_free:
 	if(sops != fast_sops)
@@ -1631,8 +1785,7 @@ void exit_sem(struct task_struct *tsk)
 		struct sem_array *sma;
 		struct sem_undo *un;
 		struct list_head tasks;
-		int semid;
-		int i;
+		int semid, i;
 
 		rcu_read_lock();
 		un = list_entry_rcu(ulp->list_proc.next,
@@ -1641,23 +1794,26 @@ void exit_sem(struct task_struct *tsk)
 			semid = -1;
 		else
 			semid = un->semid;
-		rcu_read_unlock();
 
-		if (semid == -1)
+		if (semid == -1) {
+			rcu_read_unlock();
 			break;
+		}
 
-		sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid);
-
+		sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid);
 		/* exit_sem raced with IPC_RMID, nothing to do */
-		if (IS_ERR(sma))
+		if (IS_ERR(sma)) {
+			rcu_read_unlock();
 			continue;
+		}
 
+		sem_lock(sma, NULL, -1);
 		un = __lookup_undo(ulp, semid);
 		if (un == NULL) {
 			/* exit_sem raced with IPC_RMID+semget() that created
 			 * exactly the same semid. Nothing to do.
 			 */
-			sem_unlock(sma);
+			sem_unlock(sma, -1);
 			continue;
 		}
@@ -1697,7 +1853,7 @@ void exit_sem(struct task_struct *tsk)
 		/* maybe some queued-up processes were waiting for this */
 		INIT_LIST_HEAD(&tasks);
 		do_smart_update(sma, NULL, 0, 1, &tasks);
-		sem_unlock(sma);
+		sem_unlock(sma, -1);
 
 		wake_up_sem_queue_do(&tasks);
 
 		kfree_rcu(un, rcu);
```
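For readers who do not want to trace the hunks above, here is a standalone analogue of the locking pattern the new sem_lock()/sem_unlock() implement, transplanted onto pthread spinlocks. All names here (array_t, sem_entry_t, array_lock) are invented for illustration; pthread_spin_trylock() stands in for the kernel's spin_is_locked() probe, and a lock/unlock pair stands in for spin_unlock_wait().

```c
#include <pthread.h>

typedef struct {
	pthread_spinlock_t lock;	/* per-semaphore lock (fast path) */
} sem_entry_t;

typedef struct {
	pthread_spinlock_t global;	/* whole-array lock (slow path) */
	int complex_count;		/* complex ops queued; only changed
					 * while 'global' is held */
	int nsems;
	sem_entry_t *sem;
} array_t;

/* Returns the index of the per-semaphore lock taken, or -1 if the
 * global lock was taken, mirroring sem_lock()'s locknum convention. */
static int array_lock(array_t *a, int semnum, int nsops)
{
	for (;;) {
		if (nsops == 1 && !a->complex_count) {
			/* Fast path: lock only the semaphore involved. */
			pthread_spin_lock(&a->sem[semnum].lock);

			if (a->complex_count) {
				/* A complex op was queued while we were
				 * spinning; fall back to the slow path. */
				pthread_spin_unlock(&a->sem[semnum].lock);
			} else if (pthread_spin_trylock(&a->global) == 0) {
				/* Nobody holds the global lock, so our
				 * per-semaphore lock is now valid. */
				pthread_spin_unlock(&a->global);
				return semnum;
			} else {
				/* A global-lock holder is active: back
				 * off and retry from the top. */
				pthread_spin_unlock(&a->sem[semnum].lock);
				continue;
			}
		}

		/* Slow path: take the global lock, then wait for every
		 * in-flight per-semaphore lock holder to drain out. */
		pthread_spin_lock(&a->global);
		for (int i = 0; i < a->nsems; i++) {
			pthread_spin_lock(&a->sem[i].lock);
			pthread_spin_unlock(&a->sem[i].lock);
		}
		return -1;
	}
}

static void array_unlock(array_t *a, int locknum)
{
	if (locknum == -1)
		pthread_spin_unlock(&a->global);
	else
		pthread_spin_unlock(&a->sem[locknum].lock);
}
```

This is the scalability win in miniature: two single-sop callers touching different semaphores in the same array take different spinlocks and never contend on the global one (beyond the momentary trylock probe in this emulation).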
