Diffstat (limited to 'kernel')
-rw-r--r--	kernel/audit_watch.c		|  85
-rw-r--r--	kernel/cpuset.c			|   7
-rw-r--r--	kernel/futex.c			| 125
-rw-r--r--	kernel/hrtimer.c		|   6
-rw-r--r--	kernel/irq/internals.h		|   6
-rw-r--r--	kernel/irq/irqdesc.c		|  11
-rw-r--r--	kernel/irq/manage.c		|  13
-rw-r--r--	kernel/irq/pm.c			|   3
-rw-r--r--	kernel/irq/resend.c		|   2
-rw-r--r--	kernel/perf_event.c		|  19
-rw-r--r--	kernel/power/main.c		|   2
-rw-r--r--	kernel/power/process.c		|   6
-rw-r--r--	kernel/power/snapshot.c		|   7
-rw-r--r--	kernel/ptrace.c			|   6
-rw-r--r--	kernel/sched.c			|   1
-rw-r--r--	kernel/sched_rt.c		|  14
-rw-r--r--	kernel/sys_ni.c			|   5
-rw-r--r--	kernel/sysctl.c			|  15
-rw-r--r--	kernel/sysctl_binary.c		|  19
-rw-r--r--	kernel/time/tick-broadcast.c	|  10
-rw-r--r--	kernel/time/tick-common.c	|   6
-rw-r--r--	kernel/time/tick-internal.h	|   3
-rw-r--r--	kernel/timer.c			|   6
-rw-r--r--	kernel/trace/blktrace.c		|  16
-rw-r--r--	kernel/workqueue.c		|  43
25 files changed, 239 insertions(+), 197 deletions(-)
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index d2e3c7866460..e683869365d9 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -144,9 +144,9 @@ int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev)
 }
 
 /* Initialize a parent watch entry. */
-static struct audit_parent *audit_init_parent(struct nameidata *ndp)
+static struct audit_parent *audit_init_parent(struct path *path)
 {
-	struct inode *inode = ndp->path.dentry->d_inode;
+	struct inode *inode = path->dentry->d_inode;
 	struct audit_parent *parent;
 	int ret;
 
@@ -353,53 +353,40 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
 }
 
 /* Get path information necessary for adding watches. */
-static int audit_get_nd(char *path, struct nameidata **ndp, struct nameidata **ndw)
+static int audit_get_nd(struct audit_watch *watch, struct path *parent)
 {
-	struct nameidata *ndparent, *ndwatch;
+	struct nameidata nd;
+	struct dentry *d;
 	int err;
 
-	ndparent = kmalloc(sizeof(*ndparent), GFP_KERNEL);
-	if (unlikely(!ndparent))
-		return -ENOMEM;
+	err = kern_path_parent(watch->path, &nd);
+	if (err)
+		return err;
 
-	ndwatch = kmalloc(sizeof(*ndwatch), GFP_KERNEL);
-	if (unlikely(!ndwatch)) {
-		kfree(ndparent);
-		return -ENOMEM;
+	if (nd.last_type != LAST_NORM) {
+		path_put(&nd.path);
+		return -EINVAL;
 	}
 
-	err = path_lookup(path, LOOKUP_PARENT, ndparent);
-	if (err) {
-		kfree(ndparent);
-		kfree(ndwatch);
-		return err;
+	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+	d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
+	if (IS_ERR(d)) {
+		mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+		path_put(&nd.path);
+		return PTR_ERR(d);
 	}
-
-	err = path_lookup(path, 0, ndwatch);
-	if (err) {
-		kfree(ndwatch);
-		ndwatch = NULL;
+	if (d->d_inode) {
+		/* update watch filter fields */
+		watch->dev = d->d_inode->i_sb->s_dev;
+		watch->ino = d->d_inode->i_ino;
 	}
+	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 
-	*ndp = ndparent;
-	*ndw = ndwatch;
-
+	*parent = nd.path;
+	dput(d);
 	return 0;
 }
 
-/* Release resources used for watch path information. */
-static void audit_put_nd(struct nameidata *ndp, struct nameidata *ndw)
-{
-	if (ndp) {
-		path_put(&ndp->path);
-		kfree(ndp);
-	}
-	if (ndw) {
-		path_put(&ndw->path);
-		kfree(ndw);
-	}
-}
-
 /* Associate the given rule with an existing parent.
  * Caller must hold audit_filter_mutex. */
 static void audit_add_to_parent(struct audit_krule *krule,
@@ -440,31 +427,24 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
 {
 	struct audit_watch *watch = krule->watch;
 	struct audit_parent *parent;
-	struct nameidata *ndp = NULL, *ndw = NULL;
+	struct path parent_path;
 	int h, ret = 0;
 
 	mutex_unlock(&audit_filter_mutex);
 
 	/* Avoid calling path_lookup under audit_filter_mutex. */
-	ret = audit_get_nd(watch->path, &ndp, &ndw);
-	if (ret) {
-		/* caller expects mutex locked */
-		mutex_lock(&audit_filter_mutex);
-		goto error;
-	}
+	ret = audit_get_nd(watch, &parent_path);
 
+	/* caller expects mutex locked */
 	mutex_lock(&audit_filter_mutex);
 
-	/* update watch filter fields */
-	if (ndw) {
-		watch->dev = ndw->path.dentry->d_inode->i_sb->s_dev;
-		watch->ino = ndw->path.dentry->d_inode->i_ino;
-	}
+	if (ret)
+		return ret;
 
 	/* either find an old parent or attach a new one */
-	parent = audit_find_parent(ndp->path.dentry->d_inode);
+	parent = audit_find_parent(parent_path.dentry->d_inode);
 	if (!parent) {
-		parent = audit_init_parent(ndp);
+		parent = audit_init_parent(&parent_path);
 		if (IS_ERR(parent)) {
 			ret = PTR_ERR(parent);
 			goto error;
@@ -479,9 +459,8 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
 	h = audit_hash_ino((u32)watch->ino);
 	*list = &audit_inode_hash[h];
 error:
-	audit_put_nd(ndp, ndw);		/* NULL args OK */
+	path_put(&parent_path);
 	return ret;
-
 }
 
 void audit_remove_watch_rule(struct audit_krule *krule)
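For illustration, the lookup pattern adopted above in one place: instead of two full path_lookup() walks into heap-allocated nameidata structures, the parent directory is resolved once and the last component is looked up in place under the parent's i_mutex. A minimal sketch, assuming the 2.6.39-era VFS helpers kern_path_parent() and lookup_one_len(); the helper name is hypothetical:

/* Resolve the parent of 'pathname' and peek at its last component. */
static int lookup_parent_and_child(const char *pathname, struct path *parent)
{
	struct nameidata nd;
	struct dentry *child;
	int err;

	/* One walk resolves the parent and records the last component. */
	err = kern_path_parent(pathname, &nd);
	if (err)
		return err;

	/* Trailing ".", ".." or "/" cannot be watched. */
	if (nd.last_type != LAST_NORM) {
		path_put(&nd.path);
		return -EINVAL;
	}

	/*
	 * The child is looked up in place, under the parent's i_mutex,
	 * so no second full path walk is needed. A negative dentry is
	 * fine here: the watch target may not exist yet.
	 */
	mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
	child = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
	mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
	if (IS_ERR(child)) {
		path_put(&nd.path);
		return PTR_ERR(child);
	}

	dput(child);
	*parent = nd.path;	/* caller releases with path_put() */
	return 0;
}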
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 4349935c2ad8..e92e98189032 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1575,8 +1575,10 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
 		return -ENODEV;
 
 	trialcs = alloc_trial_cpuset(cs);
-	if (!trialcs)
-		return -ENOMEM;
+	if (!trialcs) {
+		retval = -ENOMEM;
+		goto out;
+	}
 
 	switch (cft->private) {
 	case FILE_CPULIST:
@@ -1591,6 +1593,7 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
 	}
 
 	free_trial_cpuset(trialcs);
+out:
 	cgroup_unlock();
 	return retval;
 }
diff --git a/kernel/futex.c b/kernel/futex.c
index 64c38115c7b6..bda415715382 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -381,15 +381,16 @@ static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
 	return NULL;
 }
 
-static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
+static int cmpxchg_futex_value_locked(u32 *curval, u32 __user *uaddr,
+				      u32 uval, u32 newval)
 {
-	u32 curval;
+	int ret;
 
 	pagefault_disable();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
 	pagefault_enable();
 
-	return curval;
+	return ret;
 }
 
 static int get_futex_value_locked(u32 *dest, u32 __user *from)
@@ -674,7 +675,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
 				struct task_struct *task, int set_waiters)
 {
 	int lock_taken, ret, ownerdied = 0;
-	u32 uval, newval, curval;
+	u32 uval, newval, curval, vpid = task_pid_vnr(task);
 
 retry:
 	ret = lock_taken = 0;
@@ -684,19 +685,17 @@ retry:
 	 * (by doing a 0 -> TID atomic cmpxchg), while holding all
 	 * the locks. It will most likely not succeed.
 	 */
-	newval = task_pid_vnr(task);
+	newval = vpid;
 	if (set_waiters)
 		newval |= FUTEX_WAITERS;
 
-	curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
-
-	if (unlikely(curval == -EFAULT))
+	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval)))
 		return -EFAULT;
 
 	/*
 	 * Detect deadlocks.
 	 */
-	if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task))))
+	if ((unlikely((curval & FUTEX_TID_MASK) == vpid)))
 		return -EDEADLK;
 
 	/*
@@ -723,14 +722,12 @@ retry:
 	 */
 	if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
 		/* Keep the OWNER_DIED bit */
-		newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task);
+		newval = (curval & ~FUTEX_TID_MASK) | vpid;
 		ownerdied = 0;
 		lock_taken = 1;
 	}
 
-	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
-	if (unlikely(curval == -EFAULT))
+	if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
 		return -EFAULT;
 	if (unlikely(curval != uval))
 		goto retry;
@@ -775,6 +772,24 @@ retry:
 	return ret;
 }
 
+/**
+ * __unqueue_futex() - Remove the futex_q from its futex_hash_bucket
+ * @q:	The futex_q to unqueue
+ *
+ * The q->lock_ptr must not be NULL and must be held by the caller.
+ */
+static void __unqueue_futex(struct futex_q *q)
+{
+	struct futex_hash_bucket *hb;
+
+	if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr)
+			|| plist_node_empty(&q->list)))
+		return;
+
+	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
+	plist_del(&q->list, &hb->chain);
+}
+
 /*
  * The hash bucket lock must be held when this is called.
  * Afterwards, the futex_q must not be accessed.
@@ -792,7 +807,7 @@ static void wake_futex(struct futex_q *q)
 	 */
 	get_task_struct(p);
 
-	plist_del(&q->list, &q->list.plist);
+	__unqueue_futex(q);
 	/*
 	 * The waiting task can free the futex_q as soon as
 	 * q->lock_ptr = NULL is written, without taking any locks.  A
@@ -843,9 +858,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 
 		newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
 
-		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
-		if (curval == -EFAULT)
+		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
 			ret = -EFAULT;
 		else if (curval != uval)
 			ret = -EINVAL;
@@ -880,10 +893,8 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
 	 * There is no waiter, so we unlock the futex. The owner died
 	 * bit has not to be preserved here. We are the owner:
 	 */
-	oldval = cmpxchg_futex_value_locked(uaddr, uval, 0);
-
-	if (oldval == -EFAULT)
-		return oldval;
+	if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0))
+		return -EFAULT;
 	if (oldval != uval)
 		return -EAGAIN;
 
@@ -1071,9 +1082,6 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 		plist_del(&q->list, &hb1->chain);
 		plist_add(&q->list, &hb2->chain);
 		q->lock_ptr = &hb2->lock;
-#ifdef CONFIG_DEBUG_PI_LIST
-		q->list.plist.spinlock = &hb2->lock;
-#endif
 	}
 	get_futex_key_refs(key2);
 	q->key = *key2;
@@ -1100,16 +1108,12 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
 
 	get_futex_key_refs(key);
 	q->key = *key;
 
-	WARN_ON(plist_node_empty(&q->list));
-	plist_del(&q->list, &q->list.plist);
+	__unqueue_futex(q);
 
 	WARN_ON(!q->rt_waiter);
 	q->rt_waiter = NULL;
 
 	q->lock_ptr = &hb->lock;
-#ifdef CONFIG_DEBUG_PI_LIST
-	q->list.plist.spinlock = &hb->lock;
-#endif
 
 	wake_up_state(q->task, TASK_NORMAL);
 }
@@ -1457,9 +1461,6 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
 	prio = min(current->normal_prio, MAX_RT_PRIO);
 
 	plist_node_init(&q->list, prio);
-#ifdef CONFIG_DEBUG_PI_LIST
-	q->list.plist.spinlock = &hb->lock;
-#endif
 	plist_add(&q->list, &hb->chain);
 	q->task = current;
 	spin_unlock(&hb->lock);
@@ -1504,8 +1505,7 @@ retry:
 			spin_unlock(lock_ptr);
 			goto retry;
 		}
-		WARN_ON(plist_node_empty(&q->list));
-		plist_del(&q->list, &q->list.plist);
+		__unqueue_futex(q);
 
 		BUG_ON(q->pi_state);
 
@@ -1525,8 +1525,7 @@ retry:
 static void unqueue_me_pi(struct futex_q *q)
 	__releases(q->lock_ptr)
 {
-	WARN_ON(plist_node_empty(&q->list));
-	plist_del(&q->list, &q->list.plist);
+	__unqueue_futex(q);
 
 	BUG_ON(!q->pi_state);
 	free_pi_state(q->pi_state);
@@ -1578,9 +1577,7 @@ retry:
 	while (1) {
 		newval = (uval & FUTEX_OWNER_DIED) | newtid;
 
-		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
-		if (curval == -EFAULT)
+		if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
 			goto handle_fault;
 		if (curval == uval)
 			break;
@@ -1783,13 +1780,14 @@ static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
 	 *
 	 * The basic logical guarantee of a futex is that it blocks ONLY
	 * if cond(var) is known to be true at the time of blocking, for
-	 * any cond.  If we queued after testing *uaddr, that would open
-	 * a race condition where we could block indefinitely with
+	 * any cond.  If we locked the hash-bucket after testing *uaddr, that
+	 * would open a race condition where we could block indefinitely with
 	 * cond(var) false, which would violate the guarantee.
 	 *
-	 * A consequence is that futex_wait() can return zero and absorb
-	 * a wakeup when *uaddr != val on entry to the syscall.  This is
-	 * rare, but normal.
+	 * On the other hand, we insert q and release the hash-bucket only
+	 * after testing *uaddr.  This guarantees that futex_wait() will NOT
+	 * absorb a wakeup if *uaddr does not match the desired values
+	 * while the syscall executes.
 	 */
 retry:
 	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key);
@@ -2048,9 +2046,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
 {
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
-	u32 uval;
 	struct plist_head *head;
 	union futex_key key = FUTEX_KEY_INIT;
+	u32 uval, vpid = task_pid_vnr(current);
 	int ret;
 
 retry:
@@ -2059,7 +2057,7 @@ retry:
 	/*
 	 * We release only a lock we actually own:
 	 */
-	if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
+	if ((uval & FUTEX_TID_MASK) != vpid)
 		return -EPERM;
 
 	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key);
@@ -2074,17 +2072,14 @@ retry:
 	 * again. If it succeeds then we can return without waking
 	 * anyone else up:
 	 */
-	if (!(uval & FUTEX_OWNER_DIED))
-		uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0);
-
-
-	if (unlikely(uval == -EFAULT))
+	if (!(uval & FUTEX_OWNER_DIED) &&
+	    cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
 		goto pi_faulted;
 	/*
 	 * Rare case: we managed to release the lock atomically,
 	 * no need to wake anyone else up:
 	 */
-	if (unlikely(uval == task_pid_vnr(current)))
+	if (unlikely(uval == vpid))
 		goto out_unlock;
 
 	/*
@@ -2169,7 +2164,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
 		 * We were woken prior to requeue by a timeout or a signal.
 		 * Unqueue the futex_q and determine which it was.
 		 */
-		plist_del(&q->list, &q->list.plist);
+		plist_del(&q->list, &hb->chain);
 
 		/* Handle spurious wakeups gracefully */
 		ret = -EWOULDBLOCK;
@@ -2465,11 +2460,20 @@ retry:
 		 * userspace.
 		 */
 		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
-		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
-
-		if (nval == -EFAULT)
-			return -1;
-
+		/*
+		 * We are not holding a lock here, but we want to have
+		 * the pagefault_disable/enable() protection because
+		 * we want to handle the fault gracefully. If the
+		 * access fails we try to fault in the futex with R/W
+		 * verification via get_user_pages. get_user() above
+		 * does not guarantee R/W access. If that fails we
+		 * give up and leave the futex locked.
+		 */
+		if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) {
+			if (fault_in_user_writeable(uaddr))
+				return -1;
+			goto retry;
+		}
 		if (nval != uval)
 			goto retry;
 
@@ -2680,8 +2684,7 @@ static int __init futex_init(void)
 	 * implementation, the non-functional ones will return
 	 * -ENOSYS.
 	 */
-	curval = cmpxchg_futex_value_locked(NULL, 0, 0);
-	if (curval == -EFAULT)
+	if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
 		futex_cmpxchg_enabled = 1;
 
 	for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
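The cmpxchg_futex_value_locked() signature change threaded through every call site above exists to stop multiplexing the fault indication and the old futex value in a single u32: (u32)-EFAULT is a perfectly legal futex value, so the old convention was ambiguous. A before/after sketch of a typical call site (variable names illustrative):

/*
 * Old convention: old value and fault status share one u32 return.
 *
 *	curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
 *	if (curval == -EFAULT)		// collides with a real futex value
 *		return -EFAULT;
 *
 * New convention: the return value is a status code and the old value
 * comes back through a pointer, so the two can never be confused.
 */
u32 curval;

if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
	return -EFAULT;		/* page fault while accessing *uaddr */
if (curval != uval)
	goto retry;		/* somebody changed the futex word */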
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0c8d7c048615..e38f5a073d01 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -334,6 +334,11 @@ EXPORT_SYMBOL_GPL(ktime_add_safe);
 
 static struct debug_obj_descr hrtimer_debug_descr;
 
+static void *hrtimer_debug_hint(void *addr)
+{
+	return ((struct hrtimer *) addr)->function;
+}
+
 /*
  * fixup_init is called when:
  * - an active object is initialized
@@ -393,6 +398,7 @@ static int hrtimer_fixup_free(void *addr, enum debug_obj_state state)
 
 static struct debug_obj_descr hrtimer_debug_descr = {
 	.name		= "hrtimer",
+	.debug_hint	= hrtimer_debug_hint,
 	.fixup_init	= hrtimer_fixup_init,
 	.fixup_activate	= hrtimer_fixup_activate,
 	.fixup_free	= hrtimer_fixup_free,
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4571ae7e085a..99c3bc8a6fb4 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -3,6 +3,12 @@
  */
 #include <linux/irqdesc.h>
 
+#ifdef CONFIG_SPARSE_IRQ
+# define IRQ_BITMAP_BITS	(NR_IRQS + 8196)
+#else
+# define IRQ_BITMAP_BITS	NR_IRQS
+#endif
+
 extern int noirqdebug;
 
 #define irq_data_to_desc(data)	container_of(data, struct irq_desc, irq_data)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 282f20230e67..2039bea31bdf 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -94,7 +94,7 @@ int nr_irqs = NR_IRQS;
 EXPORT_SYMBOL_GPL(nr_irqs);
 
 static DEFINE_MUTEX(sparse_irq_lock);
-static DECLARE_BITMAP(allocated_irqs, NR_IRQS);
+static DECLARE_BITMAP(allocated_irqs, IRQ_BITMAP_BITS);
 
 #ifdef CONFIG_SPARSE_IRQ
 
@@ -217,6 +217,15 @@ int __init early_irq_init(void)
 	initcnt = arch_probe_nr_irqs();
 	printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d\n", NR_IRQS, nr_irqs, initcnt);
 
+	if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS))
+		nr_irqs = IRQ_BITMAP_BITS;
+
+	if (WARN_ON(initcnt > IRQ_BITMAP_BITS))
+		initcnt = IRQ_BITMAP_BITS;
+
+	if (initcnt > nr_irqs)
+		nr_irqs = initcnt;
+
 	for (i = 0; i < initcnt; i++) {
 		desc = alloc_desc(i, node);
 		set_bit(i, allocated_irqs);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0caa59f747dd..2782bacdf494 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -282,8 +282,17 @@ EXPORT_SYMBOL(disable_irq);
 
 void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
 {
-	if (resume)
+	if (resume) {
+		if (!(desc->status & IRQ_SUSPENDED)) {
+			if (!desc->action)
+				return;
+			if (!(desc->action->flags & IRQF_FORCE_RESUME))
+				return;
+			/* Pretend that it got disabled ! */
+			desc->depth++;
+		}
 		desc->status &= ~IRQ_SUSPENDED;
+	}
 
 	switch (desc->depth) {
 	case 0:
@@ -1100,7 +1109,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
 	if (retval)
 		kfree(action);
 
-#ifdef CONFIG_DEBUG_SHIRQ
+#ifdef CONFIG_DEBUG_SHIRQ_FIXME
 	if (!retval && (irqflags & IRQF_SHARED)) {
 		/*
 		 * It's a shared IRQ -- the driver ought to be prepared for it
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 0d4005d85b03..d6bfb89cce91 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -53,9 +53,6 @@ void resume_device_irqs(void)
 	for_each_irq_desc(irq, desc) {
 		unsigned long flags;
 
-		if (!(desc->status & IRQ_SUSPENDED))
-			continue;
-
 		raw_spin_lock_irqsave(&desc->lock, flags);
 		__enable_irq(desc, irq, true);
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 891115a929aa..dc49358b73fa 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -23,7 +23,7 @@
 #ifdef CONFIG_HARDIRQS_SW_RESEND
 
 /* Bitmap to handle software resend of interrupts: */
-static DECLARE_BITMAP(irqs_resend, NR_IRQS);
+static DECLARE_BITMAP(irqs_resend, IRQ_BITMAP_BITS);
 
 /*
  * Run software resends of IRQ's
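The resume_device_irqs()/__enable_irq() changes above move the IRQ_SUSPENDED check out of the resume loop and into __enable_irq(), so that interrupts which were never disabled for suspend can still be forcibly re-enabled when their action carries IRQF_FORCE_RESUME. A condensed model of the new resume decision (not the kernel's code, just the logic; 'status', 'flags' and 'depth' mirror irq_desc fields):

static bool irq_wants_resume(unsigned int status, unsigned long flags,
			     unsigned int *depth)
{
	if (!(status & IRQ_SUSPENDED)) {
		/*
		 * This IRQ was never disabled on suspend; only touch it
		 * if the action explicitly asked to be re-enabled.
		 */
		if (!(flags & IRQF_FORCE_RESUME))
			return false;
		/*
		 * Pretend it got disabled so the depth bookkeeping in
		 * the enable path stays balanced.
		 */
		(*depth)++;
	}
	return true;
}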
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 999835b6112b..656222fcf767 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -782,6 +782,10 @@ retry:
 	raw_spin_unlock_irq(&ctx->lock);
 }
 
+#define MAX_INTERRUPTS (~0ULL)
+
+static void perf_log_throttle(struct perf_event *event, int enable);
+
 static int
 event_sched_in(struct perf_event *event,
 		 struct perf_cpu_context *cpuctx,
@@ -794,6 +798,17 @@ event_sched_in(struct perf_event *event,
 
 	event->state = PERF_EVENT_STATE_ACTIVE;
 	event->oncpu = smp_processor_id();
+
+	/*
+	 * Unthrottle events, since we scheduled we might have missed several
+	 * ticks already, also for a heavily scheduling task there is little
+	 * guarantee it'll get a tick in a timely manner.
+	 */
+	if (unlikely(event->hw.interrupts == MAX_INTERRUPTS)) {
+		perf_log_throttle(event, 1);
+		event->hw.interrupts = 0;
+	}
+
 	/*
 	 * The new state must be visible before we turn it on in the hardware:
 	 */
@@ -1596,10 +1611,6 @@ void __perf_event_task_sched_in(struct task_struct *task)
 	}
 }
 
-#define MAX_INTERRUPTS (~0ULL)
-
-static void perf_log_throttle(struct perf_event *event, int enable);
-
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	u64 frequency = event->attr.sample_freq;
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 7b5db6a8561e..701853042c28 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -326,7 +326,7 @@ EXPORT_SYMBOL_GPL(pm_wq);
 
 static int __init pm_start_workqueue(void)
 {
-	pm_wq = alloc_workqueue("pm", WQ_FREEZEABLE, 0);
+	pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0);
 
 	return pm_wq ? 0 : -ENOMEM;
 }
diff --git a/kernel/power/process.c b/kernel/power/process.c
index d6d2a10320e0..0cf3a27a6c9d 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -22,7 +22,7 @@
  */
 #define TIMEOUT	(20 * HZ)
 
-static inline int freezeable(struct task_struct * p)
+static inline int freezable(struct task_struct * p)
 {
 	if ((p == current) ||
 	    (p->flags & PF_NOFREEZE) ||
@@ -53,7 +53,7 @@ static int try_to_freeze_tasks(bool sig_only)
 		todo = 0;
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
-			if (frozen(p) || !freezeable(p))
+			if (frozen(p) || !freezable(p))
 				continue;
 
 			if (!freeze_task(p, sig_only))
@@ -167,7 +167,7 @@ static void thaw_tasks(bool nosig_only)
 
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
-		if (!freezeable(p))
+		if (!freezable(p))
 			continue;
 
 		if (nosig_only && should_send_signal(p))
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 0dac75ea4456..64db648ff911 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1519,11 +1519,8 @@ static int
 swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
 		unsigned int nr_pages, unsigned int nr_highmem)
 {
-	int error = 0;
-
 	if (nr_highmem > 0) {
-		error = get_highmem_buffer(PG_ANY);
-		if (error)
+		if (get_highmem_buffer(PG_ANY))
 			goto err_out;
 		if (nr_highmem > alloc_highmem) {
 			nr_highmem -= alloc_highmem;
@@ -1546,7 +1543,7 @@ swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
 
  err_out:
 	swsusp_free();
-	return error;
+	return -ENOMEM;
 }
 
 asmlinkage int swsusp_save(void)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1708b1e2972d..e2302e40b360 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -163,7 +163,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
 	return !err;
 }
 
-int ptrace_attach(struct task_struct *task)
+static int ptrace_attach(struct task_struct *task)
 {
 	int retval;
 
@@ -219,7 +219,7 @@ out:
  * Performs checks and sets PT_PTRACED.
  * Should be used by all ptrace implementations for PTRACE_TRACEME.
  */
-int ptrace_traceme(void)
+static int ptrace_traceme(void)
 {
 	int ret = -EPERM;
 
@@ -293,7 +293,7 @@ static bool __ptrace_detach(struct task_struct *tracer, struct task_struct *p)
 	return false;
 }
 
-int ptrace_detach(struct task_struct *child, unsigned int data)
+static int ptrace_detach(struct task_struct *child, unsigned int data)
 {
 	bool dead = false;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 18d38e4ec7ba..42eab5a8437d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4213,6 +4213,7 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
 {
 	__wake_up_common(q, mode, 1, 0, key);
 }
+EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 
 /**
  * __wake_up_sync_key - wake up threads blocked on a waitqueue.
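The new EXPORT_SYMBOL_GPL(__wake_up_locked_key) makes the already-locked wakeup variant available to modules that hold the waitqueue lock themselves, avoiding the lock/unlock round-trip of wake_up() and letting them pass a key (e.g. poll events) to the wakeup callbacks. A hedged usage sketch; the surrounding driver context is hypothetical:

static void notify_readers(wait_queue_head_t *wq)
{
	unsigned long flags;

	spin_lock_irqsave(&wq->lock, flags);
	/* ... update driver state that wq->lock protects ... */

	/* Wake waiters while still holding wq->lock; the key is
	 * delivered to wait-entry callbacks such as poll waiters. */
	__wake_up_locked_key(wq, TASK_NORMAL,
			     (void *)(POLLIN | POLLRDNORM));
	spin_unlock_irqrestore(&wq->lock, flags);
}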
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index ad6267714c84..01f75a5f17af 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -210,11 +210,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
 
 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
-	int this_cpu = smp_processor_id();
 	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
 	struct sched_rt_entity *rt_se;
 
-	rt_se = rt_rq->tg->rt_se[this_cpu];
+	int cpu = cpu_of(rq_of_rt_rq(rt_rq));
+
+	rt_se = rt_rq->tg->rt_se[cpu];
 
 	if (rt_rq->rt_nr_running) {
 		if (rt_se && !on_rt_rq(rt_se))
@@ -226,10 +227,10 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 
 static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
-	int this_cpu = smp_processor_id();
 	struct sched_rt_entity *rt_se;
+	int cpu = cpu_of(rq_of_rt_rq(rt_rq));
 
-	rt_se = rt_rq->tg->rt_se[this_cpu];
+	rt_se = rt_rq->tg->rt_se[cpu];
 
 	if (rt_se && on_rt_rq(rt_se))
 		dequeue_rt_entity(rt_se);
@@ -565,8 +566,11 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 			if (rt_rq->rt_time || rt_rq->rt_nr_running)
 				idle = 0;
 			raw_spin_unlock(&rt_rq->rt_runtime_lock);
-		} else if (rt_rq->rt_nr_running)
+		} else if (rt_rq->rt_nr_running) {
 			idle = 0;
+			if (!rt_rq_throttled(rt_rq))
+				enqueue = 1;
+		}
 
 		if (enqueue)
 			sched_rt_rq_enqueue(rt_rq);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index c782fe9924c7..25cc41cd8f33 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -186,3 +186,8 @@ cond_syscall(sys_perf_event_open);
 /* fanotify! */
 cond_syscall(sys_fanotify_init);
 cond_syscall(sys_fanotify_mark);
+
+/* open by handle */
+cond_syscall(sys_name_to_handle_at);
+cond_syscall(sys_open_by_handle_at);
+cond_syscall(compat_sys_open_by_handle_at);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0f1bd83db985..4eed0af5d144 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -194,9 +194,9 @@ static int sysrq_sysctl_handler(ctl_table *table, int write,
 static struct ctl_table root_table[];
 static struct ctl_table_root sysctl_table_root;
 static struct ctl_table_header root_table_header = {
-	.count = 1,
+	{{.count = 1,
 	.ctl_table = root_table,
-	.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
+	.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
 	.root = &sysctl_table_root,
 	.set = &sysctl_table_root.default_set,
 };
@@ -1567,11 +1567,16 @@ void sysctl_head_get(struct ctl_table_header *head)
 	spin_unlock(&sysctl_lock);
 }
 
+static void free_head(struct rcu_head *rcu)
+{
+	kfree(container_of(rcu, struct ctl_table_header, rcu));
+}
+
 void sysctl_head_put(struct ctl_table_header *head)
 {
 	spin_lock(&sysctl_lock);
 	if (!--head->count)
-		kfree(head);
+		call_rcu(&head->rcu, free_head);
 	spin_unlock(&sysctl_lock);
 }
 
@@ -1948,10 +1953,10 @@ void unregister_sysctl_table(struct ctl_table_header * header)
 	start_unregistering(header);
 	if (!--header->parent->count) {
 		WARN_ON(1);
-		kfree(header->parent);
+		call_rcu(&header->parent->rcu, free_head);
 	}
 	if (!--header->count)
-		kfree(header);
+		call_rcu(&header->rcu, free_head);
 
 	spin_unlock(&sysctl_lock);
 }
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index b875bedf7c9a..3b8e028b9601 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1321,13 +1321,11 @@ static ssize_t binary_sysctl(const int *name, int nlen,
 	void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
 {
 	const struct bin_table *table = NULL;
-	struct nameidata nd;
 	struct vfsmount *mnt;
 	struct file *file;
 	ssize_t result;
 	char *pathname;
 	int flags;
-	int acc_mode;
 
 	pathname = sysctl_getname(name, nlen, &table);
 	result = PTR_ERR(pathname);
@@ -1337,28 +1335,17 @@ static ssize_t binary_sysctl(const int *name, int nlen,
 	/* How should the sysctl be accessed? */
 	if (oldval && oldlen && newval && newlen) {
 		flags = O_RDWR;
-		acc_mode = MAY_READ | MAY_WRITE;
 	} else if (newval && newlen) {
 		flags = O_WRONLY;
-		acc_mode = MAY_WRITE;
 	} else if (oldval && oldlen) {
 		flags = O_RDONLY;
-		acc_mode = MAY_READ;
 	} else {
 		result = 0;
 		goto out_putname;
 	}
 
 	mnt = current->nsproxy->pid_ns->proc_mnt;
-	result = vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd);
-	if (result)
-		goto out_putname;
-
-	result = may_open(&nd.path, acc_mode, flags);
-	if (result)
-		goto out_putpath;
-
-	file = dentry_open(nd.path.dentry, nd.path.mnt, flags, current_cred());
+	file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
 	result = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out_putname;
@@ -1370,10 +1357,6 @@ out_putname:
 	putname(pathname);
 out:
 	return result;
-
-out_putpath:
-	path_put(&nd.path);
-	goto out_putname;
 }
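In binary_sysctl() above, file_open_root() folds the hand-rolled lookup/permission/open sequence into a single VFS call that resolves the path relative to a mount's root and applies access checks internally, which also removes the window between may_open() and dentry_open(). A condensed sketch of the call pattern (error handling abbreviated):

/*
 * Old: three steps, with acc_mode derived by hand from the open flags.
 *
 *	vfs_path_lookup(mnt->mnt_root, mnt, pathname, 0, &nd);
 *	may_open(&nd.path, acc_mode, flags);
 *	file = dentry_open(nd.path.dentry, nd.path.mnt, flags, current_cred());
 *
 * New: one step; permission checking is derived from 'flags' inside.
 */
struct vfsmount *mnt = current->nsproxy->pid_ns->proc_mnt;
struct file *file;

file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
if (IS_ERR(file))
	return PTR_ERR(file);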
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 48b2761b5668..a3b5aff62606 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -600,4 +600,14 @@ int tick_broadcast_oneshot_active(void)
 	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
 }
 
+/*
+ * Check whether the broadcast device supports oneshot.
+ */
+bool tick_broadcast_oneshot_available(void)
+{
+	struct clock_event_device *bc = tick_broadcast_device.evtdev;
+
+	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
+}
+
 #endif
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 051bc80a0c43..ed228ef6f6b8 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -51,7 +51,11 @@ int tick_is_oneshot_available(void)
 {
 	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 
-	return dev && (dev->features & CLOCK_EVT_FEAT_ONESHOT);
+	if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT))
+		return 0;
+	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
+		return 1;
+	return tick_broadcast_oneshot_available();
 }
 
 /*
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 290eefbc1f60..f65d3a723a64 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -36,6 +36,7 @@ extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
 extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 extern int tick_broadcast_oneshot_active(void);
 extern void tick_check_oneshot_broadcast(int cpu);
+bool tick_broadcast_oneshot_available(void);
 # else /* BROADCAST */
 static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 {
@@ -46,6 +47,7 @@ static inline void tick_broadcast_switch_to_oneshot(void) { }
 static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
 static inline void tick_check_oneshot_broadcast(int cpu) { }
+static inline bool tick_broadcast_oneshot_available(void) { return true; }
 # endif /* !BROADCAST */
 
 #else /* !ONESHOT */
@@ -76,6 +78,7 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
 	return 0;
 }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
+static inline bool tick_broadcast_oneshot_available(void) { return false; }
 #endif /* !TICK_ONESHOT */
 
 /*
diff --git a/kernel/timer.c b/kernel/timer.c
index 5f40c2e0a94e..3503c17ac1d3 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -404,6 +404,11 @@ static void timer_stats_account_timer(struct timer_list *timer) {}
 
 static struct debug_obj_descr timer_debug_descr;
 
+static void *timer_debug_hint(void *addr)
+{
+	return ((struct timer_list *) addr)->function;
+}
+
 /*
  * fixup_init is called when:
  * - an active object is initialized
@@ -477,6 +482,7 @@ static int timer_fixup_free(void *addr, enum debug_obj_state state)
 
 static struct debug_obj_descr timer_debug_descr = {
 	.name		= "timer_list",
+	.debug_hint	= timer_debug_hint,
 	.fixup_init	= timer_fixup_init,
 	.fixup_activate	= timer_fixup_activate,
 	.fixup_free	= timer_fixup_free,
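hrtimer, timer and (below) workqueue all wire up the new debugobjects ->debug_hint() callback the same way: return the object's handler pointer so a debugobjects warning can print which function the timer or work item would have run, rather than just a raw object address. The same three-line pattern applies to any debugobjects user; a sketch where 'struct my_object' and its fields are hypothetical:

/* Report the handler, not the object address, in debugobjects splats. */
static void *my_object_debug_hint(void *addr)
{
	return ((struct my_object *) addr)->callback;
}

static struct debug_obj_descr my_object_debug_descr = {
	.name		= "my_object",
	.debug_hint	= my_object_debug_hint,
	/* .fixup_init / .fixup_activate / .fixup_free as before */
};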
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d95721f33702..cbafed7d4f38 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1827,21 +1827,5 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
 	rwbs[i] = '\0';
 }
 
-void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
-{
-	int rw = rq->cmd_flags & 0x03;
-	int bytes;
-
-	if (rq->cmd_flags & REQ_DISCARD)
-		rw |= REQ_DISCARD;
-
-	if (rq->cmd_flags & REQ_SECURE)
-		rw |= REQ_SECURE;
-
-	bytes = blk_rq_bytes(rq);
-
-	blk_fill_rwbs(rwbs, rw, bytes);
-}
-
 #endif /* CONFIG_EVENT_TRACING */
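With blk_fill_rwbs_rq() gone, deriving the rwbs string from a struct request becomes the caller's job: the block tracepoints pass the flags and byte count to blk_fill_rwbs() explicitly. A caller-side sketch of the replacement, mirroring the deleted helper (buffer size illustrative):

	char rwbs[8];	/* "RWBS" flag string, sized generously */
	u32 rw = rq->cmd_flags & 0x03;

	if (rq->cmd_flags & REQ_DISCARD)
		rw |= REQ_DISCARD;
	if (rq->cmd_flags & REQ_SECURE)
		rw |= REQ_SECURE;

	blk_fill_rwbs(rwbs, rw, blk_rq_bytes(rq));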
Note that if the gcwq is frozen, there - * may be frozen works in freezeable cwqs. Don't declare + * may be frozen works in freezable cwqs. Don't declare * completion while frozen. */ while (gcwq->nr_workers != gcwq->nr_idle || @@ -3576,9 +3593,9 @@ EXPORT_SYMBOL_GPL(work_on_cpu); /** * freeze_workqueues_begin - begin freezing workqueues * - * Start freezing workqueues. After this function returns, all - * freezeable workqueues will queue new works to their frozen_works - * list instead of gcwq->worklist. + * Start freezing workqueues. After this function returns, all freezable + * workqueues will queue new works to their frozen_works list instead of + * gcwq->worklist. * * CONTEXT: * Grabs and releases workqueue_lock and gcwq->lock's. @@ -3604,7 +3621,7 @@ void freeze_workqueues_begin(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (cwq && wq->flags & WQ_FREEZEABLE) + if (cwq && wq->flags & WQ_FREEZABLE) cwq->max_active = 0; } @@ -3615,7 +3632,7 @@ void freeze_workqueues_begin(void) } /** - * freeze_workqueues_busy - are freezeable workqueues still busy? + * freeze_workqueues_busy - are freezable workqueues still busy? * * Check whether freezing is complete. This function must be called * between freeze_workqueues_begin() and thaw_workqueues(). @@ -3624,8 +3641,8 @@ void freeze_workqueues_begin(void) * Grabs and releases workqueue_lock. * * RETURNS: - * %true if some freezeable workqueues are still busy. %false if - * freezing is complete. + * %true if some freezable workqueues are still busy. %false if freezing + * is complete. */ bool freeze_workqueues_busy(void) { @@ -3645,7 +3662,7 @@ bool freeze_workqueues_busy(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZEABLE)) + if (!cwq || !(wq->flags & WQ_FREEZABLE)) continue; BUG_ON(cwq->nr_active < 0); @@ -3690,7 +3707,7 @@ void thaw_workqueues(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZEABLE)) + if (!cwq || !(wq->flags & WQ_FREEZABLE)) continue; /* restore max_active and repopulate worklist */ |