Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/acct.c                      |  11
-rw-r--r-- | kernel/audit_tree.c                |  13
-rw-r--r-- | kernel/exit.c                      |   2
-rw-r--r-- | kernel/fork.c                      |  26
-rw-r--r-- | kernel/futex.c                     | 102
-rw-r--r-- | kernel/irq/handle.c                |   3
-rw-r--r-- | kernel/module.c                    |   5
-rw-r--r-- | kernel/params.c                    |  17
-rw-r--r-- | kernel/perf_counter.c              |  66
-rw-r--r-- | kernel/posix-timers.c              |  11
-rw-r--r-- | kernel/power/Kconfig               |  67
-rw-r--r-- | kernel/power/Makefile              |   5
-rw-r--r-- | kernel/power/main.c                |  20
-rw-r--r-- | kernel/power/power.h               |  24
-rw-r--r-- | kernel/power/process.c             |  25
-rw-r--r-- | kernel/power/suspend.c             |   3
-rw-r--r-- | kernel/sched.c                     |   9
-rw-r--r-- | kernel/signal.c                    |   3
-rw-r--r-- | kernel/time/clockevents.c          |  21
-rw-r--r-- | kernel/time/tick-sched.c           |   9
-rw-r--r-- | kernel/trace/ftrace.c              |  23
-rw-r--r-- | kernel/trace/trace_events_filter.c |   3
-rw-r--r-- | kernel/user.c                      |   2
23 files changed, 206 insertions, 264 deletions
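The first hunk below (kernel/acct.c) brackets do_acct_process() with an override_creds()/revert_creds() pair so the accounting record is written with the credentials of whoever enabled accounting, and the caller's credentials are restored on every exit path. A minimal sketch of that pairing, assuming a hypothetical write_record() callback that is not part of the patch:

#include <linux/cred.h>
#include <linux/fs.h>

/*
 * Illustration only: run a file operation with the credentials of the
 * task that opened 'file' (file->f_cred), then restore the caller's own
 * credentials -- the pattern do_acct_process() adopts in the hunk below.
 */
static int do_as_file_opener(struct file *file,
			     int (*write_record)(struct file *))
{
	const struct cred *orig_cred;
	int ret;

	orig_cred = override_creds(file->f_cred);	/* act as the opener */
	ret = write_record(file);			/* privileged work   */
	revert_creds(orig_cred);			/* always restore    */

	return ret;
}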
diff --git a/kernel/acct.c b/kernel/acct.c index 9f3391090b3e..a6605ca921b6 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -491,13 +491,17 @@ static void do_acct_process(struct bsd_acct_struct *acct, u64 run_time; struct timespec uptime; struct tty_struct *tty; + const struct cred *orig_cred; + + /* Perform file operations on behalf of whoever enabled accounting */ + orig_cred = override_creds(file->f_cred); /* * First check to see if there is enough free_space to continue * the process accounting system. */ if (!check_free_space(acct, file)) - return; + goto out; /* * Fill the accounting struct with the needed info as recorded @@ -532,7 +536,8 @@ static void do_acct_process(struct bsd_acct_struct *acct, do_div(elapsed, AHZ); ac.ac_btime = get_seconds() - elapsed; /* we really need to bite the bullet and change layout */ - current_uid_gid(&ac.ac_uid, &ac.ac_gid); + ac.ac_uid = orig_cred->uid; + ac.ac_gid = orig_cred->gid; #if ACCT_VERSION==2 ac.ac_ahz = AHZ; #endif @@ -578,6 +583,8 @@ static void do_acct_process(struct bsd_acct_struct *acct, sizeof(acct_t), &file->f_pos); current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; set_fs(fs); +out: + revert_creds(orig_cred); } /** diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 2451dc6f3282..4b05bd9479db 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -277,7 +277,7 @@ static void untag_chunk(struct node *p) owner->root = NULL; } - for (i = j = 0; i < size; i++, j++) { + for (i = j = 0; j <= size; i++, j++) { struct audit_tree *s; if (&chunk->owners[j] == p) { list_del_init(&p->list); @@ -290,7 +290,7 @@ static void untag_chunk(struct node *p) if (!s) /* result of earlier fallback */ continue; get_tree(s); - list_replace_init(&chunk->owners[i].list, &new->owners[j].list); + list_replace_init(&chunk->owners[j].list, &new->owners[i].list); } list_replace_rcu(&chunk->hash, &new->hash); @@ -373,15 +373,17 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) for (n = 0; n < old->count; n++) { if (old->owners[n].owner == tree) { spin_unlock(&hash_lock); - put_inotify_watch(watch); + put_inotify_watch(&old->watch); return 0; } } spin_unlock(&hash_lock); chunk = alloc_chunk(old->count + 1); - if (!chunk) + if (!chunk) { + put_inotify_watch(&old->watch); return -ENOMEM; + } mutex_lock(&inode->inotify_mutex); if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) { @@ -425,7 +427,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) spin_unlock(&hash_lock); inotify_evict_watch(&old->watch); mutex_unlock(&inode->inotify_mutex); - put_inotify_watch(&old->watch); + put_inotify_watch(&old->watch); /* pair to inotify_find_watch */ + put_inotify_watch(&old->watch); /* and kill it */ return 0; } diff --git a/kernel/exit.c b/kernel/exit.c index 869dc221733e..b8606f0f22e4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -987,8 +987,6 @@ NORET_TYPE void do_exit(long code) tsk->mempolicy = NULL; #endif #ifdef CONFIG_FUTEX - if (unlikely(!list_empty(&tsk->pi_state_list))) - exit_pi_state_list(tsk); if (unlikely(current->pi_state_cache)) kfree(current->pi_state_cache); #endif diff --git a/kernel/fork.c b/kernel/fork.c index 4c21988d6195..4b36858c0f4a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -136,9 +136,6 @@ struct kmem_cache *vm_area_cachep; /* SLAB cache for mm_struct structures (tsk->mm) */ static struct kmem_cache *mm_cachep; -/* Notifier list called when a task struct is freed */ -static ATOMIC_NOTIFIER_HEAD(task_free_notifier); - void free_task(struct task_struct *tsk) { 
prop_local_destroy_single(&tsk->dirties); @@ -149,18 +146,6 @@ void free_task(struct task_struct *tsk) } EXPORT_SYMBOL(free_task); -int task_free_register(struct notifier_block *n) -{ - return atomic_notifier_chain_register(&task_free_notifier, n); -} -EXPORT_SYMBOL(task_free_register); - -int task_free_unregister(struct notifier_block *n) -{ - return atomic_notifier_chain_unregister(&task_free_notifier, n); -} -EXPORT_SYMBOL(task_free_unregister); - void __put_task_struct(struct task_struct *tsk) { WARN_ON(!tsk->exit_state); @@ -171,7 +156,6 @@ void __put_task_struct(struct task_struct *tsk) put_cred(tsk->cred); delayacct_tsk_free(tsk); - atomic_notifier_call_chain(&task_free_notifier, 0, tsk); if (!profile_handoff_task(tsk)) free_task(tsk); } @@ -560,12 +544,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) /* Get rid of any futexes when releasing the mm */ #ifdef CONFIG_FUTEX - if (unlikely(tsk->robust_list)) + if (unlikely(tsk->robust_list)) { exit_robust_list(tsk); + tsk->robust_list = NULL; + } #ifdef CONFIG_COMPAT - if (unlikely(tsk->compat_robust_list)) + if (unlikely(tsk->compat_robust_list)) { compat_exit_robust_list(tsk); + tsk->compat_robust_list = NULL; + } #endif + if (unlikely(!list_empty(&tsk->pi_state_list))) + exit_pi_state_list(tsk); #endif /* Get rid of any cached register state */ diff --git a/kernel/futex.c b/kernel/futex.c index e18cfbdc7190..fdc88aa1c5f1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -115,6 +115,9 @@ struct futex_q { /* rt_waiter storage for requeue_pi: */ struct rt_mutex_waiter *rt_waiter; + /* The expected requeue pi target futex key: */ + union futex_key *requeue_pi_key; + /* Bitset for the optional bitmasked wakeup */ u32 bitset; }; @@ -147,7 +150,8 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key) */ static inline int match_futex(union futex_key *key1, union futex_key *key2) { - return (key1->both.word == key2->both.word + return (key1 && key2 + && key1->both.word == key2->both.word && key1->both.ptr == key2->both.ptr && key1->both.offset == key2->both.offset); } @@ -299,8 +303,14 @@ void put_futex_key(int fshared, union futex_key *key) */ static int fault_in_user_writeable(u32 __user *uaddr) { - int ret = get_user_pages(current, current->mm, (unsigned long)uaddr, - 1, 1, 0, NULL, NULL); + struct mm_struct *mm = current->mm; + int ret; + + down_read(&mm->mmap_sem); + ret = get_user_pages(current, mm, (unsigned long)uaddr, + 1, 1, 0, NULL, NULL); + up_read(&mm->mmap_sem); + return ret < 0 ? ret : 0; } @@ -521,8 +531,25 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, return -EINVAL; WARN_ON(!atomic_read(&pi_state->refcount)); - WARN_ON(pid && pi_state->owner && - pi_state->owner->pid != pid); + + /* + * When pi_state->owner is NULL then the owner died + * and another waiter is on the fly. pi_state->owner + * is fixed up by the task which acquires + * pi_state->rt_mutex. + * + * We do not check for pid == 0 which can happen when + * the owner died and robust_list_exit() cleared the + * TID. + */ + if (pid && pi_state->owner) { + /* + * Bail out if user space manipulated the + * futex value. + */ + if (pid != task_pid_vnr(pi_state->owner)) + return -EINVAL; + } atomic_inc(&pi_state->refcount); *ps = pi_state; @@ -749,6 +776,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) if (!pi_state) return -EINVAL; + /* + * If current does not own the pi_state then the futex is + * inconsistent and user space fiddled with the futex value. 
+ */ + if (pi_state->owner != current) + return -EINVAL; + spin_lock(&pi_state->pi_mutex.wait_lock); new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); @@ -912,8 +946,8 @@ retry: hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); - double_lock_hb(hb1, hb2); retry_private: + double_lock_hb(hb1, hb2); op_ret = futex_atomic_op_inuser(op, uaddr2); if (unlikely(op_ret < 0)) { @@ -1024,7 +1058,6 @@ static inline void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, struct futex_hash_bucket *hb) { - drop_futex_key_refs(&q->key); get_futex_key_refs(key); q->key = *key; @@ -1089,6 +1122,10 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, if (!top_waiter) return 0; + /* Ensure we requeue to the expected futex. */ + if (!match_futex(top_waiter->requeue_pi_key, key2)) + return -EINVAL; + /* * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in * the contended case or if set_waiters is 1. The pi_state is returned @@ -1218,6 +1255,7 @@ retry_private: */ if (ret == 1) { WARN_ON(pi_state); + drop_count++; task_count++; ret = get_futex_value_locked(&curval2, uaddr2); if (!ret) @@ -1276,6 +1314,12 @@ retry_private: continue; } + /* Ensure we requeue to the expected futex for requeue_pi. */ + if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) { + ret = -EINVAL; + break; + } + /* * Requeue nr_requeue waiters and possibly one more in the case * of requeue_pi if we couldn't acquire the lock atomically. @@ -1290,6 +1334,7 @@ retry_private: if (ret == 1) { /* We got the lock. */ requeue_pi_wake_futex(this, &key2, hb2); + drop_count++; continue; } else if (ret) { /* -EDEADLK */ @@ -1625,17 +1670,8 @@ out: static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, struct hrtimer_sleeper *timeout) { - queue_me(q, hb); - - /* - * There might have been scheduling since the queue_me(), as we - * cannot hold a spinlock across the get_user() in case it - * faults, and we cannot just set TASK_INTERRUPTIBLE state when - * queueing ourselves into the futex hash. This code thus has to - * rely on the futex_wake() code removing us from hash when it - * wakes us up. - */ set_current_state(TASK_INTERRUPTIBLE); + queue_me(q, hb); /* Arm the timer */ if (timeout) { @@ -1645,8 +1681,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, } /* - * !plist_node_empty() is safe here without any lock. - * q.lock_ptr != 0 is not safe, because of ordering against wakeup. + * If we have been removed from the hash list, then another task + * has tried to wake us, and we can skip the call to schedule(). */ if (likely(!plist_node_empty(&q->list))) { /* @@ -1751,6 +1787,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, q.pi_state = NULL; q.bitset = bitset; q.rt_waiter = NULL; + q.requeue_pi_key = NULL; if (abs_time) { to = &timeout; @@ -1762,6 +1799,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, current->timer_slack_ns); } +retry: /* Prepare to wait on uaddr. */ ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); if (ret) @@ -1779,9 +1817,14 @@ static int futex_wait(u32 __user *uaddr, int fshared, goto out_put_key; /* - * We expect signal_pending(current), but another thread may - * have handled it for us already. + * We expect signal_pending(current), but we might be the + * victim of a spurious wakeup as well. 
*/ + if (!signal_pending(current)) { + put_futex_key(fshared, &q.key); + goto retry; + } + ret = -ERESTARTSYS; if (!abs_time) goto out_put_key; @@ -1858,6 +1901,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, q.pi_state = NULL; q.rt_waiter = NULL; + q.requeue_pi_key = NULL; retry: q.key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); @@ -1930,7 +1974,7 @@ retry_private: /* Unqueue and drop the lock */ unqueue_me_pi(&q); - goto out; + goto out_put_key; out_unlock_put_key: queue_unlock(&q, hb); @@ -2087,11 +2131,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * Unqueue the futex_q and determine which it was. */ plist_del(&q->list, &q->list.plist); - drop_futex_key_refs(&q->key); + /* Handle spurious wakeups gracefully */ + ret = -EWOULDBLOCK; if (timeout && !timeout->task) ret = -ETIMEDOUT; - else + else if (signal_pending(current)) ret = -ERESTARTNOINTR; } return ret; @@ -2169,15 +2214,16 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, debug_rt_mutex_init_waiter(&rt_waiter); rt_waiter.task = NULL; - q.pi_state = NULL; - q.bitset = bitset; - q.rt_waiter = &rt_waiter; - key2 = FUTEX_KEY_INIT; ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) goto out; + q.pi_state = NULL; + q.bitset = bitset; + q.rt_waiter = &rt_waiter; + q.requeue_pi_key = &key2; + /* Prepare to wait on uaddr. */ ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); if (ret) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index e8108a1a0eaf..929ecec50913 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -369,6 +369,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) irqreturn_t ret, retval = IRQ_NONE; unsigned int status = 0; + if (!(action->flags & IRQF_DISABLED)) + local_irq_enable_in_hardirq(); + do { trace_irq_handler_entry(irq, action); ret = action->handler(irq, action->dev_id); diff --git a/kernel/module.c b/kernel/module.c index 2d537186191f..cda4d7667d70 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1179,7 +1179,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, /* Count loaded sections and allocate structures */ for (i = 0; i < nsect; i++) - if (sechdrs[i].sh_flags & SHF_ALLOC) + if (sechdrs[i].sh_flags & SHF_ALLOC + && sechdrs[i].sh_size) nloaded++; size[0] = ALIGN(sizeof(*sect_attrs) + nloaded * sizeof(sect_attrs->attrs[0]), @@ -1199,6 +1200,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, for (i = 0; i < nsect; i++) { if (! (sechdrs[i].sh_flags & SHF_ALLOC)) continue; + if (!sechdrs[i].sh_size) + continue; sattr->address = sechdrs[i].sh_addr; sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, GFP_KERNEL); diff --git a/kernel/params.c b/kernel/params.c index 7f6912ced2ba..f04a1e70c18b 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -217,15 +217,11 @@ int param_set_charp(const char *val, struct kernel_param *kp) return -ENOSPC; } - if (kp->flags & KPARAM_KMALLOCED) - kfree(*(char **)kp->arg); - /* This is a hack. We can't need to strdup in early boot, and we * don't need to; this mangled commandline is preserved. 
*/ if (slab_is_available()) { - kp->flags |= KPARAM_KMALLOCED; *(char **)kp->arg = kstrdup(val, GFP_KERNEL); - if (!kp->arg) + if (!*(char **)kp->arg) return -ENOMEM; } else *(const char **)kp->arg = val; @@ -303,6 +299,7 @@ static int param_array(const char *name, unsigned int min, unsigned int max, void *elem, int elemsize, int (*set)(const char *, struct kernel_param *kp), + u16 flags, unsigned int *num) { int ret; @@ -312,6 +309,7 @@ static int param_array(const char *name, /* Get the name right for errors. */ kp.name = name; kp.arg = elem; + kp.flags = flags; /* No equals sign? */ if (!val) { @@ -357,7 +355,8 @@ int param_array_set(const char *val, struct kernel_param *kp) unsigned int temp_num; return param_array(kp->name, val, 1, arr->max, arr->elem, - arr->elemsize, arr->set, arr->num ?: &temp_num); + arr->elemsize, arr->set, kp->flags, + arr->num ?: &temp_num); } int param_array_get(char *buffer, struct kernel_param *kp) @@ -604,11 +603,7 @@ void module_param_sysfs_remove(struct module *mod) void destroy_params(const struct kernel_param *params, unsigned num) { - unsigned int i; - - for (i = 0; i < num; i++) - if (params[i].flags & KPARAM_KMALLOCED) - kfree(*(char **)params[i].arg); + /* FIXME: This should free kmalloced charp parameters. It doesn't. */ } static void __init kernel_add_sysfs_param(const char *name, diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index d7cbc579fc80..237fd07a369f 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -469,7 +469,8 @@ static void update_counter_times(struct perf_counter *counter) struct perf_counter_context *ctx = counter->ctx; u64 run_end; - if (counter->state < PERF_COUNTER_STATE_INACTIVE) + if (counter->state < PERF_COUNTER_STATE_INACTIVE || + counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE) return; counter->total_time_enabled = ctx->time - counter->tstamp_enabled; @@ -518,7 +519,7 @@ static void __perf_counter_disable(void *info) */ if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { update_context_time(ctx); - update_counter_times(counter); + update_group_times(counter); if (counter == counter->group_leader) group_sched_out(counter, cpuctx, ctx); else @@ -573,7 +574,7 @@ static void perf_counter_disable(struct perf_counter *counter) * in, so we can change the state safely. */ if (counter->state == PERF_COUNTER_STATE_INACTIVE) { - update_counter_times(counter); + update_group_times(counter); counter->state = PERF_COUNTER_STATE_OFF; } @@ -851,6 +852,27 @@ retry: } /* + * Put a counter into inactive state and update time fields. + * Enabling the leader of a group effectively enables all + * the group members that aren't explicitly disabled, so we + * have to update their ->tstamp_enabled also. + * Note: this works for group members as well as group leaders + * since the non-leader members' sibling_lists will be empty. 
+ */ +static void __perf_counter_mark_enabled(struct perf_counter *counter, + struct perf_counter_context *ctx) +{ + struct perf_counter *sub; + + counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_enabled = ctx->time - counter->total_time_enabled; + list_for_each_entry(sub, &counter->sibling_list, list_entry) + if (sub->state >= PERF_COUNTER_STATE_INACTIVE) + sub->tstamp_enabled = + ctx->time - sub->total_time_enabled; +} + +/* * Cross CPU call to enable a performance counter */ static void __perf_counter_enable(void *info) @@ -877,8 +899,7 @@ static void __perf_counter_enable(void *info) if (counter->state >= PERF_COUNTER_STATE_INACTIVE) goto unlock; - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = ctx->time - counter->total_time_enabled; + __perf_counter_mark_enabled(counter, ctx); /* * If the counter is in a group and isn't the group leader, @@ -971,11 +992,9 @@ static void perf_counter_enable(struct perf_counter *counter) * Since we have the lock this context can't be scheduled * in, so we can change the state safely. */ - if (counter->state == PERF_COUNTER_STATE_OFF) { - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = - ctx->time - counter->total_time_enabled; - } + if (counter->state == PERF_COUNTER_STATE_OFF) + __perf_counter_mark_enabled(counter, ctx); + out: spin_unlock_irq(&ctx->lock); } @@ -1344,7 +1363,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) u64 interrupts, freq; spin_lock(&ctx->lock); - list_for_each_entry(counter, &ctx->counter_list, list_entry) { + list_for_each_entry_rcu(counter, &ctx->counter_list, event_entry) { if (counter->state != PERF_COUNTER_STATE_ACTIVE) continue; @@ -1479,9 +1498,7 @@ static void perf_counter_enable_on_exec(struct task_struct *task) counter->attr.enable_on_exec = 0; if (counter->state >= PERF_COUNTER_STATE_INACTIVE) continue; - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = - ctx->time - counter->total_time_enabled; + __perf_counter_mark_enabled(counter, ctx); enabled = 1; } @@ -4126,8 +4143,8 @@ done: static int perf_copy_attr(struct perf_counter_attr __user *uattr, struct perf_counter_attr *attr) { - int ret; u32 size; + int ret; if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0)) return -EFAULT; @@ -4152,25 +4169,26 @@ static int perf_copy_attr(struct perf_counter_attr __user *uattr, /* * If we're handed a bigger struct than we know of, - * ensure all the unknown bits are 0. + * ensure all the unknown bits are 0 - i.e. new + * user-space does not rely on any kernel feature + * extensions we dont know about yet. 
*/ if (size > sizeof(*attr)) { - unsigned long val; - unsigned long __user *addr; - unsigned long __user *end; + unsigned char __user *addr; + unsigned char __user *end; + unsigned char val; - addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr), - sizeof(unsigned long)); - end = PTR_ALIGN((void __user *)uattr + size, - sizeof(unsigned long)); + addr = (void __user *)uattr + sizeof(*attr); + end = (void __user *)uattr + size; - for (; addr < end; addr += sizeof(unsigned long)) { + for (; addr < end; addr++) { ret = get_user(val, addr); if (ret) return ret; if (val) goto err_size; } + size = sizeof(*attr); } ret = copy_from_user(attr, uattr, size); diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index d089d052c4a9..187d7a6d7761 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -524,14 +524,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, new_timer->it_id = (timer_t) new_timer_id; new_timer->it_clock = which_clock; new_timer->it_overrun = -1; - error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer)); - if (error) - goto out; - /* - * return the timer_id now. The next step is hard to - * back out if there is an error. - */ if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) { error = -EFAULT; @@ -562,6 +555,10 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, new_timer->sigq->info.si_tid = new_timer->it_id; new_timer->sigq->info.si_code = SI_TIMER; + error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer)); + if (error) + goto out; + spin_lock_irq(¤t->sighand->siglock); new_timer->it_signal = current->signal; list_add(&new_timer->list, ¤t->signal->posix_timers); diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index bd007aa74549..72067cbdb37f 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -119,73 +119,6 @@ config SUSPEND_FREEZER config HIBERNATION_NVS bool -config HAS_WAKELOCK - bool - -config HAS_EARLYSUSPEND - bool - -config WAKELOCK - bool "Wake lock" - depends on PM && RTC_CLASS - default n - select HAS_WAKELOCK - ---help--- - Enable wakelocks. When user space request a sleep state the - sleep request will be delayed until no wake locks are held. - -config WAKELOCK_STAT - bool "Wake lock stats" - depends on WAKELOCK - default y - ---help--- - Report wake lock stats in /proc/wakelocks - -config USER_WAKELOCK - bool "Userspace wake locks" - depends on WAKELOCK - default y - ---help--- - User-space wake lock api. Write "lockname" or "lockname timeout" - to /sys/power/wake_lock lock and if needed create a wake lock. - Write "lockname" to /sys/power/wake_unlock to unlock a user wake - lock. - -config EARLYSUSPEND - bool "Early suspend" - depends on WAKELOCK - default y - select HAS_EARLYSUSPEND - ---help--- - Call early suspend handlers when the user requested sleep state - changes. - -choice - prompt "User-space screen access" - default FB_EARLYSUSPEND if !FRAMEBUFFER_CONSOLE - default CONSOLE_EARLYSUSPEND - depends on HAS_EARLYSUSPEND - - config NO_USER_SPACE_SCREEN_ACCESS_CONTROL - bool "None" - - config CONSOLE_EARLYSUSPEND - bool "Console switch on early-suspend" - depends on HAS_EARLYSUSPEND && VT - ---help--- - Register early suspend handler to perform a console switch to - when user-space should stop drawing to the screen and a switch - back when it should resume. 
- - config FB_EARLYSUSPEND - bool "Sysfs interface" - depends on HAS_EARLYSUSPEND - ---help--- - Register early suspend handler that notifies and waits for - user-space through sysfs when user-space should stop drawing - to the screen and notifies user-space when it should resume. -endchoice - config HIBERNATION bool "Hibernation (aka 'suspend to disk')" depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 058ec85a98bd..c3b81c30e5d5 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -12,8 +12,3 @@ obj-$(CONFIG_HIBERNATION) += swsusp.o hibernate.o snapshot.o swap.o user.o obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o -obj-$(CONFIG_WAKELOCK) += wakelock.o -obj-$(CONFIG_USER_WAKELOCK) += userwakelock.o -obj-$(CONFIG_EARLYSUSPEND) += earlysuspend.o -obj-$(CONFIG_CONSOLE_EARLYSUSPEND) += consoleearlysuspend.o -obj-$(CONFIG_FB_EARLYSUSPEND) += fbearlysuspend.o diff --git a/kernel/power/main.c b/kernel/power/main.c index ef94a083302f..f710e36930cc 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -146,11 +146,7 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { #ifdef CONFIG_SUSPEND -#ifdef CONFIG_EARLYSUSPEND - suspend_state_t state = PM_SUSPEND_ON; -#else suspend_state_t state = PM_SUSPEND_STANDBY; -#endif const char * const *s; #endif char *p; @@ -172,15 +168,8 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, break; } if (state < PM_SUSPEND_MAX && *s) -#ifdef CONFIG_EARLYSUSPEND - if (state == PM_SUSPEND_ON || valid_state(state)) { - error = 0; - request_suspend_state(state); - } -#else error = enter_state(state); #endif -#endif Exit: return error ? 
error : n; @@ -213,11 +202,6 @@ pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr, power_attr(pm_trace); #endif /* CONFIG_PM_TRACE */ -#ifdef CONFIG_USER_WAKELOCK -power_attr(wake_lock); -power_attr(wake_unlock); -#endif - static struct attribute * g[] = { &state_attr.attr, #ifdef CONFIG_PM_TRACE @@ -226,10 +210,6 @@ static struct attribute * g[] = { #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_PM_DEBUG) &pm_test_attr.attr, #endif -#ifdef CONFIG_USER_WAKELOCK - &wake_lock_attr.attr, - &wake_unlock_attr.attr, -#endif NULL, }; diff --git a/kernel/power/power.h b/kernel/power/power.h index a665c1e4e597..26d5a26f82e3 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -236,27 +236,3 @@ static inline void suspend_thaw_processes(void) { } #endif - -#ifdef CONFIG_WAKELOCK -/* kernel/power/wakelock.c */ -extern struct workqueue_struct *suspend_work_queue; -extern struct wake_lock main_wake_lock; -extern suspend_state_t requested_suspend_state; -#endif - -#ifdef CONFIG_USER_WAKELOCK -ssize_t wake_lock_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf); -ssize_t wake_lock_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t n); -ssize_t wake_unlock_show(struct kobject *kobj, struct kobj_attribute *attr, - char *buf); -ssize_t wake_unlock_store(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t n); -#endif - -#ifdef CONFIG_EARLYSUSPEND -/* kernel/power/earlysuspend.c */ -void request_suspend_state(suspend_state_t state); -suspend_state_t get_suspend_state(void); -#endif diff --git a/kernel/power/process.c b/kernel/power/process.c index d366c35f8ba0..da2072d73811 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -13,7 +13,6 @@ #include <linux/module.h> #include <linux/syscalls.h> #include <linux/freezer.h> -#include <linux/wakelock.h> /* * Timeout for stopping processes @@ -37,7 +36,6 @@ static int try_to_freeze_tasks(bool sig_only) struct timeval start, end; u64 elapsed_csecs64; unsigned int elapsed_csecs; - unsigned int wakeup = 0; do_gettimeofday(&start); @@ -64,12 +62,6 @@ static int try_to_freeze_tasks(bool sig_only) } while_each_thread(g, p); read_unlock(&tasklist_lock); yield(); /* Yield is okay here */ -#ifdef CONFIG_WAKELOCK - if (todo && has_wake_lock(WAKE_LOCK_SUSPEND)) { - wakeup = 1; - break; - } -#endif if (time_after(jiffies, end_time)) break; } while (todo); @@ -85,18 +77,11 @@ static int try_to_freeze_tasks(bool sig_only) * and caller must call thaw_processes() if something fails), * but it cleans up leftover PF_FREEZE requests. */ - if(wakeup) { - printk("\n"); - printk(KERN_ERR "Freezing of %s aborted\n", - sig_only ? 
"user space " : "tasks "); - } - else { - printk("\n"); - printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " - "(%d tasks refusing to freeze):\n", - elapsed_csecs / 100, elapsed_csecs % 100, todo); - show_state(); - } + printk("\n"); + printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " + "(%d tasks refusing to freeze):\n", + elapsed_csecs / 100, elapsed_csecs % 100, todo); + show_state(); read_lock(&tasklist_lock); do_each_thread(g, p) { task_lock(p); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index cd515ba1c8ce..6f10dfc2d3e9 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -19,9 +19,6 @@ #include "power.h" const char *const pm_states[PM_SUSPEND_MAX] = { -#ifdef CONFIG_EARLYSUSPEND - [PM_SUSPEND_ON] = "on", -#endif [PM_SUSPEND_STANDBY] = "standby", [PM_SUSPEND_MEM] = "mem", }; diff --git a/kernel/sched.c b/kernel/sched.c index 1b59e265273b..81ede13d2238 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7945,6 +7945,7 @@ static cpumask_var_t cpu_isolated_map; /* Setup the mask of cpus configured for isolated domains */ static int __init isolated_cpu_setup(char *str) { + alloc_bootmem_cpumask_var(&cpu_isolated_map); cpulist_parse(str, cpu_isolated_map); return 1; } @@ -9383,13 +9384,15 @@ void __init sched_init(void) current->sched_class = &fair_sched_class; /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ - alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); + zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); #ifdef CONFIG_SMP #ifdef CONFIG_NO_HZ - alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); + zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); #endif - alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); + /* May be allocated at isolcpus cmdline parse time */ + if (cpu_isolated_map == NULL) + zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); #endif /* SMP */ perf_counter_init(); diff --git a/kernel/signal.c b/kernel/signal.c index 64c5deeaca5d..86464ce4ceba 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -939,7 +939,8 @@ static void print_fatal_signal(struct pt_regs *regs, int signr) for (i = 0; i < 16; i++) { unsigned char insn; - __get_user(insn, (unsigned char *)(regs->ip + i)); + if (get_user(insn, (unsigned char *)(regs->ip + i))) + break; printk("%02x ", insn); } } diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 620b58abdc32..0d809ae02d60 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -20,6 +20,8 @@ #include <linux/sysdev.h> #include <linux/tick.h> +#include "tick-internal.h" + /* The registered clock event devices */ static LIST_HEAD(clockevent_devices); static LIST_HEAD(clockevents_released); @@ -237,8 +239,9 @@ void clockevents_exchange_device(struct clock_event_device *old, */ void clockevents_notify(unsigned long reason, void *arg) { - struct list_head *node, *tmp; + struct clock_event_device *dev, *tmp; unsigned long flags; + int cpu; spin_lock_irqsave(&clockevents_lock, flags); clockevents_do_notify(reason, arg); @@ -249,8 +252,20 @@ void clockevents_notify(unsigned long reason, void *arg) * Unregister the clock event devices which were * released from the users in the notify chain. 
*/ - list_for_each_safe(node, tmp, &clockevents_released) - list_del(node); + list_for_each_entry_safe(dev, tmp, &clockevents_released, list) + list_del(&dev->list); + /* + * Now check whether the CPU has left unused per cpu devices + */ + cpu = *((int *)arg); + list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { + if (cpumask_test_cpu(cpu, dev->cpumask) && + cpumask_weight(dev->cpumask) == 1 && + !tick_is_broadcast_device(dev)) { + BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + list_del(&dev->list); + } + } break; default: break; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e0f59a21c061..89aed5933ed4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -231,6 +231,13 @@ void tick_nohz_stop_sched_tick(int inidle) if (!inidle && !ts->inidle) goto end; + /* + * Set ts->inidle unconditionally. Even if the system did not + * switch to NOHZ mode the cpu frequency governers rely on the + * update of the idle time accounting in tick_nohz_start_idle(). + */ + ts->inidle = 1; + now = tick_nohz_start_idle(ts); /* @@ -248,8 +255,6 @@ void tick_nohz_stop_sched_tick(int inidle) if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) goto end; - ts->inidle = 1; - if (need_resched()) goto end; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 25edd5cc5935..52eb25189928 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1110,14 +1110,9 @@ static void ftrace_replace_code(int enable) failed = __ftrace_replace_code(rec, enable); if (failed) { rec->flags |= FTRACE_FL_FAILED; - if ((system_state == SYSTEM_BOOTING) || - !core_kernel_text(rec->ip)) { - ftrace_free_rec(rec); - } else { - ftrace_bug(failed, rec->ip); - /* Stop processing */ - return; - } + ftrace_bug(failed, rec->ip); + /* Stop processing */ + return; } } while_for_each_ftrace_rec(); } @@ -2801,19 +2796,17 @@ static int ftrace_convert_nops(struct module *mod, } #ifdef CONFIG_MODULES -void ftrace_release(void *start, void *end) +void ftrace_release_mod(struct module *mod) { struct dyn_ftrace *rec; struct ftrace_page *pg; - unsigned long s = (unsigned long)start; - unsigned long e = (unsigned long)end; - if (ftrace_disabled || !start || start == end) + if (ftrace_disabled) return; mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { - if ((rec->ip >= s) && (rec->ip < e)) { + if (within_module_core(rec->ip, mod)) { /* * rec->ip is changed in ftrace_free_rec() * It should not between s and e if record was freed. 
@@ -2845,9 +2838,7 @@ static int ftrace_module_notify(struct notifier_block *self, mod->num_ftrace_callsites); break; case MODULE_STATE_GOING: - ftrace_release(mod->ftrace_callsites, - mod->ftrace_callsites + - mod->num_ftrace_callsites); + ftrace_release_mod(mod); break; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index f32dc9d1ea7b..a6b0d73a0d9d 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -844,8 +844,9 @@ static void postfix_clear(struct filter_parse_state *ps) while (!list_empty(&ps->postfix)) { elt = list_first_entry(&ps->postfix, struct postfix_elt, list); - kfree(elt->operand); list_del(&elt->list); + kfree(elt->operand); + kfree(elt); } } diff --git a/kernel/user.c b/kernel/user.c index 2c000e7132ac..46d0165ca70c 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -330,9 +330,9 @@ done: */ static void free_user(struct user_struct *up, unsigned long flags) { - spin_unlock_irqrestore(&uidhash_lock, flags); INIT_DELAYED_WORK(&up->work, cleanup_user_struct); schedule_delayed_work(&up->work, msecs_to_jiffies(1000)); + spin_unlock_irqrestore(&uidhash_lock, flags); } #else /* CONFIG_USER_SCHED && CONFIG_SYSFS */ |
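The largest group of hunks above hardens kernel/futex.c against spurious wakeups: futex_wait() now loops back to futex_wait_setup() when it is woken without a pending signal, and handle_early_requeue_pi_wakeup() defaults to -EWOULDBLOCK instead of assuming a signal arrived. The user-space mirror of the same rule is that a FUTEX_WAIT returning 0 is a hint, not a guarantee. A hedged sketch of that discipline; the helper name futex_wait_word() is illustrative and not from the patch:

#include <errno.h>
#include <linux/futex.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Wait until *uaddr != expected, tolerating spurious and stray wakeups. */
static int futex_wait_word(uint32_t *uaddr, uint32_t expected)
{
	for (;;) {
		long rc = syscall(SYS_futex, uaddr, FUTEX_WAIT,
				  expected, NULL, NULL, 0);
		if (rc == 0) {
			/* Woken: re-check the futex word; if it still holds
			 * the expected value the wakeup was spurious, so wait
			 * again -- the same decision the patched kernel makes
			 * by re-checking signal_pending() before bailing. */
			if (__atomic_load_n(uaddr, __ATOMIC_ACQUIRE) != expected)
				return 0;
			continue;
		}
		if (errno == EAGAIN)	/* value changed before we slept */
			return 0;
		if (errno == EINTR)	/* interrupted by a signal: retry */
			continue;
		return -1;		/* genuine error (EFAULT, EINVAL, ...) */
	}
}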