summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exit.c17
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/futex.c31
-rw-r--r--kernel/irq/proc.c2
-rw-r--r--kernel/latencytop.c17
-rw-r--r--kernel/perf_event.c30
-rw-r--r--kernel/power/hibernate.c22
-rw-r--r--kernel/power/suspend.c5
-rw-r--r--kernel/power/user.c4
-rw-r--r--kernel/printk.c4
-rw-r--r--kernel/sched.c181
-rw-r--r--kernel/timer.c8
-rw-r--r--kernel/trace/trace.c10
-rw-r--r--kernel/user.c1
-rw-r--r--kernel/watchdog.c3
15 files changed, 266 insertions, 70 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 03120229db28..abc46d9cf5d8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -95,6 +95,14 @@ static void __exit_signal(struct task_struct *tsk)
sig->tty = NULL;
} else {
/*
+ * This can only happen if the caller is de_thread().
+ * FIXME: this is the temporary hack, we should teach
+ * posix-cpu-timers to handle this case correctly.
+ */
+ if (unlikely(has_group_leader_pid(tsk)))
+ posix_cpu_timers_exit_group(tsk);
+
+ /*
* If there is any task waiting for the group exit
* then notify it:
*/
@@ -903,6 +911,15 @@ NORET_TYPE void do_exit(long code)
if (unlikely(!tsk->pid))
panic("Attempted to kill the idle task!");
+ /*
+ * If do_exit is called because this processes oopsed, it's possible
+ * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before
+ * continuing. Amongst other possible reasons, this is to prevent
+ * mm_release()->clear_child_tid() from writing to a user-controlled
+ * kernel address.
+ */
+ set_fs(USER_DS);
+
tracehook_report_exit(&code);
validate_creds_for_do_exit(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index f3d93ab730c6..b1962c287d3f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -288,6 +288,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
setup_thread_stack(tsk, orig);
clear_user_return_notifier(tsk);
+ clear_tsk_need_resched(tsk);
stackend = end_of_stack(tsk);
*stackend = STACK_END_MAGIC; /* for overflow detection */
diff --git a/kernel/futex.c b/kernel/futex.c
index 6a3a5fa1526d..e328f574c97c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1363,7 +1363,6 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
{
struct futex_hash_bucket *hb;
- get_futex_key_refs(&q->key);
hb = hash_futex(&q->key);
q->lock_ptr = &hb->lock;
@@ -1375,7 +1374,6 @@ static inline void
queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
{
spin_unlock(&hb->lock);
- drop_futex_key_refs(&q->key);
}
/**
@@ -1480,8 +1478,6 @@ static void unqueue_me_pi(struct futex_q *q)
q->pi_state = NULL;
spin_unlock(q->lock_ptr);
-
- drop_futex_key_refs(&q->key);
}
/*
@@ -1812,7 +1808,10 @@ static int futex_wait(u32 __user *uaddr, int fshared,
}
retry:
- /* Prepare to wait on uaddr. */
+ /*
+ * Prepare to wait on uaddr. On success, holds hb lock and increments
+ * q.key refs.
+ */
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
if (ret)
goto out;
@@ -1822,24 +1821,23 @@ retry:
/* If we were woken (and unqueued), we succeeded, whatever. */
ret = 0;
+ /* unqueue_me() drops q.key ref */
if (!unqueue_me(&q))
- goto out_put_key;
+ goto out;
ret = -ETIMEDOUT;
if (to && !to->task)
- goto out_put_key;
+ goto out;
/*
* We expect signal_pending(current), but we might be the
* victim of a spurious wakeup as well.
*/
- if (!signal_pending(current)) {
- put_futex_key(fshared, &q.key);
+ if (!signal_pending(current))
goto retry;
- }
ret = -ERESTARTSYS;
if (!abs_time)
- goto out_put_key;
+ goto out;
restart = &current_thread_info()->restart_block;
restart->fn = futex_wait_restart;
@@ -1856,8 +1854,6 @@ retry:
ret = -ERESTART_RESTARTBLOCK;
-out_put_key:
- put_futex_key(fshared, &q.key);
out:
if (to) {
hrtimer_cancel(&to->timer);
@@ -2236,7 +2232,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
q.rt_waiter = &rt_waiter;
q.requeue_pi_key = &key2;
- /* Prepare to wait on uaddr. */
+ /*
+ * Prepare to wait on uaddr. On success, increments q.key (key1) ref
+ * count.
+ */
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
if (ret)
goto out_key2;
@@ -2254,7 +2253,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
* In order for us to be here, we know our q.key == key2, and since
* we took the hb->lock above, we also know that futex_requeue() has
* completed and we no longer have to concern ourselves with a wakeup
- * race with the atomic proxy lock acquition by the requeue code.
+ * race with the atomic proxy lock acquisition by the requeue code. The
+ * futex_requeue dropped our key1 reference and incremented our key2
+ * reference count.
*/
/* Check if the requeue code acquired the second futex for us. */
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 09a2ee540bd2..345e0b75fe1e 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -214,7 +214,7 @@ static int irq_spurious_proc_show(struct seq_file *m, void *v)
static int irq_spurious_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, irq_spurious_proc_show, NULL);
+ return single_open(file, irq_spurious_proc_show, PDE(inode)->data);
}
static const struct file_operations irq_spurious_proc_fops = {
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 877fb306d415..17110a4a4fc2 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -194,14 +194,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
account_global_scheduler_latency(tsk, &lat);
- /*
- * short term hack; if we're > 32 we stop; future we recycle:
- */
- tsk->latency_record_count++;
- if (tsk->latency_record_count >= LT_SAVECOUNT)
- goto out_unlock;
-
- for (i = 0; i < LT_SAVECOUNT; i++) {
+ for (i = 0; i < tsk->latency_record_count; i++) {
struct latency_record *mylat;
int same = 1;
@@ -227,8 +220,14 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
}
}
+ /*
+ * short term hack; if we're > 32 we stop; future we recycle:
+ */
+ if (tsk->latency_record_count >= LT_SAVECOUNT)
+ goto out_unlock;
+
/* Allocated a new one: */
- i = tsk->latency_record_count;
+ i = tsk->latency_record_count++;
memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
out_unlock:
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index b98bed3d8182..65b09a836cc3 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1620,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx)
{
raw_spin_lock(&ctx->lock);
- /* Rotate the first entry last of non-pinned groups */
- list_rotate_left(&ctx->flexible_groups);
+ /*
+ * Rotate the first entry last of non-pinned groups. Rotation might be
+ * disabled by the inheritance code.
+ */
+ if (!ctx->rotate_disable)
+ list_rotate_left(&ctx->flexible_groups);
raw_spin_unlock(&ctx->lock);
}
@@ -1773,7 +1777,13 @@ static u64 perf_event_read(struct perf_event *event)
unsigned long flags;
raw_spin_lock_irqsave(&ctx->lock, flags);
- update_context_time(ctx);
+ /*
+ * may read while context is not active
+ * (e.g., thread is blocked), in that case
+ * we cannot update context time
+ */
+ if (ctx->is_active)
+ update_context_time(ctx);
update_event_times(event);
raw_spin_unlock_irqrestore(&ctx->lock, flags);
}
@@ -5616,6 +5626,7 @@ int perf_event_init_task(struct task_struct *child)
struct perf_event *event;
struct task_struct *parent = current;
int inherited_all = 1;
+ unsigned long flags;
int ret = 0;
child->perf_event_ctxp = NULL;
@@ -5656,6 +5667,15 @@ int perf_event_init_task(struct task_struct *child)
break;
}
+ /*
+ * We can't hold ctx->lock when iterating the ->flexible_group list due
+ * to allocations, but we need to prevent rotation because
+ * rotate_ctx() will change the list from interrupt context.
+ */
+ raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+ parent_ctx->rotate_disable = 1;
+ raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
ret = inherit_task_group(event, parent, parent_ctx, child,
&inherited_all);
@@ -5663,6 +5683,10 @@ int perf_event_init_task(struct task_struct *child)
break;
}
+ raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+ parent_ctx->rotate_disable = 0;
+ raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
child_ctx = child->perf_event_ctxp;
if (child_ctx && inherited_all) {
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 8dc31e02ae12..7a931a90e4a2 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -326,7 +326,6 @@ static int create_image(int platform_mode)
int hibernation_snapshot(int platform_mode)
{
int error;
- gfp_t saved_mask;
error = platform_begin(platform_mode);
if (error)
@@ -338,7 +337,7 @@ int hibernation_snapshot(int platform_mode)
goto Close;
suspend_console();
- saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
+ pm_restrict_gfp_mask();
error = dpm_suspend_start(PMSG_FREEZE);
if (error)
goto Recover_platform;
@@ -347,7 +346,10 @@ int hibernation_snapshot(int platform_mode)
goto Recover_platform;
error = create_image(platform_mode);
- /* Control returns here after successful restore */
+ /*
+ * Control returns here (1) after the image has been created or the
+ * image creation has failed and (2) after a successful restore.
+ */
Resume_devices:
/* We may need to release the preallocated image pages here. */
@@ -356,7 +358,10 @@ int hibernation_snapshot(int platform_mode)
dpm_resume_end(in_suspend ?
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
- set_gfp_allowed_mask(saved_mask);
+
+ if (error || !in_suspend)
+ pm_restore_gfp_mask();
+
resume_console();
Close:
platform_end(platform_mode);
@@ -451,17 +456,16 @@ static int resume_target_kernel(bool platform_mode)
int hibernation_restore(int platform_mode)
{
int error;
- gfp_t saved_mask;
pm_prepare_console();
suspend_console();
- saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
+ pm_restrict_gfp_mask();
error = dpm_suspend_start(PMSG_QUIESCE);
if (!error) {
error = resume_target_kernel(platform_mode);
dpm_resume_end(PMSG_RECOVER);
}
- set_gfp_allowed_mask(saved_mask);
+ pm_restore_gfp_mask();
resume_console();
pm_restore_console();
return error;
@@ -475,7 +479,6 @@ int hibernation_restore(int platform_mode)
int hibernation_platform_enter(void)
{
int error;
- gfp_t saved_mask;
if (!hibernation_ops)
return -ENOSYS;
@@ -491,7 +494,6 @@ int hibernation_platform_enter(void)
entering_platform_hibernation = true;
suspend_console();
- saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
error = dpm_suspend_start(PMSG_HIBERNATE);
if (error) {
if (hibernation_ops->recover)
@@ -535,7 +537,6 @@ int hibernation_platform_enter(void)
Resume_devices:
entering_platform_hibernation = false;
dpm_resume_end(PMSG_RESTORE);
- set_gfp_allowed_mask(saved_mask);
resume_console();
Close:
@@ -643,6 +644,7 @@ int hibernate(void)
swsusp_free();
if (!error)
power_down();
+ pm_restore_gfp_mask();
} else {
pr_debug("PM: Image restored successfully.\n");
}
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index e84f5b26f1dd..ac7147c7dd2e 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -200,7 +200,6 @@ static int suspend_enter(suspend_state_t state)
int suspend_devices_and_enter(suspend_state_t state)
{
int error;
- gfp_t saved_mask;
if (!suspend_ops)
return -ENOSYS;
@@ -211,7 +210,7 @@ int suspend_devices_and_enter(suspend_state_t state)
goto Close;
}
suspend_console();
- saved_mask = clear_gfp_allowed_mask(GFP_IOFS);
+ pm_restrict_gfp_mask();
suspend_test_start();
error = dpm_suspend_start(PMSG_SUSPEND);
if (error) {
@@ -228,7 +227,7 @@ int suspend_devices_and_enter(suspend_state_t state)
suspend_test_start();
dpm_resume_end(PMSG_RESUME);
suspend_test_finish("resume devices");
- set_gfp_allowed_mask(saved_mask);
+ pm_restore_gfp_mask();
resume_console();
Close:
if (suspend_ops->end)
diff --git a/kernel/power/user.c b/kernel/power/user.c
index e819e17877ca..c36c3b9e8a84 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -137,7 +137,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
free_all_swap_pages(data->swap);
if (data->frozen)
thaw_processes();
- pm_notifier_call_chain(data->mode == O_WRONLY ?
+ pm_notifier_call_chain(data->mode == O_RDONLY ?
PM_POST_HIBERNATION : PM_POST_RESTORE);
atomic_inc(&snapshot_device_available);
@@ -263,6 +263,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
case SNAPSHOT_UNFREEZE:
if (!data->frozen || data->ready)
break;
+ pm_restore_gfp_mask();
thaw_processes();
usermodehelper_enable();
data->frozen = 0;
@@ -275,6 +276,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
error = -EPERM;
break;
}
+ pm_restore_gfp_mask();
error = hibernation_snapshot(data->platform_support);
if (!error)
error = put_user(in_suspend, (int __user *)arg);
diff --git a/kernel/printk.c b/kernel/printk.c
index 48ce53f5bb0a..8d41a50af0db 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1131,13 +1131,15 @@ void printk_tick(void)
int printk_needs_cpu(int cpu)
{
+ if (unlikely(cpu_is_offline(cpu)))
+ printk_tick();
return per_cpu(printk_pending, cpu);
}
void wake_up_klogd(void)
{
if (waitqueue_active(&log_wait))
- __raw_get_cpu_var(printk_pending) = 1;
+ this_cpu_write(printk_pending, 1);
}
/**
diff --git a/kernel/sched.c b/kernel/sched.c
index 17c6925e2016..a2e90f022cdb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -567,7 +567,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
* A queue event has occurred, and we're going to schedule. In
* this case, we can save a useless back to back clock update.
*/
- if (test_tsk_need_resched(p))
+ if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr))
rq->skip_clock_update = 1;
}
@@ -724,7 +724,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[64];
- char *cmp = buf;
+ char *cmp;
int neg = 0;
int i;
@@ -735,6 +735,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
return -EFAULT;
buf[cnt] = 0;
+ cmp = strstrip(buf);
if (strncmp(buf, "NO_", 3) == 0) {
neg = 1;
@@ -742,9 +743,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
}
for (i = 0; sched_feat_names[i]; i++) {
- int len = strlen(sched_feat_names[i]);
-
- if (strncmp(cmp, sched_feat_names[i], len) == 0) {
+ if (strcmp(cmp, sched_feat_names[i]) == 0) {
if (neg)
sysctl_sched_features &= ~(1UL << i);
else
@@ -1859,12 +1858,6 @@ static void dec_nr_running(struct rq *rq)
static void set_load_weight(struct task_struct *p)
{
- if (task_has_rt_policy(p)) {
- p->se.load.weight = 0;
- p->se.load.inv_weight = WMULT_CONST;
- return;
- }
-
/*
* SCHED_IDLE tasks get minimal weight:
*/
@@ -2975,6 +2968,15 @@ static long calc_load_fold_active(struct rq *this_rq)
return delta;
}
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+ load *= exp;
+ load += active * (FIXED_1 - exp);
+ load += 1UL << (FSHIFT - 1);
+ return load >> FSHIFT;
+}
+
#ifdef CONFIG_NO_HZ
/*
* For NO_HZ we delay the active fold to the next LOAD_FREQ update.
@@ -3004,6 +3006,128 @@ static long calc_load_fold_idle(void)
return delta;
}
+
+/**
+ * fixed_power_int - compute: x^n, in O(log n) time
+ *
+ * @x: base of the power
+ * @frac_bits: fractional bits of @x
+ * @n: power to raise @x to.
+ *
+ * By exploiting the relation between the definition of the natural power
+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and
+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
+ * (where: n_i \elem {0, 1}, the binary vector representing n),
+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
+ * of course trivially computable in O(log_2 n), the length of our binary
+ * vector.
+ */
+static unsigned long
+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
+{
+ unsigned long result = 1UL << frac_bits;
+
+ if (n) for (;;) {
+ if (n & 1) {
+ result *= x;
+ result += 1UL << (frac_bits - 1);
+ result >>= frac_bits;
+ }
+ n >>= 1;
+ if (!n)
+ break;
+ x *= x;
+ x += 1UL << (frac_bits - 1);
+ x >>= frac_bits;
+ }
+
+ return result;
+}
+
+/*
+ * a1 = a0 * e + a * (1 - e)
+ *
+ * a2 = a1 * e + a * (1 - e)
+ * = (a0 * e + a * (1 - e)) * e + a * (1 - e)
+ * = a0 * e^2 + a * (1 - e) * (1 + e)
+ *
+ * a3 = a2 * e + a * (1 - e)
+ * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
+ * = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
+ *
+ * ...
+ *
+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
+ * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
+ * = a0 * e^n + a * (1 - e^n)
+ *
+ * [1] application of the geometric series:
+ *
+ * n 1 - x^(n+1)
+ * S_n := \Sum x^i = -------------
+ * i=0 1 - x
+ */
+static unsigned long
+calc_load_n(unsigned long load, unsigned long exp,
+ unsigned long active, unsigned int n)
+{
+
+ return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
+}
+
+/*
+ * NO_HZ can leave us missing all per-cpu ticks calling
+ * calc_load_account_active(), but since an idle CPU folds its delta into
+ * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
+ * in the pending idle delta if our idle period crossed a load cycle boundary.
+ *
+ * Once we've updated the global active value, we need to apply the exponential
+ * weights adjusted to the number of cycles missed.
+ */
+static void calc_global_nohz(unsigned long ticks)
+{
+ long delta, active, n;
+
+ if (time_before(jiffies, calc_load_update))
+ return;
+
+ /*
+ * If we crossed a calc_load_update boundary, make sure to fold
+ * any pending idle changes, the respective CPUs might have
+ * missed the tick driven calc_load_account_active() update
+ * due to NO_HZ.
+ */
+ delta = calc_load_fold_idle();
+ if (delta)
+ atomic_long_add(delta, &calc_load_tasks);
+
+ /*
+ * If we were idle for multiple load cycles, apply them.
+ */
+ if (ticks >= LOAD_FREQ) {
+ n = ticks / LOAD_FREQ;
+
+ active = atomic_long_read(&calc_load_tasks);
+ active = active > 0 ? active * FIXED_1 : 0;
+
+ avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
+ avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
+ avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
+
+ calc_load_update += n * LOAD_FREQ;
+ }
+
+ /*
+ * Its possible the remainder of the above division also crosses
+ * a LOAD_FREQ period, the regular check in calc_global_load()
+ * which comes after this will take care of that.
+ *
+ * Consider us being 11 ticks before a cycle completion, and us
+ * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
+ * age us 4 cycles, and the test in calc_global_load() will
+ * pick up the final one.
+ */
+}
#else
static void calc_load_account_idle(struct rq *this_rq)
{
@@ -3013,6 +3137,10 @@ static inline long calc_load_fold_idle(void)
{
return 0;
}
+
+static void calc_global_nohz(unsigned long ticks)
+{
+}
#endif
/**
@@ -3030,24 +3158,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
loads[2] = (avenrun[2] + offset) << shift;
}
-static unsigned long
-calc_load(unsigned long load, unsigned long exp, unsigned long active)
-{
- load *= exp;
- load += active * (FIXED_1 - exp);
- return load >> FSHIFT;
-}
-
/*
* calc_load - update the avenrun load estimates 10 ticks after the
* CPUs have updated calc_load_tasks.
*/
-void calc_global_load(void)
+void calc_global_load(unsigned long ticks)
{
- unsigned long upd = calc_load_update + 10;
long active;
- if (time_before(jiffies, upd))
+ calc_global_nohz(ticks);
+
+ if (time_before(jiffies, calc_load_update + 10))
return;
active = atomic_long_read(&calc_load_tasks);
@@ -3701,7 +3822,6 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev)
{
if (prev->se.on_rq)
update_rq_clock(rq);
- rq->skip_clock_update = 0;
prev->sched_class->put_prev_task(rq, prev);
}
@@ -3763,7 +3883,6 @@ need_resched_nonpreemptible:
hrtick_clear(rq);
raw_spin_lock_irq(&rq->lock);
- clear_tsk_need_resched(prev);
switch_count = &prev->nivcsw;
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
@@ -3795,6 +3914,8 @@ need_resched_nonpreemptible:
put_prev_task(rq, prev);
next = pick_next_task(rq);
+ clear_tsk_need_resched(prev);
+ rq->skip_clock_update = 0;
if (likely(prev != next)) {
sched_info_switch(prev, next);
@@ -5338,7 +5459,19 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
idle->se.exec_start = sched_clock();
cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
+ /*
+ * We're having a chicken and egg problem, even though we are
+ * holding rq->lock, the cpu isn't yet set to this cpu so the
+ * lockdep check in task_group() will fail.
+ *
+ * Similar case to sched_fork(). / Alternatively we could
+ * use task_rq_lock() here and obtain the other rq->lock.
+ *
+ * Silence PROVE_RCU
+ */
+ rcu_read_lock();
__set_task_cpu(idle, cpu);
+ rcu_read_unlock();
rq->curr = rq->idle = idle;
#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
diff --git a/kernel/timer.c b/kernel/timer.c
index 97bf05baade7..102ad370dddb 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1252,6 +1252,12 @@ unsigned long get_next_timer_interrupt(unsigned long now)
struct tvec_base *base = __get_cpu_var(tvec_bases);
unsigned long expires;
+ /*
+ * Pretend that there is no timer pending if the cpu is offline.
+ * Possible pending timers will be migrated later to an active cpu.
+ */
+ if (cpu_is_offline(smp_processor_id()))
+ return now + NEXT_TIMER_MAX_DELTA;
spin_lock(&base->lock);
if (time_before_eq(base->next_timer, base->timer_jiffies))
base->next_timer = __next_timer_interrupt(base);
@@ -1316,7 +1322,7 @@ void do_timer(unsigned long ticks)
{
jiffies_64 += ticks;
update_wall_time();
- calc_global_load();
+ calc_global_load(ticks);
}
#ifdef __ARCH_WANT_SYS_ALARM
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9ec59f541156..7702f5aecd07 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2320,11 +2320,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf,
return count;
}
+static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
+{
+ if (file->f_mode & FMODE_READ)
+ return seq_lseek(file, offset, origin);
+ else
+ return 0;
+}
+
static const struct file_operations tracing_fops = {
.open = tracing_open,
.read = seq_read,
.write = tracing_write_stub,
- .llseek = seq_lseek,
+ .llseek = tracing_seek,
.release = tracing_release,
};
diff --git a/kernel/user.c b/kernel/user.c
index 7e72614b736d..8ce395f74d47 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -157,6 +157,7 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid)
spin_lock_irq(&uidhash_lock);
up = uid_hash_find(uid, hashent);
if (up) {
+ put_user_ns(ns);
key_put(new->uid_keyring);
key_put(new->session_keyring);
kmem_cache_free(uid_cachep, new);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7f9c3c52ecc1..e7f81f8bc43e 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -377,7 +377,8 @@ static int watchdog_nmi_enable(int cpu)
goto out_save;
}
- printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
+ printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n",
+ cpu, PTR_ERR(event));
return -1;
/* success path */