diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/exit.c | 8 | ||||
-rw-r--r-- | kernel/fork.c | 1 | ||||
-rw-r--r-- | kernel/power/user.c | 2 | ||||
-rw-r--r-- | kernel/printk.c | 4 | ||||
-rw-r--r-- | kernel/sched.c | 156 | ||||
-rw-r--r-- | kernel/timer.c | 8 | ||||
-rw-r--r-- | kernel/trace/trace.c | 10 | ||||
-rw-r--r-- | kernel/user.c | 1 | ||||
-rw-r--r-- | kernel/watchdog.c | 3 |
9 files changed, 174 insertions, 19 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index d72167d500cb..abc46d9cf5d8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -95,6 +95,14 @@ static void __exit_signal(struct task_struct *tsk) sig->tty = NULL; } else { /* + * This can only happen if the caller is de_thread(). + * FIXME: this is the temporary hack, we should teach + * posix-cpu-timers to handle this case correctly. + */ + if (unlikely(has_group_leader_pid(tsk))) + posix_cpu_timers_exit_group(tsk); + + /* * If there is any task waiting for the group exit * then notify it: */ diff --git a/kernel/fork.c b/kernel/fork.c index f3d93ab730c6..b1962c287d3f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -288,6 +288,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); + clear_tsk_need_resched(tsk); stackend = end_of_stack(tsk); *stackend = STACK_END_MAGIC; /* for overflow detection */ diff --git a/kernel/power/user.c b/kernel/power/user.c index 1b2ea31e6bd8..c36c3b9e8a84 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -137,7 +137,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) free_all_swap_pages(data->swap); if (data->frozen) thaw_processes(); - pm_notifier_call_chain(data->mode == O_WRONLY ? + pm_notifier_call_chain(data->mode == O_RDONLY ? PM_POST_HIBERNATION : PM_POST_RESTORE); atomic_inc(&snapshot_device_available); diff --git a/kernel/printk.c b/kernel/printk.c index 48ce53f5bb0a..8d41a50af0db 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1131,13 +1131,15 @@ void printk_tick(void) int printk_needs_cpu(int cpu) { + if (unlikely(cpu_is_offline(cpu))) + printk_tick(); return per_cpu(printk_pending, cpu); } void wake_up_klogd(void) { if (waitqueue_active(&log_wait)) - __raw_get_cpu_var(printk_pending) = 1; + this_cpu_write(printk_pending, 1); } /** diff --git a/kernel/sched.c b/kernel/sched.c index 6da730388fc2..a2e90f022cdb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -567,7 +567,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) * A queue event has occurred, and we're going to schedule. In * this case, we can save a useless back to back clock update. */ - if (test_tsk_need_resched(p)) + if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) rq->skip_clock_update = 1; } @@ -2968,6 +2968,15 @@ static long calc_load_fold_active(struct rq *this_rq) return delta; } +static unsigned long +calc_load(unsigned long load, unsigned long exp, unsigned long active) +{ + load *= exp; + load += active * (FIXED_1 - exp); + load += 1UL << (FSHIFT - 1); + return load >> FSHIFT; +} + #ifdef CONFIG_NO_HZ /* * For NO_HZ we delay the active fold to the next LOAD_FREQ update. @@ -2997,6 +3006,128 @@ static long calc_load_fold_idle(void) return delta; } + +/** + * fixed_power_int - compute: x^n, in O(log n) time + * + * @x: base of the power + * @frac_bits: fractional bits of @x + * @n: power to raise @x to. + * + * By exploiting the relation between the definition of the natural power + * function: x^n := x*x*...*x (x multiplied by itself for n times), and + * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i, + * (where: n_i \elem {0, 1}, the binary vector representing n), + * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is + * of course trivially computable in O(log_2 n), the length of our binary + * vector. + */ +static unsigned long +fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) +{ + unsigned long result = 1UL << frac_bits; + + if (n) for (;;) { + if (n & 1) { + result *= x; + result += 1UL << (frac_bits - 1); + result >>= frac_bits; + } + n >>= 1; + if (!n) + break; + x *= x; + x += 1UL << (frac_bits - 1); + x >>= frac_bits; + } + + return result; +} + +/* + * a1 = a0 * e + a * (1 - e) + * + * a2 = a1 * e + a * (1 - e) + * = (a0 * e + a * (1 - e)) * e + a * (1 - e) + * = a0 * e^2 + a * (1 - e) * (1 + e) + * + * a3 = a2 * e + a * (1 - e) + * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e) + * = a0 * e^3 + a * (1 - e) * (1 + e + e^2) + * + * ... + * + * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1] + * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e) + * = a0 * e^n + a * (1 - e^n) + * + * [1] application of the geometric series: + * + * n 1 - x^(n+1) + * S_n := \Sum x^i = ------------- + * i=0 1 - x + */ +static unsigned long +calc_load_n(unsigned long load, unsigned long exp, + unsigned long active, unsigned int n) +{ + + return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); +} + +/* + * NO_HZ can leave us missing all per-cpu ticks calling + * calc_load_account_active(), but since an idle CPU folds its delta into + * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold + * in the pending idle delta if our idle period crossed a load cycle boundary. + * + * Once we've updated the global active value, we need to apply the exponential + * weights adjusted to the number of cycles missed. + */ +static void calc_global_nohz(unsigned long ticks) +{ + long delta, active, n; + + if (time_before(jiffies, calc_load_update)) + return; + + /* + * If we crossed a calc_load_update boundary, make sure to fold + * any pending idle changes, the respective CPUs might have + * missed the tick driven calc_load_account_active() update + * due to NO_HZ. + */ + delta = calc_load_fold_idle(); + if (delta) + atomic_long_add(delta, &calc_load_tasks); + + /* + * If we were idle for multiple load cycles, apply them. + */ + if (ticks >= LOAD_FREQ) { + n = ticks / LOAD_FREQ; + + active = atomic_long_read(&calc_load_tasks); + active = active > 0 ? active * FIXED_1 : 0; + + avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); + avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); + avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); + + calc_load_update += n * LOAD_FREQ; + } + + /* + * Its possible the remainder of the above division also crosses + * a LOAD_FREQ period, the regular check in calc_global_load() + * which comes after this will take care of that. + * + * Consider us being 11 ticks before a cycle completion, and us + * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will + * age us 4 cycles, and the test in calc_global_load() will + * pick up the final one. + */ +} #else static void calc_load_account_idle(struct rq *this_rq) { @@ -3006,6 +3137,10 @@ static inline long calc_load_fold_idle(void) { return 0; } + +static void calc_global_nohz(unsigned long ticks) +{ +} #endif /** @@ -3023,24 +3158,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) loads[2] = (avenrun[2] + offset) << shift; } -static unsigned long -calc_load(unsigned long load, unsigned long exp, unsigned long active) -{ - load *= exp; - load += active * (FIXED_1 - exp); - return load >> FSHIFT; -} - /* * calc_load - update the avenrun load estimates 10 ticks after the * CPUs have updated calc_load_tasks. */ -void calc_global_load(void) +void calc_global_load(unsigned long ticks) { - unsigned long upd = calc_load_update + 10; long active; - if (time_before(jiffies, upd)) + calc_global_nohz(ticks); + + if (time_before(jiffies, calc_load_update + 10)) return; active = atomic_long_read(&calc_load_tasks); @@ -3694,7 +3822,6 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev) { if (prev->se.on_rq) update_rq_clock(rq); - rq->skip_clock_update = 0; prev->sched_class->put_prev_task(rq, prev); } @@ -3756,7 +3883,6 @@ need_resched_nonpreemptible: hrtick_clear(rq); raw_spin_lock_irq(&rq->lock); - clear_tsk_need_resched(prev); switch_count = &prev->nivcsw; if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { @@ -3788,6 +3914,8 @@ need_resched_nonpreemptible: put_prev_task(rq, prev); next = pick_next_task(rq); + clear_tsk_need_resched(prev); + rq->skip_clock_update = 0; if (likely(prev != next)) { sched_info_switch(prev, next); diff --git a/kernel/timer.c b/kernel/timer.c index 97bf05baade7..102ad370dddb 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1252,6 +1252,12 @@ unsigned long get_next_timer_interrupt(unsigned long now) struct tvec_base *base = __get_cpu_var(tvec_bases); unsigned long expires; + /* + * Pretend that there is no timer pending if the cpu is offline. + * Possible pending timers will be migrated later to an active cpu. + */ + if (cpu_is_offline(smp_processor_id())) + return now + NEXT_TIMER_MAX_DELTA; spin_lock(&base->lock); if (time_before_eq(base->next_timer, base->timer_jiffies)) base->next_timer = __next_timer_interrupt(base); @@ -1316,7 +1322,7 @@ void do_timer(unsigned long ticks) { jiffies_64 += ticks; update_wall_time(); - calc_global_load(); + calc_global_load(ticks); } #ifdef __ARCH_WANT_SYS_ALARM diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9ec59f541156..7702f5aecd07 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2320,11 +2320,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf, return count; } +static loff_t tracing_seek(struct file *file, loff_t offset, int origin) +{ + if (file->f_mode & FMODE_READ) + return seq_lseek(file, offset, origin); + else + return 0; +} + static const struct file_operations tracing_fops = { .open = tracing_open, .read = seq_read, .write = tracing_write_stub, - .llseek = seq_lseek, + .llseek = tracing_seek, .release = tracing_release, }; diff --git a/kernel/user.c b/kernel/user.c index 7e72614b736d..8ce395f74d47 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -157,6 +157,7 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) spin_lock_irq(&uidhash_lock); up = uid_hash_find(uid, hashent); if (up) { + put_user_ns(ns); key_put(new->uid_keyring); key_put(new->session_keyring); kmem_cache_free(uid_cachep, new); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7f9c3c52ecc1..e7f81f8bc43e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -377,7 +377,8 @@ static int watchdog_nmi_enable(int cpu) goto out_save; } - printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); + printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n", + cpu, PTR_ERR(event)); return -1; /* success path */ |