Diffstat (limited to 'kernel'): 98 files changed, 650 insertions, 179 deletions
diff --git a/kernel/async.c b/kernel/async.c index 4c2843c0043e..80b74b88fefe 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -51,7 +51,7 @@ asynchronous and synchronous parts of the kernel. #include <linux/async.h> #include <linux/atomic.h> #include <linux/ktime.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/wait.h> #include <linux/sched.h> #include <linux/slab.h> diff --git a/kernel/audit.c b/kernel/audit.c index 0a1355ca3d79..09fae2677a45 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -45,7 +45,7 @@ #include <asm/types.h> #include <linux/atomic.h> #include <linux/mm.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/kthread.h> diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ce4b054acee5..47b7fc1ea893 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -48,7 +48,7 @@ #include <linux/fs.h> #include <linux/namei.h> #include <linux/mm.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/mount.h> #include <linux/socket.h> diff --git a/kernel/capability.c b/kernel/capability.c index 283c529f8b1c..b463871a4e69 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -10,7 +10,7 @@ #include <linux/audit.h> #include <linux/capability.h> #include <linux/mm.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/pid_namespace.h> diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e691818d7e45..213c0351dad8 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -14,7 +14,7 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/cgroup.h> #include <linux/fs.h> @@ -153,6 +153,13 @@ static void freezer_destroy(struct cgroup_subsys *ss, kfree(cgroup_freezer(cgroup)); } +/* task is frozen or will freeze immediately when next it gets woken */ +static bool is_task_frozen_enough(struct task_struct *task) +{ + return frozen(task) || + (task_is_stopped_or_traced(task) && freezing(task)); +} + /* * The call to cgroup_lock() in the freezer.state write method prevents * a write to that file racing against an attach, and hence the @@ -231,7 +238,7 @@ static void update_if_frozen(struct cgroup *cgroup, cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { ntotal++; - if (frozen(task)) + if (is_task_frozen_enough(task)) nfrozen++; } @@ -284,7 +291,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) while ((task = cgroup_iter_next(cgroup, &it))) { if (!freeze_task(task, true)) continue; - if (frozen(task)) + if (is_task_frozen_enough(task)) continue; if (!freezing(task) && !freezer_should_skip(task)) num_cant_freeze_now++; diff --git a/kernel/compat.c b/kernel/compat.c index e2435ee9993a..f346cedfe24d 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -21,6 +21,7 @@ #include <linux/unistd.h> #include <linux/security.h> #include <linux/timex.h> +#include <linux/export.h> #include <linux/migrate.h> #include <linux/posix-timers.h> #include <linux/times.h> diff --git a/kernel/cpu.c b/kernel/cpu.c index 12b7458f23b1..563f13609470 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -10,11 +10,12 @@ #include <linux/sched.h> #include <linux/unistd.h> #include <linux/cpu.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/kthread.h> #include <linux/stop_machine.h> #include 
<linux/mutex.h> #include <linux/gfp.h> +#include <linux/suspend.h> #ifdef CONFIG_SMP /* Serializes the updates to cpu_online_mask, cpu_present_mask */ @@ -476,6 +477,79 @@ static int alloc_frozen_cpus(void) return 0; } core_initcall(alloc_frozen_cpus); + +/* + * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU + * hotplug when tasks are about to be frozen. Also, don't allow the freezer + * to continue until any currently running CPU hotplug operation gets + * completed. + * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the + * 'cpu_add_remove_lock'. And this same lock is also taken by the regular + * CPU hotplug path and released only after it is complete. Thus, we + * (and hence the freezer) will block here until any currently running CPU + * hotplug operation gets completed. + */ +void cpu_hotplug_disable_before_freeze(void) +{ + cpu_maps_update_begin(); + cpu_hotplug_disabled = 1; + cpu_maps_update_done(); +} + + +/* + * When tasks have been thawed, re-enable regular CPU hotplug (which had been + * disabled while beginning to freeze tasks). + */ +void cpu_hotplug_enable_after_thaw(void) +{ + cpu_maps_update_begin(); + cpu_hotplug_disabled = 0; + cpu_maps_update_done(); +} + +/* + * When callbacks for CPU hotplug notifications are being executed, we must + * ensure that the state of the system with respect to the tasks being frozen + * or not, as reported by the notification, remains unchanged *throughout the + * duration* of the execution of the callbacks. + * Hence we need to prevent the freezer from racing with regular CPU hotplug. + * + * This synchronization is implemented by mutually excluding regular CPU + * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/ + * Hibernate notifications. + */ +static int +cpu_hotplug_pm_callback(struct notifier_block *nb, + unsigned long action, void *ptr) +{ + switch (action) { + + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + cpu_hotplug_disable_before_freeze(); + break; + + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + cpu_hotplug_enable_after_thaw(); + break; + + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + + +int cpu_hotplug_pm_sync_init(void) +{ + pm_notifier(cpu_hotplug_pm_callback, 0); + return 0; +} +core_initcall(cpu_hotplug_pm_sync_init); + #endif /* CONFIG_PM_SLEEP_SMP */ /** diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ed0ff443f036..9fe58c46a426 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -37,7 +37,7 @@ #include <linux/mempolicy.h> #include <linux/mm.h> #include <linux/memory.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/pagemap.h> diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c index 69ebf3380bac..c766ee54c0b1 100644 --- a/kernel/crash_dump.c +++ b/kernel/crash_dump.c @@ -2,7 +2,7 @@ #include <linux/crash_dump.h> #include <linux/init.h> #include <linux/errno.h> -#include <linux/module.h> +#include <linux/export.h> /* * If we have booted due to a crash, max_pfn will be a very low value. We need diff --git a/kernel/cred.c b/kernel/cred.c index bb55d052d858..5791612a4045 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. 
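The cpu_hotplug_pm_callback() hunk above is a standard use of the PM notifier chain. As a hedged illustration of the same pattern from a hypothetical subsystem "foo" (foo_quiesce()/foo_resume() are invented; register_pm_notifier(), the PM_* actions and the NOTIFY_* return codes are the real API):

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/suspend.h>

/* hypothetical helpers standing in for whatever 'foo' must quiesce */
static void foo_quiesce(void) { /* block foo's hot paths before freeze */ }
static void foo_resume(void)  { /* re-enable them after thaw */ }

static int foo_pm_callback(struct notifier_block *nb,
			   unsigned long action, void *ptr)
{
	switch (action) {
	case PM_SUSPEND_PREPARE:
	case PM_HIBERNATION_PREPARE:
		foo_quiesce();
		break;
	case PM_POST_SUSPEND:
	case PM_POST_HIBERNATION:
		foo_resume();
		break;
	default:
		return NOTIFY_DONE;	/* not interested in other events */
	}
	return NOTIFY_OK;
}

static struct notifier_block foo_pm_nb = {
	.notifier_call = foo_pm_callback,
};

static int __init foo_pm_init(void)
{
	return register_pm_notifier(&foo_pm_nb);
}
core_initcall(foo_pm_init);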
*/ -#include <linux/module.h> +#include <linux/export.h> #include <linux/cred.h> #include <linux/slab.h> #include <linux/sched.h> diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index d9ca9aa481ec..8b68ce78ff17 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c @@ -11,6 +11,7 @@ #include <linux/kgdb.h> #include <linux/kdb.h> #include <linux/kdebug.h> +#include <linux/export.h> #include "kdb_private.h" #include "../debug_core.h" diff --git a/kernel/dma.c b/kernel/dma.c index f903189c5304..68a2306522c8 100644 --- a/kernel/dma.c +++ b/kernel/dma.c @@ -9,7 +9,7 @@ * [It also happened to remove the sizeof(char *) == sizeof(int) * assumption introduced because of those /proc/dma patches. -- Hennus] */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/spinlock.h> diff --git a/kernel/events/core.c b/kernel/events/core.c index e1253faa34dd..d3b9df5962c2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -25,6 +25,7 @@ #include <linux/reboot.h> #include <linux/vmstat.h> #include <linux/device.h> +#include <linux/export.h> #include <linux/vmalloc.h> #include <linux/hardirq.h> #include <linux/rculist.h> @@ -184,6 +185,9 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, static void update_context_time(struct perf_event_context *ctx); static u64 perf_event_time(struct perf_event *event); +static void ring_buffer_attach(struct perf_event *event, + struct ring_buffer *rb); + void __weak perf_event_print_debug(void) { } extern __weak const char *perf_pmu_name(void) @@ -2170,9 +2174,10 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, */ cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); - perf_event_sched_in(cpuctx, ctx, task); + if (ctx->nr_events) + cpuctx->task_ctx = ctx; - cpuctx->task_ctx = ctx; + perf_event_sched_in(cpuctx, cpuctx->task_ctx, task); perf_pmu_enable(ctx->pmu); perf_ctx_unlock(cpuctx, ctx); @@ -3189,12 +3194,33 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) struct ring_buffer *rb; unsigned int events = POLL_HUP; + /* + * Race between perf_event_set_output() and perf_poll(): perf_poll() + * grabs the rb reference but perf_event_set_output() overrides it. 
+ * Here is the timeline for two threads T1, T2: + * t0: T1, rb = rcu_dereference(event->rb) + * t1: T2, old_rb = event->rb + * t2: T2, event->rb = new rb + * t3: T2, ring_buffer_detach(old_rb) + * t4: T1, ring_buffer_attach(rb1) + * t5: T1, poll_wait(event->waitq) + * + * To avoid this problem, we grab mmap_mutex in perf_poll() + * thereby ensuring that the assignment of the new ring buffer + * and the detachment of the old buffer appear atomic to perf_poll() + */ + mutex_lock(&event->mmap_mutex); + rcu_read_lock(); rb = rcu_dereference(event->rb); - if (rb) + if (rb) { + ring_buffer_attach(event, rb); events = atomic_xchg(&rb->poll, 0); + } rcu_read_unlock(); + mutex_unlock(&event->mmap_mutex); + poll_wait(file, &event->waitq, wait); return events; @@ -3495,6 +3521,49 @@ unlock: return ret; } +static void ring_buffer_attach(struct perf_event *event, + struct ring_buffer *rb) +{ + unsigned long flags; + + if (!list_empty(&event->rb_entry)) + return; + + spin_lock_irqsave(&rb->event_lock, flags); + if (!list_empty(&event->rb_entry)) + goto unlock; + + list_add(&event->rb_entry, &rb->event_list); +unlock: + spin_unlock_irqrestore(&rb->event_lock, flags); +} + +static void ring_buffer_detach(struct perf_event *event, + struct ring_buffer *rb) +{ + unsigned long flags; + + if (list_empty(&event->rb_entry)) + return; + + spin_lock_irqsave(&rb->event_lock, flags); + list_del_init(&event->rb_entry); + wake_up_all(&event->waitq); + spin_unlock_irqrestore(&rb->event_lock, flags); +} + +static void ring_buffer_wakeup(struct perf_event *event) +{ + struct ring_buffer *rb; + + rcu_read_lock(); + rb = rcu_dereference(event->rb); + list_for_each_entry_rcu(event, &rb->event_list, rb_entry) { + wake_up_all(&event->waitq); + } + rcu_read_unlock(); +} + static void rb_free_rcu(struct rcu_head *rcu_head) { struct ring_buffer *rb; @@ -3520,9 +3589,19 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event) static void ring_buffer_put(struct ring_buffer *rb) { + struct perf_event *event, *n; + unsigned long flags; + if (!atomic_dec_and_test(&rb->refcount)) return; + spin_lock_irqsave(&rb->event_lock, flags); + list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) { + list_del_init(&event->rb_entry); + wake_up_all(&event->waitq); + } + spin_unlock_irqrestore(&rb->event_lock, flags); + call_rcu(&rb->rcu_head, rb_free_rcu); } @@ -3545,6 +3624,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); vma->vm_mm->pinned_vm -= event->mmap_locked; rcu_assign_pointer(event->rb, NULL); + ring_buffer_detach(event, rb); mutex_unlock(&event->mmap_mutex); ring_buffer_put(rb); @@ -3699,7 +3779,7 @@ static const struct file_operations perf_fops = { void perf_event_wakeup(struct perf_event *event) { - wake_up_all(&event->waitq); + ring_buffer_wakeup(event); if (event->pending_kill) { kill_fasync(&event->fasync, SIGIO, event->pending_kill); @@ -5821,6 +5901,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, INIT_LIST_HEAD(&event->group_entry); INIT_LIST_HEAD(&event->event_entry); INIT_LIST_HEAD(&event->sibling_list); + INIT_LIST_HEAD(&event->rb_entry); + init_waitqueue_head(&event->waitq); init_irq_work(&event->pending, perf_pending_event); @@ -6027,6 +6109,8 @@ set: old_rb = event->rb; rcu_assign_pointer(event->rb, rb); + if (old_rb) + ring_buffer_detach(event, old_rb); ret = 0; unlock: mutex_unlock(&event->mmap_mutex); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 09097dd8116c..64568a699375 100644 --- 
a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -22,6 +22,9 @@ struct ring_buffer { local_t lost; /* nr records lost */ long watermark; /* wakeup watermark */ + /* poll crap */ + spinlock_t event_lock; + struct list_head event_list; struct perf_event_mmap_page *user_page; void *data_pages[0]; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index a2a29205cc0f..7f3011c6b57f 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -209,6 +209,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) rb->writable = 1; atomic_set(&rb->refcount, 1); + + INIT_LIST_HEAD(&rb->event_list); + spin_lock_init(&rb->event_lock); } #ifndef CONFIG_PERF_USE_VMALLOC diff --git a/kernel/fork.c b/kernel/fork.c index 70d76191afb9..da4a6a10d088 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -162,7 +162,6 @@ static void account_kernel_stack(struct thread_info *ti, int account) void free_task(struct task_struct *tsk) { - prop_local_destroy_single(&tsk->dirties); account_kernel_stack(tsk->stack, -1); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); @@ -274,10 +273,6 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) tsk->stack = ti; - err = prop_local_init_single(&tsk->dirties); - if (err) - goto out; - setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); clear_tsk_need_resched(tsk); @@ -1299,6 +1294,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->pdeath_signal = 0; p->exit_state = 0; + p->nr_dirtied = 0; + p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); + /* * Ok, make it visible to the rest of the system. * We dont wake it up yet. diff --git a/kernel/freezer.c b/kernel/freezer.c index 66a594e8ad2f..7be56c534397 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -6,7 +6,7 @@ #include <linux/interrupt.h> #include <linux/suspend.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/syscalls.h> #include <linux/freezer.h> @@ -67,7 +67,7 @@ static void fake_signal_wake_up(struct task_struct *p) unsigned long flags; spin_lock_irqsave(&p->sighand->siglock, flags); - signal_wake_up(p, 1); + signal_wake_up(p, 0); spin_unlock_irqrestore(&p->sighand->siglock, flags); } diff --git a/kernel/futex.c b/kernel/futex.c index 1511dff0cfd6..ea87f4d2f455 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -55,7 +55,7 @@ #include <linux/pagemap.h> #include <linux/syscalls.h> #include <linux/signal.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/magic.h> #include <linux/pid.h> #include <linux/nsproxy.h> diff --git a/kernel/groups.c b/kernel/groups.c index 1cc476d52dd3..99b53d1eb7ea 100644 --- a/kernel/groups.c +++ b/kernel/groups.c @@ -2,7 +2,7 @@ * Supplementary group IDs */ #include <linux/cred.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/security.h> #include <linux/syscalls.h> diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index a9205e32a059..ae34bf51682b 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -32,7 +32,7 @@ */ #include <linux/cpu.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/percpu.h> #include <linux/hrtimer.h> #include <linux/notifier.h> @@ -885,10 +885,13 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, unsigned long newstate, int reprogram) { + struct timerqueue_node *next_timer; if (!(timer->state & HRTIMER_STATE_ENQUEUED)) goto out; - if (&timer->node == timerqueue_getnext(&base->active)) { + 
next_timer = timerqueue_getnext(&base->active); + timerqueue_del(&base->active, &timer->node); + if (&timer->node == next_timer) { #ifdef CONFIG_HIGH_RES_TIMERS /* Reprogram the clock event device. if enabled */ if (reprogram && hrtimer_hres_active()) { @@ -901,7 +904,6 @@ static void __remove_hrtimer(struct hrtimer *timer, } #endif } - timerqueue_del(&base->active, &timer->node); if (!timerqueue_getnext(&base->active)) base->cpu_base->active_bases &= ~(1 << base->index); out: diff --git a/kernel/hung_task.c b/kernel/hung_task.c index ea640120ab86..8b1748d0172c 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -13,7 +13,7 @@ #include <linux/freezer.h> #include <linux/kthread.h> #include <linux/lockdep.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sysctl.h> /* diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 6cb7613e4bf4..c89295a8f668 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -6,6 +6,7 @@ #include <linux/io.h> #include <linux/irq.h> #include <linux/slab.h> +#include <linux/export.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> #include <linux/syscore_ops.h> diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 1550e8447a16..d86e254b95eb 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -9,7 +9,7 @@ */ #include <linux/irq.h> #include <linux/slab.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> #include <linux/radix-tree.h> diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 67ce837ae52c..1da999f5e746 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -623,8 +623,9 @@ static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id) static int irq_wait_for_interrupt(struct irqaction *action) { + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags)) { @@ -632,7 +633,9 @@ static int irq_wait_for_interrupt(struct irqaction *action) return 0; } schedule(); + set_current_state(TASK_INTERRUPTIBLE); } + __set_current_state(TASK_RUNNING); return -1; } @@ -1596,7 +1599,7 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler, return -ENOMEM; action->handler = handler; - action->flags = IRQF_PERCPU; + action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND; action->name = devname; action->percpu_dev_id = dev_id; diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index aa57d5da18c1..dc813a948be2 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -84,7 +84,9 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force) */ action = desc->action; if (!action || !(action->flags & IRQF_SHARED) || - (action->flags & __IRQF_TIMER) || !action->next) + (action->flags & __IRQF_TIMER) || + (action->handler(irq, action->dev_id) == IRQ_HANDLED) || + !action->next) goto out; /* Already running on another processor */ @@ -115,7 +117,7 @@ static int misrouted_irq(int irq) struct irq_desc *desc; int i, ok = 0; - if (atomic_inc_return(&irq_poll_active) == 1) + if (atomic_inc_return(&irq_poll_active) != 1) goto out; irq_poll_cpu = smp_processor_id(); diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 0e2cde4f380b..c3c46c72046e 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -6,9 +6,11 @@ */ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/irq_work.h> +#include <linux/percpu.h> #include 
<linux/hardirq.h> +#include <asm/processor.h> /* * An entry can be in one of four states: diff --git a/kernel/jump_label.c b/kernel/jump_label.c index a8ce45097f3d..66ff7109f697 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -66,8 +66,9 @@ void jump_label_inc(struct jump_label_key *key) return; jump_label_lock(); - if (atomic_add_return(1, &key->enabled) == 1) + if (atomic_read(&key->enabled) == 0) jump_label_update(key, JUMP_LABEL_ENABLE); + atomic_inc(&key->enabled); jump_label_unlock(); } @@ -104,6 +105,18 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start, return 0; } +/* + * Update code which is definitely not currently executing. + * Architectures which need heavyweight synchronization to modify + * running code can override this to make the non-live update case + * cheaper. + */ +void __weak arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + arch_jump_label_transform(entry, type); +} + static void __jump_label_update(struct jump_label_key *key, struct jump_entry *entry, struct jump_entry *stop, int enable) @@ -121,14 +134,7 @@ static void __jump_label_update(struct jump_label_key *key, } } -/* - * Not all archs need this. - */ -void __weak arch_jump_label_text_poke_early(jump_label_t addr) -{ -} - -static __init int jump_label_init(void) +void __init jump_label_init(void) { struct jump_entry *iter_start = __start___jump_table; struct jump_entry *iter_stop = __stop___jump_table; @@ -139,22 +145,22 @@ static __init int jump_label_init(void) jump_label_sort_entries(iter_start, iter_stop); for (iter = iter_start; iter < iter_stop; iter++) { - arch_jump_label_text_poke_early(iter->code); - if (iter->key == (jump_label_t)(unsigned long)key) + struct jump_label_key *iterk; + + iterk = (struct jump_label_key *)(unsigned long)iter->key; + arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ? 
+ JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE); + if (iterk == key) continue; - key = (struct jump_label_key *)(unsigned long)iter->key; - atomic_set(&key->enabled, 0); + key = iterk; key->entries = iter; #ifdef CONFIG_MODULES key->next = NULL; #endif } jump_label_unlock(); - - return 0; } -early_initcall(jump_label_init); #ifdef CONFIG_MODULES @@ -212,7 +218,7 @@ void jump_label_apply_nops(struct module *mod) return; for (iter = iter_start; iter < iter_stop; iter++) - arch_jump_label_text_poke_early(iter->code); + arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE); } static int jump_label_add_module(struct module *mod) diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 01a0700e873f..c744b88c44e2 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -20,7 +20,7 @@ */ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/log2.h> diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 2f193d0ba7f2..e5d84644823b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -36,7 +36,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/stddef.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/moduleloader.h> #include <linux/kallsyms.h> #include <linux/freezer.h> diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 3b053c04dd86..4e316e1acf58 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -11,10 +11,11 @@ #include <linux/kobject.h> #include <linux/string.h> #include <linux/sysfs.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/init.h> #include <linux/kexec.h> #include <linux/profile.h> +#include <linux/stat.h> #include <linux/sched.h> #include <linux/capability.h> diff --git a/kernel/kthread.c b/kernel/kthread.c index 4ba7cccb4994..b6d216a92639 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -12,7 +12,7 @@ #include <linux/cpuset.h> #include <linux/unistd.h> #include <linux/file.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/freezer.h> diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 4ac8ebfcab59..a462b317f9a0 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -53,7 +53,7 @@ #include <linux/notifier.h> #include <linux/spinlock.h> #include <linux/proc_fs.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/list.h> #include <linux/stacktrace.h> diff --git a/kernel/lockdep.c b/kernel/lockdep.c index e69434b070da..b2e08c932d91 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -44,6 +44,7 @@ #include <linux/stringify.h> #include <linux/bitops.h> #include <linux/gfp.h> +#include <linux/kmemcheck.h> #include <asm/sections.h> @@ -2948,7 +2949,12 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, void lockdep_init_map(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass) { - memset(lock, 0, sizeof(*lock)); + int i; + + kmemcheck_mark_initialized(lock, sizeof(*lock)); + + for (i = 0; i < NR_LOCKDEP_CACHING_CLASSES; i++) + lock->class_cache[i] = NULL; #ifdef CONFIG_LOCK_STAT lock->cpu = raw_smp_processor_id(); diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 71edd2f60c02..91c32a0b612c 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -11,7 +11,7 @@ * Code for /proc/lockdep and /proc/lockdep_stats: * */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include 
<linux/kallsyms.h> diff --git a/kernel/module.c b/kernel/module.c index 93342d992f34..178333c48d1e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -16,7 +16,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/moduleloader.h> #include <linux/ftrace_event.h> #include <linux/init.h> @@ -2487,6 +2487,9 @@ static int check_modinfo(struct module *mod, struct load_info *info) return -ENOEXEC; } + if (!get_modinfo(info, "intree")) + add_taint_module(mod, TAINT_OOT_MODULE); + if (get_modinfo(info, "staging")) { add_taint_module(mod, TAINT_CRAP); printk(KERN_WARNING "%s: module is from the staging directory," @@ -2878,8 +2881,7 @@ static struct module *load_module(void __user *umod, } /* This has to be done once we're sure module name is unique. */ - if (!mod->taints || mod->taints == (1U<<TAINT_CRAP)) - dynamic_debug_setup(info.debug, info.num_debug); + dynamic_debug_setup(info.debug, info.num_debug); /* Find duplicate symbols */ err = verify_export_symbols(mod); @@ -2915,8 +2917,7 @@ static struct module *load_module(void __user *umod, module_bug_cleanup(mod); ddebug: - if (!mod->taints || mod->taints == (1U<<TAINT_CRAP)) - dynamic_debug_remove(info.debug); + dynamic_debug_remove(info.debug); unlock: mutex_unlock(&module_mutex); synchronize_sched(); @@ -3257,6 +3258,8 @@ static char *module_flags(struct module *mod, char *buf) buf[bx++] = '('; if (mod->taints & (1 << TAINT_PROPRIETARY_MODULE)) buf[bx++] = 'P'; + else if (mod->taints & (1 << TAINT_OOT_MODULE)) + buf[bx++] = 'O'; if (mod->taints & (1 << TAINT_FORCED_MODULE)) buf[bx++] = 'F'; if (mod->taints & (1 << TAINT_CRAP)) diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index 73da83aff418..7e3443fe1f48 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -14,7 +14,7 @@ */ #include <linux/mutex.h> #include <linux/delay.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/poison.h> #include <linux/sched.h> #include <linux/spinlock.h> diff --git a/kernel/mutex.c b/kernel/mutex.c index d607ed5dd441..89096dd8786f 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -19,7 +19,7 @@ */ #include <linux/mutex.h> #include <linux/sched.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/debug_locks.h> diff --git a/kernel/notifier.c b/kernel/notifier.c index 8d7b435806c9..2d5cc4ccff7f 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -1,6 +1,6 @@ #include <linux/kdebug.h> #include <linux/kprobes.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/notifier.h> #include <linux/rcupdate.h> #include <linux/vmalloc.h> diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 9aeab4b98c64..b576f7f14bc6 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -14,7 +14,7 @@ */ #include <linux/slab.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/nsproxy.h> #include <linux/init_task.h> #include <linux/mnt_namespace.h> diff --git a/kernel/padata.c b/kernel/padata.c index b91941df5e63..b45259931512 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -18,7 +18,7 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
*/ -#include <linux/module.h> +#include <linux/export.h> #include <linux/cpumask.h> #include <linux/err.h> #include <linux/cpu.h> diff --git a/kernel/panic.c b/kernel/panic.c index d7bb6974efb5..b26593604214 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -177,6 +177,7 @@ static const struct tnt tnts[] = { { TAINT_WARN, 'W', ' ' }, { TAINT_CRAP, 'C', ' ' }, { TAINT_FIRMWARE_WORKAROUND, 'I', ' ' }, + { TAINT_OOT_MODULE, 'O', ' ' }, }; /** @@ -194,6 +195,7 @@ static const struct tnt tnts[] = { * 'W' - Taint on warning. * 'C' - modules from drivers/staging are loaded. * 'I' - Working around severe firmware bug. + * 'O' - Out-of-tree module has been loaded. * * The string is overwritten by the next call to print_tainted(). */ diff --git a/kernel/params.c b/kernel/params.c index 821788947e40..65aae11eb93f 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -15,7 +15,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/moduleparam.h> +#include <linux/module.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> diff --git a/kernel/pid.c b/kernel/pid.c index 8cafe7e72ad2..fa5f72227e5f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -27,7 +27,7 @@ */ #include <linux/mm.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/rculist.h> diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 4556182527f3..69185ae6b701 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -46,7 +46,7 @@ #include <linux/syscalls.h> #include <linux/wait.h> #include <linux/workqueue.h> -#include <linux/module.h> +#include <linux/export.h> /* * Management arrays for POSIX timers. Timers are kept in slab memory diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 1c53f7fad5f7..a6b0503574ee 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -9,6 +9,7 @@ * This file is released under the GPLv2. */ +#include <linux/export.h> #include <linux/suspend.h> #include <linux/syscalls.h> #include <linux/reboot.h> @@ -54,6 +55,8 @@ enum { static int hibernation_mode = HIBERNATION_SHUTDOWN; +static bool freezer_test_done; + static const struct platform_hibernation_ops *hibernation_ops; /** @@ -344,11 +347,24 @@ int hibernation_snapshot(int platform_mode) error = freeze_kernel_threads(); if (error) - goto Close; + goto Cleanup; + + if (hibernation_test(TEST_FREEZER) || + hibernation_testmode(HIBERNATION_TESTPROC)) { + + /* + * Indicate to the caller that we are returning due to a + * successful freezer test. 
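Stepping back to the TAINT_OOT_MODULE hunks in kernel/module.c and kernel/panic.c above: besides the 'O' flag in oops output, the new taint also shows up as a bit in /proc/sys/kernel/tainted. A standalone userspace check, assuming TAINT_OOT_MODULE lands at bit 12 as in this series:

#include <stdio.h>

/* assumption: TAINT_OOT_MODULE is bit 12, per the enum added here */
#define TAINT_OOT_MODULE 12

int main(void)
{
	unsigned long taints = 0;
	FILE *f = fopen("/proc/sys/kernel/tainted", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%lu", &taints) != 1)
		taints = 0;
	fclose(f);

	printf("out-of-tree module loaded: %s\n",
	       (taints & (1UL << TAINT_OOT_MODULE)) ? "yes ('O')" : "no");
	return 0;
}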
+ */ + freezer_test_done = true; + goto Cleanup; + } error = dpm_prepare(PMSG_FREEZE); - if (error) - goto Complete_devices; + if (error) { + dpm_complete(msg); + goto Cleanup; + } suspend_console(); pm_restrict_gfp_mask(); @@ -377,8 +393,6 @@ int hibernation_snapshot(int platform_mode) pm_restore_gfp_mask(); resume_console(); - - Complete_devices: dpm_complete(msg); Close: @@ -388,6 +402,10 @@ int hibernation_snapshot(int platform_mode) Recover_platform: platform_recover(platform_mode); goto Resume_devices; + + Cleanup: + swsusp_free(); + goto Close; } /** @@ -640,15 +658,13 @@ int hibernate(void) if (error) goto Finish; - if (hibernation_test(TEST_FREEZER)) - goto Thaw; - - if (hibernation_testmode(HIBERNATION_TESTPROC)) - goto Thaw; - error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); if (error) goto Thaw; + if (freezer_test_done) { + freezer_test_done = false; + goto Thaw; + } if (in_suspend) { unsigned int flags = 0; diff --git a/kernel/power/main.c b/kernel/power/main.c index a52e88425a31..36e0f0903c32 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -8,6 +8,7 @@ * */ +#include <linux/export.h> #include <linux/kobject.h> #include <linux/string.h> #include <linux/resume-trace.h> @@ -289,13 +290,14 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) break; } - if (state < PM_SUSPEND_MAX && *s) + if (state < PM_SUSPEND_MAX && *s) { error = enter_state(state); if (error) { suspend_stats.fail++; dpm_save_failed_errno(error); } else suspend_stats.success++; + } #endif Exit: diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 1c1797dd1d1d..995e3bd3417b 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -43,6 +43,7 @@ #include <linux/kernel.h> #include <linux/uaccess.h> +#include <linux/export.h> /* * locking rule: all changes to constraints or notifiers lists @@ -69,6 +70,7 @@ static struct pm_qos_constraints cpu_dma_constraints = { }; static struct pm_qos_object cpu_dma_pm_qos = { .constraints = &cpu_dma_constraints, + .name = "cpu_dma_latency", }; static BLOCKING_NOTIFIER_HEAD(network_lat_notifier); @@ -386,8 +388,7 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp) pm_qos_add_request(req, pm_qos_class, PM_QOS_DEFAULT_VALUE); filp->private_data = req; - if (filp->private_data) - return 0; + return 0; } return -EPERM; } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index fdd4263b995d..4953dc054c53 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -12,6 +12,7 @@ #include <linux/delay.h> #include <linux/errno.h> #include <linux/init.h> +#include <linux/kmod.h> #include <linux/console.h> #include <linux/cpu.h> #include <linux/syscalls.h> @@ -21,6 +22,7 @@ #include <linux/list.h> #include <linux/mm.h> #include <linux/slab.h> +#include <linux/export.h> #include <linux/suspend.h> #include <linux/syscore_ops.h> #include <trace/events/power.h> diff --git a/kernel/power/user.c b/kernel/power/user.c index 42ddbc6f0de6..6d8f535c2b88 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -12,6 +12,7 @@ #include <linux/suspend.h> #include <linux/syscalls.h> #include <linux/reboot.h> +#include <linux/kmod.h> #include <linux/string.h> #include <linux/device.h> #include <linux/miscdevice.h> diff --git a/kernel/printk.c b/kernel/printk.c index 1455a0d4eedd..7982a0a841ea 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1293,10 +1293,11 @@ again: raw_spin_lock(&logbuf_lock); if (con_start != log_end) retry = 1; 
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags); + if (retry && console_trylock()) goto again; - raw_spin_unlock_irqrestore(&logbuf_lock, flags); if (wake_klogd) wake_up_klogd(); } diff --git a/kernel/profile.c b/kernel/profile.c index 961b389fe52f..76b8e77773ee 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -13,7 +13,7 @@ * to resolve timer interrupt livelocks, William Irwin, Oracle, 2004 */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/profile.h> #include <linux/bootmem.h> #include <linux/notifier.h> diff --git a/kernel/ptrace.c b/kernel/ptrace.c index a70d2a5d8c7b..24d04477b257 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -8,7 +8,7 @@ */ #include <linux/capability.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/errno.h> #include <linux/mm.h> diff --git a/kernel/range.c b/kernel/range.c index 37fa9b99ad58..9b8ae2d6ed68 100644 --- a/kernel/range.c +++ b/kernel/range.c @@ -1,7 +1,7 @@ /* * Range add and subtract */ -#include <linux/module.h> +#include <linux/kernel.h> #include <linux/init.h> #include <linux/sort.h> diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index ca0d23b6b3e8..c5b98e565aee 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -43,7 +43,7 @@ #include <linux/notifier.h> #include <linux/cpu.h> #include <linux/mutex.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/hardirq.h> #define CREATE_TRACE_POINTS diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index da775c87f27f..636af6d9c6e5 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -22,13 +22,12 @@ * For detailed explanation of Read-Copy Update mechanism see - * Documentation/RCU */ -#include <linux/moduleparam.h> #include <linux/completion.h> #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/rcupdate.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/mutex.h> #include <linux/sched.h> #include <linux/types.h> diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 02aa7139861c..2b0484a5dc28 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -23,6 +23,7 @@ */ #include <linux/kthread.h> +#include <linux/module.h> #include <linux/debugfs.h> #include <linux/seq_file.h> diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e234eb92a177..6b76d812740c 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -38,7 +38,7 @@ #include <linux/nmi.h> #include <linux/atomic.h> #include <linux/bitops.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/completion.h> #include <linux/moduleparam.h> #include <linux/percpu.h> diff --git a/kernel/relay.c b/kernel/relay.c index 859ea5a9605f..226fade4d727 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -15,7 +15,7 @@ #include <linux/errno.h> #include <linux/stddef.h> #include <linux/slab.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/string.h> #include <linux/relay.h> #include <linux/vmalloc.h> diff --git a/kernel/resource.c b/kernel/resource.c index c8dc249da5ce..7640b3a947d0 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -7,7 +7,7 @@ * Arbitrary resource management. 
*/ -#include <linux/module.h> +#include <linux/export.h> #include <linux/errno.h> #include <linux/ioport.h> #include <linux/init.h> diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c index a2e7e7210f3e..8eafd1bd273e 100644 --- a/kernel/rtmutex-debug.c +++ b/kernel/rtmutex-debug.c @@ -18,7 +18,7 @@ */ #include <linux/sched.h> #include <linux/delay.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/spinlock.h> #include <linux/kallsyms.h> #include <linux/syscalls.h> diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index 5c9ccd380966..3d9f31cd79e7 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c @@ -7,7 +7,7 @@ * */ #include <linux/kthread.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/sysdev.h> diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 5e8d9cce7470..f9d8482dd487 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -11,7 +11,7 @@ * See Documentation/rt-mutex-design.txt for details. */ #include <linux/spinlock.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/timer.h> diff --git a/kernel/rwsem.c b/kernel/rwsem.c index 9f48f3d82e9b..b152f74f02de 100644 --- a/kernel/rwsem.c +++ b/kernel/rwsem.c @@ -7,7 +7,7 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/rwsem.h> #include <asm/system.h> diff --git a/kernel/sched.c b/kernel/sched.c index d87c6e5d4e8c..d6b149ccf925 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -71,6 +71,7 @@ #include <linux/ctype.h> #include <linux/ftrace.h> #include <linux/slab.h> +#include <linux/init_task.h> #include <asm/tlb.h> #include <asm/irq_regs.h> @@ -4810,6 +4811,9 @@ EXPORT_SYMBOL(wait_for_completion); * This waits for either a completion of a specific task to be signaled or for a * specified timeout to expire. The timeout is in jiffies. It is not * interruptible. + * + * The return value is 0 if timed out, and positive (at least 1, or number of + * jiffies left till timeout) if completed. */ unsigned long __sched wait_for_completion_timeout(struct completion *x, unsigned long timeout) @@ -4824,6 +4828,8 @@ EXPORT_SYMBOL(wait_for_completion_timeout); * * This waits for completion of a specific task to be signaled. It is * interruptible. + * + * The return value is -ERESTARTSYS if interrupted, 0 if completed. */ int __sched wait_for_completion_interruptible(struct completion *x) { @@ -4841,6 +4847,9 @@ EXPORT_SYMBOL(wait_for_completion_interruptible); * * This waits for either a completion of a specific task to be signaled or for a * specified timeout to expire. It is interruptible. The timeout is in jiffies. + * + * The return value is -ERESTARTSYS if interrupted, 0 if timed out, + * positive (at least 1, or number of jiffies left till timeout) if completed. */ long __sched wait_for_completion_interruptible_timeout(struct completion *x, @@ -4856,6 +4865,8 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout); * * This waits to be signaled for completion of a specific task. It can be * interrupted by a kill signal. + * + * The return value is -ERESTARTSYS if interrupted, 0 if completed. */ int __sched wait_for_completion_killable(struct completion *x) { @@ -4874,6 +4885,9 @@ EXPORT_SYMBOL(wait_for_completion_killable); * This waits for either a completion of a specific task to be * signaled or for a specified timeout to expire. 
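The kerneldoc added to the completion API above pins down each return convention. A hedged usage sketch (the completion, the helper name and the 100ms budget are invented; wait_for_completion_timeout() and msecs_to_jiffies() are the documented calls):

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

/* hypothetical caller: 'dev_done' would be completed from an IRQ handler */
static int wait_for_dev(struct completion *dev_done)
{
	unsigned long left;

	left = wait_for_completion_timeout(dev_done, msecs_to_jiffies(100));
	if (!left)
		return -ETIMEDOUT;	/* 0 means the timeout elapsed */

	/* positive: completed with 'left' jiffies of the budget to spare */
	return 0;
}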
It can be * interrupted by a kill signal. The timeout is in jiffies. + * + * The return value is -ERESTARTSYS if interrupted, 0 if timed out, + * positive (at least 1, or number of jiffies left till timeout) if completed. */ long __sched wait_for_completion_killable_timeout(struct completion *x, @@ -6099,6 +6113,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) */ idle->sched_class = &idle_sched_class; ftrace_graph_init_idle_task(idle, cpu); +#if defined(CONFIG_SMP) + sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); +#endif } /* @@ -7087,8 +7104,6 @@ static int __init isolated_cpu_setup(char *str) __setup("isolcpus=", isolated_cpu_setup); -#define SD_NODES_PER_DOMAIN 16 - #ifdef CONFIG_NUMA /** diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 9d8af0b3fb64..c685e31492df 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -62,7 +62,7 @@ */ #include <linux/spinlock.h> #include <linux/hardirq.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/percpu.h> #include <linux/ktime.h> #include <linux/sched.h> diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 5c9e67923b7c..a78ed2736ba7 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -772,19 +772,32 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) list_del_leaf_cfs_rq(cfs_rq); } +static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq) +{ + long tg_weight; + + /* + * Use this CPU's actual weight instead of the last load_contribution + * to gain a more accurate current total weight. See + * update_cfs_rq_load_contribution(). + */ + tg_weight = atomic_read(&tg->load_weight); + tg_weight -= cfs_rq->load_contribution; + tg_weight += cfs_rq->load.weight; + + return tg_weight; +} + static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) { - long load_weight, load, shares; + long tg_weight, load, shares; + tg_weight = calc_tg_weight(tg, cfs_rq); load = cfs_rq->load.weight; - load_weight = atomic_read(&tg->load_weight); - load_weight += load; - load_weight -= cfs_rq->load_contribution; - shares = (tg->shares * load); - if (load_weight) - shares /= load_weight; + if (tg_weight) + shares /= tg_weight; if (shares < MIN_SHARES) shares = MIN_SHARES; @@ -1743,7 +1756,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq) static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) { - if (!cfs_rq->runtime_enabled || !cfs_rq->nr_running) + if (!cfs_rq->runtime_enabled || cfs_rq->nr_running) return; __return_cfs_rq_runtime(cfs_rq); @@ -2036,36 +2049,100 @@ static void task_waking_fair(struct task_struct *p) * Adding load to a group doesn't make a group heavier, but can cause movement * of group shares between cpus. Assuming the shares were perfectly aligned one * can calculate the shift in shares. + * + * Calculate the effective load difference if @wl is added (subtracted) to @tg + * on this @cpu and results in a total addition (subtraction) of @wg to the + * total group weight. 
+ * + * Given a runqueue weight distribution (rw_i) we can compute a shares + * distribution (s_i) using: + * + * s_i = rw_i / \Sum rw_j (1) + * + * Suppose we have 4 CPUs and our @tg is a direct child of the root group and + * has 7 equal weight tasks, distributed as below (rw_i), with the resulting + * shares distribution (s_i): + * + * rw_i = { 2, 4, 1, 0 } + * s_i = { 2/7, 4/7, 1/7, 0 } + * + * As per wake_affine() we're interested in the load of two CPUs (the CPU the + * task used to run on and the CPU the waker is running on), we need to + * compute the effect of waking a task on either CPU and, in case of a sync + * wakeup, compute the effect of the current task going to sleep. + * + * So for a change of @wl to the local @cpu with an overall group weight change + * of @wg we can compute the new shares distribution (s'_i) using: + * + * s'_i = (rw_i + @wl) / (@wg + \Sum rw_j) (2) + * + * Suppose we're interested in CPUs 0 and 1, and want to compute the load + * differences in waking a task to CPU 0. The additional task changes the + * weight and shares distributions like: + * + * rw'_i = { 3, 4, 1, 0 } + * s'_i = { 3/8, 4/8, 1/8, 0 } + * + * We can then compute the difference in effective weight by using: + * + * dw_i = S * (s'_i - s_i) (3) + * + * Where 'S' is the group weight as seen by its parent. + * + * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7) + * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 - + * 4/7) times the weight of the group. */ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) { struct sched_entity *se = tg->se[cpu]; - if (!tg->parent) + if (!tg->parent) /* the trivial, non-cgroup case */ return wl; for_each_sched_entity(se) { - long lw, w; + long w, W; tg = se->my_q->tg; - w = se->my_q->load.weight; - /* use this cpu's instantaneous contribution */ - lw = atomic_read(&tg->load_weight); - lw -= se->my_q->load_contribution; - lw += w + wg; + /* + * W = @wg + \Sum rw_j + */ + W = wg + calc_tg_weight(tg, se->my_q); - wl += w; + /* + * w = rw_i + @wl + */ + w = se->my_q->load.weight + wl; - if (lw > 0 && wl < lw) - wl = (wl * tg->shares) / lw; + /* + * wl = S * s'_i; see (2) + */ + if (W > 0 && w < W) + wl = (w * tg->shares) / W; else wl = tg->shares; - /* zero point is MIN_SHARES */ + /* + * Per the above, wl is the new se->load.weight value; since + * those are clipped to [MIN_SHARES, ...) do so now. See + * calc_cfs_shares(). + */ if (wl < MIN_SHARES) wl = MIN_SHARES; + + /* + * wl = dw_i = S * (s'_i - s_i); see (3) + */ wl -= se->load.weight; + + /* + * Recursively apply this logic to all parent groups to compute + * the final effective load change on the root group. Since + * only the @tg group gets extra weight, all parent groups can + * only redistribute existing shares. @wl is the shift in shares + * resulting from this level per the above. + */ wg = 0; } @@ -2249,7 +2326,8 @@ static int select_idle_sibling(struct task_struct *p, int target) int cpu = smp_processor_id(); int prev_cpu = task_cpu(p); struct sched_domain *sd; - int i; + struct sched_group *sg; + int i, smt = 0; /* * If the task is going to be woken-up on this cpu and if it is @@ -2269,25 +2347,38 @@ static int select_idle_sibling(struct task_struct *p, int target) * Otherwise, iterate the domains and find an eligible idle cpu.
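The worked example in the effective_load() comment above is easy to verify numerically. This standalone sketch recomputes dw_i = S * (s'_i - s_i) for CPUs 0 and 1 with rw = {2, 4, 1, 0}, one extra weight-1 task on CPU 0, and the group weight S normalized to 1, reproducing the 5/56 and -4/56 figures:

#include <stdio.h>

int main(void)
{
	/* the comment's example: 4 CPUs, 7 equal-weight tasks */
	double rw[4] = { 2, 4, 1, 0 };
	double sum = 2 + 4 + 1 + 0;	/* \Sum rw_j = 7 */
	double wl = 1, wg = 1;		/* one extra weight-1 task on CPU 0 */
	int cpu;

	for (cpu = 0; cpu < 2; cpu++) {
		double s_old = rw[cpu] / sum;			/* (1) */
		double s_new = (rw[cpu] + (cpu == 0 ? wl : 0))
				/ (sum + wg);			/* (2) */
		double dw = 1.0 * (s_new - s_old);		/* (3), S = 1 */

		printf("cpu%d: dw = %+f (expected %+f)\n", cpu, dw,
		       cpu == 0 ? 5.0 / 56 : -4.0 / 56);
	}
	return 0;
}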
*/ rcu_read_lock(); +again: for_each_domain(target, sd) { - if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) - break; + if (!smt && (sd->flags & SD_SHARE_CPUPOWER)) + continue; - for_each_cpu_and(i, sched_domain_span(sd), tsk_cpus_allowed(p)) { - if (idle_cpu(i)) { - target = i; - break; + if (!(sd->flags & SD_SHARE_PKG_RESOURCES)) { + if (!smt) { + smt = 1; + goto again; } + break; } - /* - * Lets stop looking for an idle sibling when we reached - * the domain that spans the current cpu and prev_cpu. - */ - if (cpumask_test_cpu(cpu, sched_domain_span(sd)) && - cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) - break; + sg = sd->groups; + do { + if (!cpumask_intersects(sched_group_cpus(sg), + tsk_cpus_allowed(p))) + goto next; + + for_each_cpu(i, sched_group_cpus(sg)) { + if (!idle_cpu(i)) + goto next; + } + + target = cpumask_first_and(sched_group_cpus(sg), + tsk_cpus_allowed(p)); + goto done; +next: + sg = sg->next; + } while (sg != sd->groups); } +done: rcu_read_unlock(); return target; @@ -3511,7 +3602,7 @@ static bool update_sd_pick_busiest(struct sched_domain *sd, } /** - * update_sd_lb_stats - Update sched_group's statistics for load balancing. + * update_sd_lb_stats - Update sched_domain's statistics for load balancing. * @sd: sched_domain whose statistics are to be updated. * @this_cpu: Cpu for which load balance is currently performed. * @idle: Idle status of this_cpu diff --git a/kernel/sched_features.h b/kernel/sched_features.h index efa0a7b75dde..84802245abd2 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -67,3 +67,4 @@ SCHED_FEAT(NONTASK_POWER, 1) SCHED_FEAT(TTWU_QUEUE, 1) SCHED_FEAT(FORCE_SD_OVERLAP, 0) +SCHED_FEAT(RT_RUNTIME_SHARE, 1) diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 056cbd2e2a27..583a1368afe6 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -560,6 +560,9 @@ static int balance_runtime(struct rt_rq *rt_rq) { int more = 0; + if (!sched_feat(RT_RUNTIME_SHARE)) + return more; + if (rt_rq->rt_time > rt_rq->rt_runtime) { raw_spin_unlock(&rt_rq->rt_runtime_lock); more = do_balance_runtime(rt_rq); diff --git a/kernel/semaphore.c b/kernel/semaphore.c index d831841e55a7..60636a4e25c3 100644 --- a/kernel/semaphore.c +++ b/kernel/semaphore.c @@ -27,7 +27,7 @@ #include <linux/compiler.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/semaphore.h> #include <linux/spinlock.h> diff --git a/kernel/signal.c b/kernel/signal.c index d252be2d3de5..b3f78d09a105 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -11,7 +11,7 @@ */ #include <linux/slab.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/fs.h> diff --git a/kernel/smp.c b/kernel/smp.c index fb67dfa8394e..db197d60489b 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -6,7 +6,7 @@ #include <linux/rcupdate.h> #include <linux/rculist.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/percpu.h> #include <linux/init.h> #include <linux/gfp.h> diff --git a/kernel/softirq.c b/kernel/softirq.c index fca82c32042b..2c71d91efff0 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -10,7 +10,7 @@ * Remote softirq infrastructure is by Jens Axboe. 
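The select_idle_sibling() rewrite above no longer settles for any one idle sibling; after skipping SMT-sharing domains on the first pass, it looks for a whole sched_group whose CPUs are all idle. A hedged standalone model of just that group-scan rule, with the topology reduced to plain arrays (the data layout is invented):

#include <stdio.h>

#define NR_CPUS 8

/* invented snapshot: cpu0 and cpu4 are busy, the rest are idle */
static const int idle[NR_CPUS]    = { 0, 1, 1, 1, 0, 1, 1, 1 };
static const int allowed[NR_CPUS] = { 1, 1, 1, 1, 1, 1, 1, 1 };

int main(void)
{
	/* two sibling CPUs per "group" */
	const int group[4][2] = { {0, 1}, {2, 3}, {4, 5}, {6, 7} };
	int g, i, target = -1;

	for (g = 0; g < 4 && target < 0; g++) {
		int intersects = 0, all_idle = 1;

		for (i = 0; i < 2; i++) {
			if (allowed[group[g][i]])
				intersects = 1;	/* cpumask_intersects() */
			if (!idle[group[g][i]])
				all_idle = 0;	/* the for_each_cpu() check */
		}
		if (!intersects || !all_idle)
			continue;		/* "goto next" in the patch */

		for (i = 0; i < 2; i++) {
			if (allowed[group[g][i]]) {
				target = group[g][i]; /* cpumask_first_and() */
				break;
			}
		}
	}
	printf("target cpu: %d\n", target);	/* group {2,3} wins -> 2 */
	return 0;
}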
*/ -#include <linux/module.h> +#include <linux/export.h> #include <linux/kernel_stat.h> #include <linux/interrupt.h> #include <linux/init.h> diff --git a/kernel/spinlock.c b/kernel/spinlock.c index be6517fb9c14..84c7d96918bf 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -19,7 +19,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/debug_locks.h> -#include <linux/module.h> +#include <linux/export.h> /* * If lockdep is enabled then we use the non-preemption spin-ops diff --git a/kernel/srcu.c b/kernel/srcu.c index 73ce23feaea9..0febf61e1aa3 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -24,7 +24,7 @@ * */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/preempt.h> diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c index d20c6983aad9..00fe55cc5a82 100644 --- a/kernel/stacktrace.c +++ b/kernel/stacktrace.c @@ -7,7 +7,7 @@ */ #include <linux/sched.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/kallsyms.h> #include <linux/stacktrace.h> diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 5b0951aa0496..2f194e965715 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -12,7 +12,7 @@ #include <linux/cpu.h> #include <linux/init.h> #include <linux/kthread.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/percpu.h> #include <linux/sched.h> #include <linux/stop_machine.h> diff --git a/kernel/sys.c b/kernel/sys.c index d06c091e0345..481611fbd079 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -4,7 +4,7 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/mm.h> #include <linux/utsname.h> #include <linux/mman.h> @@ -12,6 +12,7 @@ #include <linux/prctl.h> #include <linux/highuid.h> #include <linux/fs.h> +#include <linux/kmod.h> #include <linux/perf_event.h> #include <linux/resource.h> #include <linux/kernel.h> diff --git a/kernel/time.c b/kernel/time.c index d77606214529..73e416db0a1e 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -27,7 +27,7 @@ * with nanosecond accuracy */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/timex.h> #include <linux/capability.h> #include <linux/clocksource.h> diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index c436e790b21b..8a46f5d64504 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -195,7 +195,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) struct alarm *alarm; ktime_t expired = next->expires; - if (expired.tv64 >= now.tv64) + if (expired.tv64 > now.tv64) break; alarm = container_of(next, struct alarm, node); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 1ecd6ba36d6c..c4eb71c8b2ea 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -387,6 +387,7 @@ void clockevents_exchange_device(struct clock_event_device *old, * released list and do a notify add later. 
*/ if (old) { + old->event_handler = clockevents_handle_noop; clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); list_del(&old->list); list_add(&old->list, &clockevents_released); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index cf52fda2e096..da2f760e780c 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -492,6 +492,22 @@ void clocksource_touch_watchdog(void) } /** + * clocksource_max_adjustment - Returns max adjustment amount + * @cs: Pointer to clocksource + * + */ +static u32 clocksource_max_adjustment(struct clocksource *cs) +{ + u64 ret; + /* + * We won't try to correct for more than 11% adjustments (110,000 ppm). + */ + ret = (u64)cs->mult * 11; + do_div(ret,100); + return (u32)ret; +} + +/** * clocksource_max_deferment - Returns max time the clocksource can be deferred * @cs: Pointer to clocksource * @@ -503,25 +519,28 @@ static u64 clocksource_max_deferment(struct clocksource *cs) /* * Calculate the maximum number of cycles that we can pass to the * cyc2ns function without overflowing a 64-bit signed result. The - * maximum number of cycles is equal to ULLONG_MAX/cs->mult which - * is equivalent to the below. - * max_cycles < (2^63)/cs->mult - * max_cycles < 2^(log2((2^63)/cs->mult)) - * max_cycles < 2^(log2(2^63) - log2(cs->mult)) - * max_cycles < 2^(63 - log2(cs->mult)) - * max_cycles < 1 << (63 - log2(cs->mult)) + * maximum number of cycles is equal to ULLONG_MAX/(cs->mult+cs->maxadj) + * which is equivalent to the below. + * max_cycles < (2^63)/(cs->mult + cs->maxadj) + * max_cycles < 2^(log2((2^63)/(cs->mult + cs->maxadj))) + * max_cycles < 2^(log2(2^63) - log2(cs->mult + cs->maxadj)) + * max_cycles < 2^(63 - log2(cs->mult + cs->maxadj)) + * max_cycles < 1 << (63 - log2(cs->mult + cs->maxadj)) * Please note that we add 1 to the result of the log2 to account for * any rounding errors, ensure the above inequality is satisfied and * no overflow will occur. */ - max_cycles = 1ULL << (63 - (ilog2(cs->mult) + 1)); + max_cycles = 1ULL << (63 - (ilog2(cs->mult + cs->maxadj) + 1)); /* * The actual maximum number of cycles we can defer the clocksource is * determined by the minimum of max_cycles and cs->mask. + * Note: Here we subtract the maxadj to make sure we don't sleep for + * too long if there's a large negative adjustment. */ max_cycles = min_t(u64, max_cycles, (u64) cs->mask); - max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult, cs->shift); + max_nsecs = clocksource_cyc2ns(max_cycles, cs->mult - cs->maxadj, + cs->shift); /* * To ensure that the clocksource does not wrap whilst we are idle, @@ -529,7 +548,7 @@ static u64 clocksource_max_deferment(struct clocksource *cs) * note a margin of 12.5% is used because this can be computed with * a shift, versus say 10% which would require division. */ - return max_nsecs - (max_nsecs >> 5); + return max_nsecs - (max_nsecs >> 3); } #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET @@ -640,7 +659,6 @@ static void clocksource_enqueue(struct clocksource *cs) void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) { u64 sec; - /* * Calc the maximum number of seconds which we can run before * wrapping around. For clocksources which have a mask > 32bit @@ -651,7 +669,7 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) * ~ 0.06ppm granularity for NTP.
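clocksource_max_adjustment() above caps NTP corrections at 11% of mult, and the while loop that follows halves mult (dropping shift to match) until mult + maxadj can no longer wrap a u32. A standalone model of that arithmetic with an invented, deliberately oversized starting mult/shift pair (the kernel loop additionally guards the subtraction side):

#include <stdio.h>
#include <stdint.h>

/* ret = mult * 11 / 100, as in clocksource_max_adjustment() */
static uint32_t max_adjustment(uint32_t mult)
{
	return (uint64_t)mult * 11 / 100;
}

int main(void)
{
	/* invented starting point, large enough to overflow once */
	uint32_t mult = 0xf0000000u, shift = 31;
	uint32_t maxadj = max_adjustment(mult);

	/* halve mult (and drop shift) until mult + maxadj cannot wrap */
	while (mult + maxadj < mult) {
		mult >>= 1;
		shift--;
		maxadj = max_adjustment(mult);
	}
	printf("mult=%#x shift=%u maxadj=%u (~11%% of mult)\n",
	       (unsigned)mult, (unsigned)shift, (unsigned)maxadj);
	return 0;
}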
We apply the same 12.5% * margin as we do in clocksource_max_deferment() */ - sec = (cs->mask - (cs->mask >> 5)); + sec = (cs->mask - (cs->mask >> 3)); do_div(sec, freq); do_div(sec, scale); if (!sec) @@ -661,6 +679,20 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, NSEC_PER_SEC / scale, sec * scale); + + /* + * Reduce mult/shift for clocksources that have large mults, to + * avoid overflow. Since mult may be adjusted by ntp, add an extra + * safety margin. + */ + cs->maxadj = clocksource_max_adjustment(cs); + while ((cs->mult + cs->maxadj < cs->mult) + || (cs->mult - cs->maxadj > cs->mult)) { + cs->mult >>= 1; + cs->shift--; + cs->maxadj = clocksource_max_adjustment(cs); + } + cs->max_idle_ns = clocksource_max_deferment(cs); } EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); @@ -701,6 +733,12 @@ EXPORT_SYMBOL_GPL(__clocksource_register_scale); */ int clocksource_register(struct clocksource *cs) { + /* calculate max adjustment for given mult/shift */ + cs->maxadj = clocksource_max_adjustment(cs); + WARN_ONCE(cs->mult + cs->maxadj < cs->mult, + "Clocksource %s might overflow on 11%% adjustment\n", + cs->name); + /* calculate max idle time permitted for this clocksource */ cs->max_idle_ns = clocksource_max_deferment(cs); diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index c340ca658f37..ce033c7aa2e8 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -18,6 +18,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/device.h> +#include <linux/export.h> #include <linux/file.h> #include <linux/posix-clock.h> #include <linux/slab.h> diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f954282d9a82..fd4a7b1625a2 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -71,7 +71,7 @@ int tick_check_broadcast_device(struct clock_event_device *dev) (dev->features & CLOCK_EVT_FEAT_C3STOP)) return 0; - clockevents_exchange_device(NULL, dev); + clockevents_exchange_device(tick_broadcast_device.evtdev, dev); tick_broadcast_device.evtdev = dev; if (!cpumask_empty(tick_get_broadcast_mask())) tick_broadcast_start_periodic(dev); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2b021b0e8507..237841378c03 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -249,6 +249,8 @@ ktime_t ktime_get(void) secs = xtime.tv_sec + wall_to_monotonic.tv_sec; nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec; nsecs += timekeeping_get_ns(); + /* If arch requires, add in gettimeoffset() */ + nsecs += arch_gettimeoffset(); } while (read_seqretry(&xtime_lock, seq)); /* @@ -280,6 +282,8 @@ void ktime_get_ts(struct timespec *ts) *ts = xtime; tomono = wall_to_monotonic; nsecs = timekeeping_get_ns(); + /* If arch requires, add in gettimeoffset() */ + nsecs += arch_gettimeoffset(); } while (read_seqretry(&xtime_lock, seq)); @@ -802,14 +806,44 @@ static void timekeeping_adjust(s64 offset) s64 error, interval = timekeeper.cycle_interval; int adj; + /* + * The point of this is to check if the error is greater than half + * an interval. + * + * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs. + * + * Note we subtract one in the shift, so that error is really error*2. + * This "saves" dividing (shifting) interval twice, but keeps the + * (error > interval) comparison still measuring whether error is + * larger than half an interval.
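The while loop added to __clocksource_updatefreq_scale() detects overflow through unsigned wraparound: if mult + maxadj overflows a u32, the sum compares less than mult (and symmetrically for the subtraction). Each halving of mult, paired with a shift decrement, keeps the mult/2^shift ratio intact. A small sketch of the same reduction with a deliberately oversized, hypothetical mult:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t max_adjustment(uint32_t mult)
    {
        return (uint32_t)((uint64_t)mult * 11 / 100);    /* 11%, like maxadj */
    }

    int main(void)
    {
        /* Deliberately huge hypothetical mult so mult + maxadj wraps a u32. */
        uint32_t mult   = 0xf0000000u;
        uint32_t shift  = 31;
        uint32_t maxadj = max_adjustment(mult);

        /* Halve mult (and drop shift, keeping mult/2^shift constant) until
         * an 11% NTP adjustment can no longer wrap around. */
        while (mult + maxadj < mult || mult - maxadj > mult) {
            mult >>= 1;
            shift--;
            maxadj = max_adjustment(mult);
        }
        printf("mult=%u shift=%u maxadj=%u\n", mult, shift, maxadj);
        return 0;
    }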
+ * + * Note: It does not "save" on aggravation when reading the code. + */ error = timekeeper.ntp_error >> (timekeeper.ntp_error_shift - 1); if (error > interval) { + /* + * We now divide error by 4 (via shift), which checks if + * the error is greater than twice the interval. + * If it is greater, we need a bigadjust; if it's smaller, + * we can adjust by 1. + */ error >>= 2; + /* + * XXX - In update_wall_time, we round up to the next + * nanosecond, and store the amount rounded up into + * the error. This causes the likely below to be unlikely. + * + * The proper fix is to avoid rounding up by using + * the high precision timekeeper.xtime_nsec instead of + * xtime.tv_nsec everywhere. Fixing this will take some + * time. + */ if (likely(error <= interval)) adj = 1; else adj = timekeeping_bigadjust(error, &interval, &offset); } else if (error < -interval) { + /* See comment above, this is just switched for the negative */ error >>= 2; if (likely(error >= -interval)) { adj = -1; @@ -817,9 +851,65 @@ static void timekeeping_adjust(s64 offset) offset = -offset; } else adj = timekeeping_bigadjust(error, &interval, &offset); - } else + } else /* No adjustment needed */ return; + WARN_ONCE(timekeeper.clock->maxadj && + (timekeeper.mult + adj > timekeeper.clock->mult + + timekeeper.clock->maxadj), + "Adjusting %s more than 11%% (%ld vs %ld)\n", + timekeeper.clock->name, (long)timekeeper.mult + adj, + (long)timekeeper.clock->mult + + timekeeper.clock->maxadj); + /* + * So the following can be confusing. + * + * To keep things simple, let's assume adj == 1 for now. + * + * When adj != 1, remember that the interval and offset values + * have been appropriately scaled so the math is the same. + * + * The basic idea here is that we're increasing the multiplier + * by one; this causes the xtime_interval to be incremented by + * one cycle_interval. This is because: + * xtime_interval = cycle_interval * mult + * So if mult is being incremented by one: + * xtime_interval = cycle_interval * (mult + 1) + * It's the same as: + * xtime_interval = (cycle_interval * mult) + cycle_interval + * Which can be shortened to: + * xtime_interval += cycle_interval + * + * So offset stores the non-accumulated cycles. Thus the current + * time (in shifted nanoseconds) is: + * now = (offset * adj) + xtime_nsec + * Now, even though we're adjusting the clock frequency, we have + * to keep time consistent. In other words, we can't jump back + * in time, and we also want to avoid jumping forward in time. + * + * So given the same offset value, we need the time to be the same + * both before and after the freq adjustment. + * now = (offset * adj_1) + xtime_nsec_1 + * now = (offset * adj_2) + xtime_nsec_2 + * So: + * (offset * adj_1) + xtime_nsec_1 = + * (offset * adj_2) + xtime_nsec_2 + * And we know: + * adj_2 = adj_1 + 1 + * So: + * (offset * adj_1) + xtime_nsec_1 = + * (offset * (adj_1+1)) + xtime_nsec_2 + * (offset * adj_1) + xtime_nsec_1 = + * (offset * adj_1) + offset + xtime_nsec_2 + * Canceling the sides: + * xtime_nsec_1 = offset + xtime_nsec_2 + * Which gives us: + * xtime_nsec_2 = xtime_nsec_1 - offset + * Which simplifies to: + * xtime_nsec -= offset + * + * XXX - TODO: Doc ntp_error calculation.
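The derivation above reduces to one invariant: bumping mult by one while subtracting offset from xtime_nsec leaves the composed time value unchanged, so the frequency change causes no jump in either direction. A tiny self-check of that algebra with arbitrary example numbers (no kernel types involved):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Arbitrary example values (shifted-nanosecond units). */
        uint64_t offset     = 123456;    /* non-accumulated cycles */
        uint64_t mult       = 1000;
        uint64_t xtime_nsec = 999999;

        uint64_t before = offset * mult + xtime_nsec;

        /* The adjustment: mult += 1, xtime_nsec -= offset. */
        mult += 1;
        xtime_nsec -= offset;

        uint64_t after = offset * mult + xtime_nsec;

        assert(before == after);    /* time jumps neither back nor forward */
        printf("consistent: %llu == %llu\n",
               (unsigned long long)before, (unsigned long long)after);
        return 0;
    }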
+ */ timekeeper.mult += adj; timekeeper.xtime_interval += interval; timekeeper.xtime_nsec -= offset; diff --git a/kernel/timer.c b/kernel/timer.c index 8cff36119e4d..9c3c62b0c4bc 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -20,7 +20,7 @@ */ #include <linux/kernel_stat.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/interrupt.h> #include <linux/percpu.h> #include <linux/init.h> @@ -1368,7 +1368,7 @@ SYSCALL_DEFINE0(getppid) int pid; rcu_read_lock(); - pid = task_tgid_vnr(current->real_parent); + pid = task_tgid_vnr(rcu_dereference(current->real_parent)); rcu_read_unlock(); return pid; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7c910a5593a6..16fc34a0806f 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -23,6 +23,7 @@ #include <linux/mutex.h> #include <linux/slab.h> #include <linux/debugfs.h> +#include <linux/export.h> #include <linux/time.h> #include <linux/uaccess.h> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 077d85387908..b1e8943fed1d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -22,6 +22,7 @@ #include <linux/hardirq.h> #include <linux/kthread.h> #include <linux/uaccess.h> +#include <linux/module.h> #include <linux/ftrace.h> #include <linux/sysctl.h> #include <linux/slab.h> @@ -151,7 +152,6 @@ void clear_ftrace_function(void) ftrace_pid_function = ftrace_stub; } -#undef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST #ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST /* * For those archs that do not test ftrace_trace_stop in their @@ -1211,7 +1211,9 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, if (!src->count) { free_ftrace_hash_rcu(*dst); rcu_assign_pointer(*dst, EMPTY_HASH); - return 0; + /* still need to update the function records */ + ret = 0; + goto out; } /* diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 581876f9f387..c212a7f934ec 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1078,7 +1078,6 @@ event_subsystem_dir(const char *name, struct dentry *d_events) /* First see if we did not already create this dir */ list_for_each_entry(system, &event_subsystems, list) { if (strcmp(system->name, name) == 0) { - __get_system(system); system->nr_events++; return system->entry; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 816d3d074979..95dc31efd6dd 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1649,7 +1649,9 @@ static int replace_system_preds(struct event_subsystem *system, */ err = replace_preds(call, NULL, ps, filter_string, true); if (err) - goto fail; + call->flags |= TRACE_EVENT_FL_NO_SET_FILTER; + else + call->flags &= ~TRACE_EVENT_FL_NO_SET_FILTER; } list_for_each_entry(call, &ftrace_events, list) { @@ -1658,6 +1660,9 @@ static int replace_system_preds(struct event_subsystem *system, if (strcmp(call->class->system, system->name) != 0) continue; + if (call->flags & TRACE_EVENT_FL_NO_SET_FILTER) + continue; + filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL); if (!filter_item) goto fail_mem; @@ -1686,7 +1691,7 @@ static int replace_system_preds(struct event_subsystem *system, * replace the filter for the call. 
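The getppid() change in kernel/timer.c is the canonical RCU read-side pattern: a pointer read under rcu_read_lock() should go through rcu_dereference() so the access is annotated and correctly ordered. The same discipline is available in userspace through liburcu; a minimal sketch assuming liburcu's default flavor (the header or flavor may differ by installed version, and the task struct and parent pointer here are hypothetical):

    /* Build with: gcc rcu_read.c -lurcu */
    #include <urcu.h>
    #include <stdio.h>

    struct task { int tgid; };    /* hypothetical stand-in for task_struct */

    static struct task init_task = { .tgid = 1 };
    static struct task *parent = &init_task;    /* RCU-protected pointer */

    int main(void)
    {
        rcu_register_thread();    /* every reader thread registers once */

        rcu_read_lock();
        /* Like task_tgid_vnr(rcu_dereference(current->real_parent)):
         * only touch the protected pointer via rcu_dereference(). */
        struct task *p = rcu_dereference(parent);
        printf("parent tgid = %d\n", p->tgid);
        rcu_read_unlock();

        rcu_unregister_thread();
        return 0;
    }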
*/ filter = call->filter; - call->filter = filter_item->filter; + rcu_assign_pointer(call->filter, filter_item->filter); filter_item->filter = filter; fail = false; @@ -1741,7 +1746,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) filter = call->filter; if (!filter) goto out_unlock; - call->filter = NULL; + RCU_INIT_POINTER(call->filter, NULL); /* Make sure the filter is not being used */ synchronize_sched(); __free_filter(filter); @@ -1782,7 +1787,7 @@ out: * string */ tmp = call->filter; - call->filter = filter; + rcu_assign_pointer(call->filter, filter); if (tmp) { /* Make sure the call is done with the filter */ synchronize_sched(); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index ee7b5a0bb9f8..cb654542c1a1 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -2,6 +2,7 @@ #include <trace/events/syscalls.h> #include <linux/slab.h> #include <linux/kernel.h> +#include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */ #include <linux/ftrace.h> #include <linux/perf_event.h> #include <asm/syscall.h> diff --git a/kernel/up.c b/kernel/up.c index 1ff27a28bb7d..c54c75e9faf7 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -4,7 +4,7 @@ #include <linux/interrupt.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/smp.h> int smp_call_function_single(int cpu, void (*func) (void *info), void *info, diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c index 92cb706c7fc8..1744bb80f1fb 100644 --- a/kernel/user-return-notifier.c +++ b/kernel/user-return-notifier.c @@ -2,7 +2,7 @@ #include <linux/user-return-notifier.h> #include <linux/percpu.h> #include <linux/sched.h> -#include <linux/module.h> +#include <linux/export.h> static DEFINE_PER_CPU(struct hlist_head, return_notifier_list); diff --git a/kernel/user.c b/kernel/user.c index 9e03e9c1df8d..71dd2363ab0f 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -14,7 +14,7 @@ #include <linux/bitops.h> #include <linux/key.h> #include <linux/interrupt.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/user_namespace.h> /* diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 9da289c34f22..3b906e98b1db 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -5,7 +5,7 @@ * License. */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/user_namespace.h> diff --git a/kernel/utsname.c b/kernel/utsname.c index bff131b9510a..405caf91aad5 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -9,7 +9,7 @@ * License. */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/uts.h> #include <linux/utsname.h> #include <linux/err.h> diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index 3b0d48ebf81d..63da38c2d820 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -9,7 +9,7 @@ * License. 
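The filter hunks are the matching RCU update side: publish a fully built filter with rcu_assign_pointer() so concurrent readers never observe a half-initialized one, use RCU_INIT_POINTER() when clearing (no ordering needed to store NULL), and free the old filter only after a grace period (synchronize_sched() in this kernel). A liburcu sketch of that publish-then-reclaim sequence, with a hypothetical filter struct standing in for call->filter:

    /* Build with: gcc rcu_update.c -lurcu (header/flavor may differ by version) */
    #include <urcu.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct filter { char expr[64]; };    /* hypothetical */

    static struct filter *active_filter;    /* RCU-protected, like call->filter */

    static void set_filter(const char *expr)
    {
        struct filter *nf = calloc(1, sizeof(*nf));
        struct filter *old = active_filter;

        if (!nf)
            return;
        strncpy(nf->expr, expr, sizeof(nf->expr) - 1);

        /* Publish: readers see either the old filter or the fully built new one. */
        rcu_assign_pointer(active_filter, nf);

        /* Wait out pre-existing readers, then reclaim
         * (the kernel side uses synchronize_sched()). */
        synchronize_rcu();
        free(old);
    }

    int main(void)
    {
        rcu_register_thread();

        set_filter("pid == 42");

        rcu_read_lock();
        printf("filter: %s\n", rcu_dereference(active_filter)->expr);
        rcu_read_unlock();

        rcu_unregister_thread();
        return 0;
    }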
*/ -#include <linux/module.h> +#include <linux/export.h> #include <linux/uts.h> #include <linux/utsname.h> #include <linux/sysctl.h> diff --git a/kernel/wait.c b/kernel/wait.c index f45ea8d2a1ce..26fa7797f90f 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -4,7 +4,7 @@ * (C) 2004 William Irwin, Oracle */ #include <linux/init.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/wait.h> diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1783aabc6128..42fa9ad0a810 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -23,7 +23,7 @@ * Please read Documentation/workqueue.txt for details. */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/init.h>