diff options
author | Stefan Agner <stefan.agner@toradex.com> | 2015-10-23 15:25:54 -0700 |
---|---|---|
committer | Stefan Agner <stefan.agner@toradex.com> | 2015-10-23 15:25:54 -0700 |
commit | 0df341edfa5e7c119523a0c30146e88106f88b43 (patch) | |
tree | b8a8bfabaa488d70ce49a7941e974bab023cb2d0 /kernel | |
parent | 9ac0b253c59216c9614436b2006d0557396eb268 (diff) | |
parent | 205a8514e63a221c3a5071f27521013444e43e5f (diff) |
Merge tag 'v4.1.11' into toradex_vf_4.1-next
This is the 4.1.11 stable release
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 7 | ||||
-rw-r--r-- | kernel/cpuset.c | 2 | ||||
-rw-r--r-- | kernel/events/core.c | 99 | ||||
-rw-r--r-- | kernel/events/internal.h | 10 | ||||
-rw-r--r-- | kernel/events/ring_buffer.c | 37 | ||||
-rw-r--r-- | kernel/fork.c | 28 | ||||
-rw-r--r-- | kernel/irq/chip.c | 19 | ||||
-rw-r--r-- | kernel/irq/proc.c | 19 | ||||
-rw-r--r-- | kernel/sched/core.c | 33 | ||||
-rw-r--r-- | kernel/sched/fair.c | 25 | ||||
-rw-r--r-- | kernel/sched/sched.h | 5 | ||||
-rw-r--r-- | kernel/signal.c | 13 | ||||
-rw-r--r-- | kernel/time/timekeeping.c | 2 |
13 files changed, 216 insertions, 83 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e8a5491be756..4d65b66ae60d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1319,7 +1319,7 @@ static int cgroup_show_options(struct seq_file *seq, for_each_subsys(ss, ssid) if (root->subsys_mask & (1 << ssid)) - seq_printf(seq, ",%s", ss->name); + seq_show_option(seq, ss->name, NULL); if (root->flags & CGRP_ROOT_NOPREFIX) seq_puts(seq, ",noprefix"); if (root->flags & CGRP_ROOT_XATTR) @@ -1327,13 +1327,14 @@ static int cgroup_show_options(struct seq_file *seq, spin_lock(&release_agent_path_lock); if (strlen(root->release_agent_path)) - seq_printf(seq, ",release_agent=%s", root->release_agent_path); + seq_show_option(seq, "release_agent", + root->release_agent_path); spin_unlock(&release_agent_path_lock); if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags)) seq_puts(seq, ",clone_children"); if (strlen(root->name)) - seq_printf(seq, ",name=%s", root->name); + seq_show_option(seq, "name", root->name); return 0; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ee14e3a35a29..f0acff0f66c9 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1223,7 +1223,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, spin_unlock_irq(&callback_lock); /* use trialcs->mems_allowed as a temp variable */ - update_nodemasks_hier(cs, &cs->mems_allowed); + update_nodemasks_hier(cs, &trialcs->mems_allowed); done: return retval; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 0ceb386777ae..e1af58e23bee 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1886,8 +1886,6 @@ event_sched_in(struct perf_event *event, perf_pmu_disable(event->pmu); - event->tstamp_running += tstamp - event->tstamp_stopped; - perf_set_shadow_time(event, ctx, tstamp); perf_log_itrace_start(event); @@ -1899,6 +1897,8 @@ event_sched_in(struct perf_event *event, goto out; } + event->tstamp_running += tstamp - event->tstamp_stopped; + if (!is_software_event(event)) cpuctx->active_oncpu++; if (!ctx->nr_active++) @@ -3976,28 +3976,21 @@ static void perf_event_for_each(struct perf_event *event, perf_event_for_each_child(sibling, func); } -static int perf_event_period(struct perf_event *event, u64 __user *arg) -{ - struct perf_event_context *ctx = event->ctx; - int ret = 0, active; +struct period_event { + struct perf_event *event; u64 value; +}; - if (!is_sampling_event(event)) - return -EINVAL; - - if (copy_from_user(&value, arg, sizeof(value))) - return -EFAULT; - - if (!value) - return -EINVAL; +static int __perf_event_period(void *info) +{ + struct period_event *pe = info; + struct perf_event *event = pe->event; + struct perf_event_context *ctx = event->ctx; + u64 value = pe->value; + bool active; - raw_spin_lock_irq(&ctx->lock); + raw_spin_lock(&ctx->lock); if (event->attr.freq) { - if (value > sysctl_perf_event_sample_rate) { - ret = -EINVAL; - goto unlock; - } - event->attr.sample_freq = value; } else { event->attr.sample_period = value; @@ -4016,11 +4009,53 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) event->pmu->start(event, PERF_EF_RELOAD); perf_pmu_enable(ctx->pmu); } + raw_spin_unlock(&ctx->lock); -unlock: + return 0; +} + +static int perf_event_period(struct perf_event *event, u64 __user *arg) +{ + struct period_event pe = { .event = event, }; + struct perf_event_context *ctx = event->ctx; + struct task_struct *task; + u64 value; + + if (!is_sampling_event(event)) + return -EINVAL; + + if (copy_from_user(&value, arg, sizeof(value))) + return -EFAULT; + + if (!value) + return -EINVAL; + + if (event->attr.freq && value > sysctl_perf_event_sample_rate) + return -EINVAL; + + task = ctx->task; + pe.value = value; + + if (!task) { + cpu_function_call(event->cpu, __perf_event_period, &pe); + return 0; + } + +retry: + if (!task_function_call(task, __perf_event_period, &pe)) + return 0; + + raw_spin_lock_irq(&ctx->lock); + if (ctx->is_active) { + raw_spin_unlock_irq(&ctx->lock); + task = ctx->task; + goto retry; + } + + __perf_event_period(&pe); raw_spin_unlock_irq(&ctx->lock); - return ret; + return 0; } static const struct file_operations perf_fops; @@ -4376,14 +4411,6 @@ static void ring_buffer_wakeup(struct perf_event *event) rcu_read_unlock(); } -static void rb_free_rcu(struct rcu_head *rcu_head) -{ - struct ring_buffer *rb; - - rb = container_of(rcu_head, struct ring_buffer, rcu_head); - rb_free(rb); -} - struct ring_buffer *ring_buffer_get(struct perf_event *event) { struct ring_buffer *rb; @@ -4766,12 +4793,20 @@ static const struct file_operations perf_fops = { * to user-space before waking everybody up. */ +static inline struct fasync_struct **perf_event_fasync(struct perf_event *event) +{ + /* only the parent has fasync state */ + if (event->parent) + event = event->parent; + return &event->fasync; +} + void perf_event_wakeup(struct perf_event *event) { ring_buffer_wakeup(event); if (event->pending_kill) { - kill_fasync(&event->fasync, SIGIO, event->pending_kill); + kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill); event->pending_kill = 0; } } @@ -6117,7 +6152,7 @@ static int __perf_event_overflow(struct perf_event *event, else perf_event_output(event, data, regs); - if (event->fasync && event->pending_kill) { + if (*perf_event_fasync(event) && event->pending_kill) { event->pending_wakeup = 1; irq_work_queue(&event->pending); } diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 9f6ce9ba4a04..a6adc36a3732 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -11,6 +11,7 @@ struct ring_buffer { atomic_t refcount; struct rcu_head rcu_head; + struct irq_work irq_work; #ifdef CONFIG_PERF_USE_VMALLOC struct work_struct work; int page_order; /* allocation order */ @@ -55,6 +56,15 @@ struct ring_buffer { }; extern void rb_free(struct ring_buffer *rb); + +static inline void rb_free_rcu(struct rcu_head *rcu_head) +{ + struct ring_buffer *rb; + + rb = container_of(rcu_head, struct ring_buffer, rcu_head); + rb_free(rb); +} + extern struct ring_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); extern void perf_event_wakeup(struct perf_event *event); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 725c416085e3..7f63ad978cb8 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -221,6 +221,8 @@ void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } +static void rb_irq_work(struct irq_work *work); + static void ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) { @@ -241,6 +243,16 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) INIT_LIST_HEAD(&rb->event_list); spin_lock_init(&rb->event_lock); + init_irq_work(&rb->irq_work, rb_irq_work); +} + +static void ring_buffer_put_async(struct ring_buffer *rb) +{ + if (!atomic_dec_and_test(&rb->refcount)) + return; + + rb->rcu_head.next = (void *)rb; + irq_work_queue(&rb->irq_work); } /* @@ -319,7 +331,7 @@ err_put: rb_free_aux(rb); err: - ring_buffer_put(rb); + ring_buffer_put_async(rb); handle->event = NULL; return NULL; @@ -370,7 +382,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, local_set(&rb->aux_nest, 0); rb_free_aux(rb); - ring_buffer_put(rb); + ring_buffer_put_async(rb); } /* @@ -547,17 +559,30 @@ static void __rb_free_aux(struct ring_buffer *rb) rb->aux_priv = NULL; } - for (pg = 0; pg < rb->aux_nr_pages; pg++) - rb_free_aux_page(rb, pg); + if (rb->aux_nr_pages) { + for (pg = 0; pg < rb->aux_nr_pages; pg++) + rb_free_aux_page(rb, pg); - kfree(rb->aux_pages); - rb->aux_nr_pages = 0; + kfree(rb->aux_pages); + rb->aux_nr_pages = 0; + } } void rb_free_aux(struct ring_buffer *rb) { if (atomic_dec_and_test(&rb->aux_refcount)) + irq_work_queue(&rb->irq_work); +} + +static void rb_irq_work(struct irq_work *work) +{ + struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work); + + if (!atomic_read(&rb->aux_refcount)) __rb_free_aux(rb); + + if (rb->rcu_head.next == (void *)rb) + call_rcu(&rb->rcu_head, rb_free_rcu); } #ifndef CONFIG_PERF_USE_VMALLOC diff --git a/kernel/fork.c b/kernel/fork.c index 03c1eaaa6ef5..8209fa2d36ef 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1854,13 +1854,21 @@ static int check_unshare_flags(unsigned long unshare_flags) CLONE_NEWUSER|CLONE_NEWPID)) return -EINVAL; /* - * Not implemented, but pretend it works if there is nothing to - * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND - * needs to unshare vm. + * Not implemented, but pretend it works if there is nothing + * to unshare. Note that unsharing the address space or the + * signal handlers also need to unshare the signal queues (aka + * CLONE_THREAD). */ if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) { - /* FIXME: get_task_mm() increments ->mm_users */ - if (atomic_read(¤t->mm->mm_users) > 1) + if (!thread_group_empty(current)) + return -EINVAL; + } + if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) { + if (atomic_read(¤t->sighand->count) > 1) + return -EINVAL; + } + if (unshare_flags & CLONE_VM) { + if (!current_is_single_threaded()) return -EINVAL; } @@ -1929,16 +1937,16 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) if (unshare_flags & CLONE_NEWUSER) unshare_flags |= CLONE_THREAD | CLONE_FS; /* - * If unsharing a thread from a thread group, must also unshare vm. - */ - if (unshare_flags & CLONE_THREAD) - unshare_flags |= CLONE_VM; - /* * If unsharing vm, must also unshare signal handlers. */ if (unshare_flags & CLONE_VM) unshare_flags |= CLONE_SIGHAND; /* + * If unsharing a signal handlers, must also unshare the signal queues. + */ + if (unshare_flags & CLONE_SIGHAND) + unshare_flags |= CLONE_THREAD; + /* * If unsharing namespace, must also unshare filesystem information. */ if (unshare_flags & CLONE_NEWNS) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 2456fe89719c..0ec0298ad999 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -962,6 +962,23 @@ int irq_chip_set_affinity_parent(struct irq_data *data, } /** + * irq_chip_set_type_parent - Set IRQ type on the parent interrupt + * @data: Pointer to interrupt specific data + * @type: IRQ_TYPE_{LEVEL,EDGE}_* value - see include/linux/irq.h + * + * Conditional, as the underlying parent chip might not implement it. + */ +int irq_chip_set_type_parent(struct irq_data *data, unsigned int type) +{ + data = data->parent_data; + + if (data->chip->irq_set_type) + return data->chip->irq_set_type(data, type); + + return -ENOSYS; +} + +/** * irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware * @data: Pointer to interrupt specific data * @@ -974,7 +991,7 @@ int irq_chip_retrigger_hierarchy(struct irq_data *data) if (data->chip && data->chip->irq_retrigger) return data->chip->irq_retrigger(data); - return -ENOSYS; + return 0; } /** diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index df2f4642d1e7..5c38f59741e2 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -12,6 +12,7 @@ #include <linux/seq_file.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> +#include <linux/mutex.h> #include "internals.h" @@ -323,18 +324,29 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) void register_irq_proc(unsigned int irq, struct irq_desc *desc) { + static DEFINE_MUTEX(register_lock); char name [MAX_NAMELEN]; - if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip) || desc->dir) + if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip)) return; + /* + * irq directories are registered only when a handler is + * added, not when the descriptor is created, so multiple + * tasks might try to register at the same time. + */ + mutex_lock(®ister_lock); + + if (desc->dir) + goto out_unlock; + memset(name, 0, MAX_NAMELEN); sprintf(name, "%d", irq); /* create /proc/irq/1234 */ desc->dir = proc_mkdir(name, root_irq_dir); if (!desc->dir) - return; + goto out_unlock; #ifdef CONFIG_SMP /* create /proc/irq/<irq>/smp_affinity */ @@ -355,6 +367,9 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) proc_create_data("spurious", 0444, desc->dir, &irq_spurious_proc_fops, (void *)(long)irq); + +out_unlock: + mutex_unlock(®ister_lock); } void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 123673291ffb..8476206a1e19 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2217,11 +2217,11 @@ static struct rq *finish_task_switch(struct task_struct *prev) * If a task dies, then it sets TASK_DEAD in tsk->state and calls * schedule one last time. The schedule call will never return, and * the scheduled task must drop that reference. - * The test for TASK_DEAD must occur while the runqueue locks are - * still held, otherwise prev could be scheduled on another cpu, die - * there before we look at prev->state, and then the reference would - * be dropped twice. - * Manfred Spraul <manfred@colorfullife.com> + * + * We must observe prev->state before clearing prev->on_cpu (in + * finish_lock_switch), otherwise a concurrent wakeup can get prev + * running on another CPU and we could rave with its RUNNING -> DEAD + * transition, resulting in a double drop. */ prev_state = prev->state; vtime_task_switch(prev); @@ -2358,13 +2358,20 @@ unsigned long nr_running(void) /* * Check if only the current task is running on the cpu. + * + * Caution: this function does not check that the caller has disabled + * preemption, thus the result might have a time-of-check-to-time-of-use + * race. The caller is responsible to use it correctly, for example: + * + * - from a non-preemptable section (of course) + * + * - from a thread that is bound to a single CPU + * + * - in a loop with very short iterations (e.g. a polling loop) */ bool single_task_running(void) { - if (cpu_rq(smp_processor_id())->nr_running == 1) - return true; - else - return false; + return raw_rq()->nr_running == 1; } EXPORT_SYMBOL(single_task_running); @@ -5328,6 +5335,14 @@ static int sched_cpu_active(struct notifier_block *nfb, case CPU_STARTING: set_cpu_rq_start_time(); return NOTIFY_OK; + case CPU_ONLINE: + /* + * At this point a starting CPU has marked itself as online via + * set_cpu_online(). But it might not yet have marked itself + * as active, which is essential from here on. + * + * Thus, fall-through and help the starting CPU along. + */ case CPU_DOWN_FAILED: set_cpu_active((long)hcpu, true); return NOTIFY_OK; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c2980e8733bc..77690b653ca9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5126,18 +5126,21 @@ again: * entity, update_curr() will update its vruntime, otherwise * forget we've ever seen it. */ - if (curr && curr->on_rq) - update_curr(cfs_rq); - else - curr = NULL; + if (curr) { + if (curr->on_rq) + update_curr(cfs_rq); + else + curr = NULL; - /* - * This call to check_cfs_rq_runtime() will do the throttle and - * dequeue its entity in the parent(s). Therefore the 'simple' - * nr_running test will indeed be correct. - */ - if (unlikely(check_cfs_rq_runtime(cfs_rq))) - goto simple; + /* + * This call to check_cfs_rq_runtime() will do the + * throttle and dequeue its entity in the parent(s). + * Therefore the 'simple' nr_running test will indeed + * be correct. + */ + if (unlikely(check_cfs_rq_runtime(cfs_rq))) + goto simple; + } se = pick_next_entity(cfs_rq, curr); cfs_rq = group_cfs_rq(se); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e0e129993958..aa1f059de4f7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1068,9 +1068,10 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) * After ->on_cpu is cleared, the task can be moved to a different CPU. * We must ensure this doesn't happen until the switch is completely * finished. + * + * Pairs with the control dependency and rmb in try_to_wake_up(). */ - smp_wmb(); - prev->on_cpu = 0; + smp_store_release(&prev->on_cpu, 0); #endif #ifdef CONFIG_DEBUG_SPINLOCK /* this is a valid case when another task releases the spinlock */ diff --git a/kernel/signal.c b/kernel/signal.c index d51c5ddd855c..0206be728dac 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2753,12 +2753,15 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from) * Other callers might not initialize the si_lsb field, * so check explicitly for the right codes here. */ - if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) + if (from->si_signo == SIGBUS && + (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)) err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); #endif #ifdef SEGV_BNDERR - err |= __put_user(from->si_lower, &to->si_lower); - err |= __put_user(from->si_upper, &to->si_upper); + if (from->si_signo == SIGSEGV && from->si_code == SEGV_BNDERR) { + err |= __put_user(from->si_lower, &to->si_lower); + err |= __put_user(from->si_upper, &to->si_upper); + } #endif break; case __SI_CHLD: @@ -3022,7 +3025,7 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo, int, sig, struct compat_siginfo __user *, uinfo) { - siginfo_t info; + siginfo_t info = {}; int ret = copy_siginfo_from_user32(&info, uinfo); if (unlikely(ret)) return ret; @@ -3066,7 +3069,7 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo, int, sig, struct compat_siginfo __user *, uinfo) { - siginfo_t info; + siginfo_t info = {}; if (copy_siginfo_from_user32(&info, uinfo)) return -EFAULT; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 946acb72179f..414d9df94724 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1615,7 +1615,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, negative = (tick_error < 0); /* Sort out the magnitude of the correction */ - tick_error = abs(tick_error); + tick_error = abs64(tick_error); for (adj = 0; tick_error > interval; adj++) tick_error >>= 1; |