From d49db342f0e276b354383b3281c5668b6b80f5c2 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 24 Jun 2015 12:41:47 -0700 Subject: sched/fair: Test list head instead of list entry in throttle_cfs_rq() According to the comments, we need to test if this is the first throttled task, however, list_empty() tests on the entry cfs_rq->throttled_list, not the head, this is wrong. This is a bug because we don't re-init the list entry after removing it from the list, so list_empty() could return false even if the list is really empty. Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435174907-432-1-git-send-email-xiyou.wangcong@gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 65c8f3ebdc3c..d113c3ba8bc4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3683,7 +3683,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq) cfs_rq->throttled = 1; cfs_rq->throttled_clock = rq_clock(rq); raw_spin_lock(&cfs_b->lock); - empty = list_empty(&cfs_rq->throttled_list); + empty = list_empty(&cfs_b->throttled_cfs_rq); /* * Add to the _head_ of the list, so that an already-started -- cgit v1.2.3 From 6224beb12e190ff11f3c7d4bf50cb2922878f600 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 7 Jul 2015 15:05:03 -0400 Subject: tracing: Have branch tracer use recursive field of task struct Fengguang Wu's tests triggered a bug in the branch tracer's start up test when CONFIG_DEBUG_PREEMPT set. This was because that config adds some debug logic in the per cpu field, which calls back into the branch tracer. The branch tracer has its own recursive checks, but uses a per cpu variable to implement it. If retrieving the per cpu variable calls back into the branch tracer, you can see how things will break. Instead of using a per cpu variable, use the trace_recursion field of the current task struct. Simply set a bit when entering the branch tracing and clear it when leaving. If the bit is set on entry, just don't do the tracing. There's also the case with lockdep, as the local_irq_save() called before the recursion can also trigger code that can call back into the function. Changing that to a raw_local_irq_save() will protect that as well. This prevents the recursion and the inevitable crash that follows. Link: http://lkml.kernel.org/r/20150630141803.GA28071@wfg-t540p.sh.intel.com Cc: stable@vger.kernel.org # 3.10+ Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 1 + kernel/trace/trace_branch.c | 17 ++++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f060716b02ae..74bde81601a9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -444,6 +444,7 @@ enum { TRACE_CONTROL_BIT, + TRACE_BRANCH_BIT, /* * Abuse of the trace_recursion. * As we need a way to maintain state if we are tracing the function diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index a87b43f49eb4..e2e12ad3186f 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -36,9 +36,12 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) struct trace_branch *entry; struct ring_buffer *buffer; unsigned long flags; - int cpu, pc; + int pc; const char *p; + if (current->trace_recursion & TRACE_BRANCH_BIT) + return; + /* * I would love to save just the ftrace_likely_data pointer, but * this code can also be used by modules. Ugly things can happen @@ -49,10 +52,10 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) if (unlikely(!tr)) return; - local_irq_save(flags); - cpu = raw_smp_processor_id(); - data = per_cpu_ptr(tr->trace_buffer.data, cpu); - if (atomic_inc_return(&data->disabled) != 1) + raw_local_irq_save(flags); + current->trace_recursion |= TRACE_BRANCH_BIT; + data = this_cpu_ptr(tr->trace_buffer.data); + if (atomic_read(&data->disabled)) goto out; pc = preempt_count(); @@ -81,8 +84,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) __buffer_unlock_commit(buffer, event); out: - atomic_dec(&data->disabled); - local_irq_restore(flags); + current->trace_recursion &= ~TRACE_BRANCH_BIT; + raw_local_irq_restore(flags); } static inline -- cgit v1.2.3 From 0f44705175347ec96935d60b765b5d14ecc763bb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Jul 2015 12:01:04 +0200 Subject: tick: Move the export of tick_broadcast_oneshot_control to the proper place tick_broadcast_oneshot_control got moved from tick-broadcast to tick-common, but the export stayed in the old place. Fix it up. Fixes: f32dd1170511 'tick/broadcast: Make idle check independent from mode and config' Reported-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 1 - kernel/time/tick-common.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 52b9e199b5ac..f6aae7977824 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -839,7 +839,6 @@ out: raw_spin_unlock(&tick_broadcast_lock); return ret; } -EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control); /* * Reset the one shot broadcast for a cpu diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 55e13efff1ab..f8bf47571dda 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -363,6 +363,7 @@ int tick_broadcast_oneshot_control(enum tick_broadcast_state state) return __tick_broadcast_oneshot_control(state); } +EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control); #ifdef CONFIG_HOTPLUG_CPU /* -- cgit v1.2.3 From ce0d3c0a6fb1422101498ef378c0851dabbbf67f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 14 Jul 2015 22:03:57 +0200 Subject: genirq: Revert sparse irq locking around __cpu_up() and move it to x86 for now Boris reported that the sparse_irq protection around __cpu_up() in the generic code causes a regression on Xen. Xen allocates interrupts and some more in the xen_cpu_up() function, so it deadlocks on the sparse_irq_lock. There is no simple fix for this and we really should have the protection for all architectures, but for now the only solution is to move it to x86 where actual wreckage due to the lack of protection has been observed. Reported-and-tested-by: Boris Ostrovsky Fixes: a89941816726 'hotplug: Prevent alloc/free of irq descriptors during cpu up/down' Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: xiao jin Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Cc: xen-devel --- kernel/cpu.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 6a374544d495..5644ec5582b9 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -527,18 +527,9 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen) goto out_notify; } - /* - * Some architectures have to walk the irq descriptors to - * setup the vector space for the cpu which comes online. - * Prevent irq alloc/free across the bringup. - */ - irq_lock_sparse(); - /* Arch-specific enabling code. */ ret = __cpu_up(cpu, idle); - irq_unlock_sparse(); - if (ret != 0) goto out_notify; BUG_ON(!cpu_online(cpu)); -- cgit v1.2.3 From 75a06189fc508a2acf470b0b12710362ffb2c4b1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Jul 2015 14:10:17 +0200 Subject: genirq: Prevent resend to interrupts marked IRQ_NESTED_THREAD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The resend mechanism happily calls the interrupt handler of interrupts which are marked IRQ_NESTED_THREAD from softirq context. This can result in crashes because the interrupt handler is not the proper way to invoke the device handlers. They must be invoked via handle_nested_irq. Prevent the resend even if the interrupt has no valid parent irq set. Its better to have a lost interrupt than a crashing machine. Reported-by: Uwe Kleine-König Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org --- kernel/irq/resend.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 9065107f083e..7a5237a1bce5 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -75,13 +75,21 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq) !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) { #ifdef CONFIG_HARDIRQS_SW_RESEND /* - * If the interrupt has a parent irq and runs - * in the thread context of the parent irq, - * retrigger the parent. + * If the interrupt is running in the thread + * context of the parent irq we need to be + * careful, because we cannot trigger it + * directly. */ - if (desc->parent_irq && - irq_settings_is_nested_thread(desc)) + if (irq_settings_is_nested_thread(desc)) { + /* + * If the parent_irq is valid, we + * retrigger the parent, otherwise we + * do nothing. + */ + if (!desc->parent_irq) + return; irq = desc->parent_irq; + } /* Set it pending and activate the softirq: */ set_bit(irq, irqs_resend); tasklet_schedule(&resend_tasklet); -- cgit v1.2.3 From 0c8c0f03e3a292e031596484275c14cf39c0ab7a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 17 Jul 2015 12:28:11 +0200 Subject: x86/fpu, sched: Dynamically allocate 'struct fpu' The FPU rewrite removed the dynamic allocations of 'struct fpu'. But, this potentially wastes massive amounts of memory (2k per task on systems that do not have AVX-512 for instance). Instead of having a separate slab, this patch just appends the space that we need to the 'task_struct' which we dynamically allocate already. This saves from doing an extra slab allocation at fork(). The only real downside here is that we have to stick everything and the end of the task_struct. But, I think the BUILD_BUG_ON()s I stuck in there should keep that from being too fragile. Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1437128892-9831-2-git-send-email-mingo@kernel.org Signed-off-by: Ingo Molnar --- kernel/fork.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 1bfefc6f96a4..431b67a6098c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -287,15 +287,21 @@ static void set_max_threads(unsigned int max_threads_suggested) max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); } +int __weak arch_task_struct_size(void) +{ + return sizeof(struct task_struct); +} + void __init fork_init(void) { + int task_struct_size = arch_task_struct_size(); #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES #endif /* create a slab on which task_structs can be allocated */ task_struct_cachep = - kmem_cache_create("task_struct", sizeof(struct task_struct), + kmem_cache_create("task_struct", task_struct_size, ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL); #endif -- cgit v1.2.3 From 5aaeb5c01c5b6c0be7b7aadbf3ace9f3a4458c3d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 17 Jul 2015 12:28:12 +0200 Subject: x86/fpu, sched: Introduce CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT and use it on x86 Don't burden architectures without dynamic task_struct sizing with the overhead of dynamic sizing. Also optimize the x86 code a bit by caching task_struct_size. Acked-and-Tested-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1437128892-9831-3-git-send-email-mingo@kernel.org Signed-off-by: Ingo Molnar --- kernel/fork.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/fork.c b/kernel/fork.c index 431b67a6098c..dbd9b8d7b7cc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -287,21 +287,20 @@ static void set_max_threads(unsigned int max_threads_suggested) max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); } -int __weak arch_task_struct_size(void) -{ - return sizeof(struct task_struct); -} +#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT +/* Initialized by the architecture: */ +int arch_task_struct_size __read_mostly; +#endif void __init fork_init(void) { - int task_struct_size = arch_task_struct_size(); #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES #endif /* create a slab on which task_structs can be allocated */ task_struct_cachep = - kmem_cache_create("task_struct", task_struct_size, + kmem_cache_create("task_struct", arch_task_struct_size, ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL); #endif -- cgit v1.2.3