From a831881be220358a1d28c5d95d69449fb6d623ca Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 18 Dec 2012 17:32:19 +0100 Subject: nohz: Basic full dynticks interface For extreme usecases such as Real Time or HPC, having the ability to shutdown the tick when a single task runs on a CPU is a desired feature: * Reducing the amount of interrupts improves throughput for CPU-bound tasks. The CPU is less distracted from its real job, from an execution time and from the cache point of views. * This also improve latency response as we have less critical sections. Start with introducing a very simple interface to define full dynticks CPU: use a boot time option defined cpumask through the "nohz_extended=" kernel parameter. CPUs that are part of this range will have their tick shutdown whenever possible: provided they run a single task and they don't do kernel activity that require the periodic tick. These details will be later documented in Documentation/* An online CPU must be kept outside this range to handle the timekeeping. Suggested-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 19 +++++++++++++++ kernel/time/tick-sched.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 24510d84efd7..5a87c03e45ad 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -79,6 +79,25 @@ config NO_HZ only trigger on an as-needed basis both when the system is busy and when the system is idle. +config NO_HZ_EXTENDED + bool "Full dynticks system" + depends on NO_HZ && RCU_USER_QS && VIRT_CPU_ACCOUNTING_GEN && RCU_NOCB_CPU && SMP + select CONTEXT_TRACKING_FORCE + help + Adaptively try to shutdown the tick whenever possible, even when + the CPU is running tasks. Typically this requires running a single + task on the CPU. Chances for running tickless are maximized when + the task mostly runs in userspace and has few kernel activity. + + You need to fill up the nohz_extended boot parameter with the + desired range of dynticks CPUs. + + This is implemented at the expense of some overhead in user <-> kernel + transitions: syscalls, exceptions and interrupts. Even when it's + dynamically off. + + Say N. + config HIGH_RES_TIMERS bool "High Resolution Timer Support" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a19a39952c1b..79c275f08b7d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -142,6 +142,68 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) profile_tick(CPU_PROFILING); } +#ifdef CONFIG_NO_HZ_EXTENDED +static cpumask_var_t nohz_extended_mask; +bool have_nohz_extended_mask; + +int tick_nohz_extended_cpu(int cpu) +{ + if (!have_nohz_extended_mask) + return 0; + + return cpumask_test_cpu(cpu, nohz_extended_mask); +} + +/* Parse the boot-time nohz CPU list from the kernel parameters. */ +static int __init tick_nohz_extended_setup(char *str) +{ + alloc_bootmem_cpumask_var(&nohz_extended_mask); + if (cpulist_parse(str, nohz_extended_mask) < 0) + pr_warning("NOHZ: Incorrect nohz_extended cpumask\n"); + else + have_nohz_extended_mask = true; + return 1; +} +__setup("nohz_extended=", tick_nohz_extended_setup); + +static int __init init_tick_nohz_extended(void) +{ + cpumask_var_t online_nohz; + int cpu; + + if (!have_nohz_extended_mask) + return 0; + + if (!zalloc_cpumask_var(&online_nohz, GFP_KERNEL)) { + pr_warning("NO_HZ: Not enough memory to check extended nohz mask\n"); + return -ENOMEM; + } + + /* + * CPUs can probably not be concurrently offlined on initcall time. + * But we are paranoid, aren't we? + */ + get_online_cpus(); + + /* Ensure we keep a CPU outside the dynticks range for timekeeping */ + cpumask_and(online_nohz, cpu_online_mask, nohz_extended_mask); + if (cpumask_equal(online_nohz, cpu_online_mask)) { + cpu = cpumask_any(cpu_online_mask); + pr_warning("NO_HZ: Must keep at least one online CPU " + "out of nohz_extended range\n"); + pr_warning("NO_HZ: Clearing %d from nohz_extended range\n", cpu); + cpumask_clear_cpu(cpu, nohz_extended_mask); + } + put_online_cpus(); + free_cpumask_var(online_nohz); + + return 0; +} +core_initcall(init_tick_nohz_extended); +#else +#define have_nohz_extended_mask (0) +#endif + /* * NOHZ - aka dynamic tick functionality */ -- cgit v1.2.3 From a382bf934449ddeb625167537ae81daa0211b477 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 18 Dec 2012 18:24:35 +0100 Subject: nohz: Assign timekeeping duty to a CPU outside the full dynticks range This way the full nohz CPUs can safely run with the tick stopped with a guarantee that somebody else is taking care of the jiffies and GTOD progression. Once the duty is attributed to a CPU, it won't change. Also that CPU can't enter into dyntick idle mode or be hot unplugged. This may later be improved from a power consumption POV. At least we should be able to share the duty amongst all CPUs outside the full dynticks range. Then the duty could even be shared with full dynticks CPUs when those can't stop their tick for any reason. But let's start with that very simple approach first. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner [fix have_nohz_full_mask offcase] Signed-off-by: Steven Rostedt --- kernel/time/tick-broadcast.c | 3 ++- kernel/time/tick-common.c | 5 ++++- kernel/time/tick-sched.c | 47 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 51 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 2fb8cb88df8d..8a6875cc1879 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -573,7 +573,8 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) bc->event_handler = tick_handle_oneshot_broadcast; /* Take the do_timer update */ - tick_do_timer_cpu = cpu; + if (!tick_nohz_extended_cpu(cpu)) + tick_do_timer_cpu = cpu; /* * We must be careful here. There might be other CPUs diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index b1600a6973f4..b7dc0cbdb59b 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -163,7 +163,10 @@ static void tick_setup_device(struct tick_device *td, * this cpu: */ if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { - tick_do_timer_cpu = cpu; + if (!tick_nohz_extended_cpu(cpu)) + tick_do_timer_cpu = cpu; + else + tick_do_timer_cpu = TICK_DO_TIMER_NONE; tick_next_period = ktime_get(); tick_period = ktime_set(0, NSEC_PER_SEC / HZ); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 79c275f08b7d..57bb3fe5aaa3 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -112,7 +112,8 @@ static void tick_sched_do_timer(ktime_t now) * this duty, then the jiffies update is still serialized by * jiffies_lock. */ - if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) + if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) + && !tick_nohz_extended_cpu(cpu)) tick_do_timer_cpu = cpu; #endif @@ -166,6 +167,25 @@ static int __init tick_nohz_extended_setup(char *str) } __setup("nohz_extended=", tick_nohz_extended_setup); +static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DOWN_PREPARE: + /* + * If we handle the timekeeping duty for full dynticks CPUs, + * we can't safely shutdown that CPU. + */ + if (have_nohz_extended_mask && tick_do_timer_cpu == cpu) + return -EINVAL; + break; + } + return NOTIFY_OK; +} + static int __init init_tick_nohz_extended(void) { cpumask_var_t online_nohz; @@ -174,6 +194,8 @@ static int __init init_tick_nohz_extended(void) if (!have_nohz_extended_mask) return 0; + cpu_notifier(tick_nohz_cpu_down_callback, 0); + if (!zalloc_cpumask_var(&online_nohz, GFP_KERNEL)) { pr_warning("NO_HZ: Not enough memory to check extended nohz mask\n"); return -ENOMEM; @@ -188,11 +210,17 @@ static int __init init_tick_nohz_extended(void) /* Ensure we keep a CPU outside the dynticks range for timekeeping */ cpumask_and(online_nohz, cpu_online_mask, nohz_extended_mask); if (cpumask_equal(online_nohz, cpu_online_mask)) { - cpu = cpumask_any(cpu_online_mask); pr_warning("NO_HZ: Must keep at least one online CPU " "out of nohz_extended range\n"); + /* + * We know the current CPU doesn't have its tick stopped. + * Let's use it for the timekeeping duty. + */ + preempt_disable(); + cpu = smp_processor_id(); pr_warning("NO_HZ: Clearing %d from nohz_extended range\n", cpu); cpumask_clear_cpu(cpu, nohz_extended_mask); + preempt_enable(); } put_online_cpus(); free_cpumask_var(online_nohz); @@ -551,6 +579,21 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) return false; } + if (have_nohz_extended_mask) { + /* + * Keep the tick alive to guarantee timekeeping progression + * if there are full dynticks CPUs around + */ + if (tick_do_timer_cpu == cpu) + return false; + /* + * Boot safety: make sure the timekeeping duty has been + * assigned before entering dyntick-idle mode, + */ + if (tick_do_timer_cpu == TICK_DO_TIMER_NONE) + return false; + } + return true; } -- cgit v1.2.3 From ab71d36ddb9e60d4ddb28a187718815d38c3c666 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Aug 2011 23:21:01 +0200 Subject: nohz: Unhide full dynticks feature from its dependencies The full dynticks feature only shows up when all its Kconfig dependencies are met (RCU nocbs, RCU user mode, ...) This is far from being user friendly as those who want to activate this feature need to look into the Kconfig files and iterate through each dependency then activate these by hand in order to show and select the full dynticks Kconfig option. So process the other way around: show up the Kconfig option if the minimal low level dependencies are met and activate the high level ones when we enable the feature. Note there is one exception in the picture: CONFIG_VIRT_CPU_ACCOUNTING_GEN is part of a Kconfig choice menu and it appears we can't select it from another Kconfig selection when it's under such layout. So for now this particular item stays as a passive dependency. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 5a87c03e45ad..726c33e00da2 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -80,11 +80,20 @@ config NO_HZ busy and when the system is idle. config NO_HZ_EXTENDED - bool "Full dynticks system" - depends on NO_HZ && RCU_USER_QS && VIRT_CPU_ACCOUNTING_GEN && RCU_NOCB_CPU && SMP - select CONTEXT_TRACKING_FORCE - help - Adaptively try to shutdown the tick whenever possible, even when + bool "Full dynticks system" + # NO_HZ dependency + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS + # RCU_USER_QS + depends on HAVE_CONTEXT_TRACKING && SMP + # RCU_NOCB_CPU dependency + depends on TREE_RCU || TREE_PREEMPT_RCU + depends on VIRT_CPU_ACCOUNTING_GEN + select NO_HZ + select RCU_USER_QS + select RCU_NOCB_CPU + select CONTEXT_TRACKING_FORCE + help + Adaptively try to shutdown the tick whenever possible, even when the CPU is running tasks. Typically this requires running a single task on the CPU. Chances for running tickless are maximized when the task mostly runs in userspace and has few kernel activity. -- cgit v1.2.3 From 3451d0243c3cdfd729b36f9684a14659d4895ca3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Aug 2011 23:21:01 +0200 Subject: nohz: Rename CONFIG_NO_HZ to CONFIG_NO_HZ_COMMON We are planning to convert the dynticks Kconfig options layout into a choice menu. The user must be able to easily pick any of the following implementations: constant periodic tick, idle dynticks, full dynticks. As this implies a mutual exclusion, the two dynticks implementions need to converge on the selection of a common Kconfig option in order to ease the sharing of a common infrastructure. It would thus seem pretty natural to reuse CONFIG_NO_HZ to that end. It already implements all the idle dynticks code and the full dynticks depends on all that code for now. So ideally the choice menu would propose CONFIG_NO_HZ_IDLE and CONFIG_NO_HZ_EXTENDED then both would select CONFIG_NO_HZ. On the other hand we want to stay backward compatible: if CONFIG_NO_HZ is set in an older config file, we want to enable CONFIG_NO_HZ_IDLE by default. But we can't afford both at the same time or we run into a circular dependency: 1) CONFIG_NO_HZ_IDLE and CONFIG_NO_HZ_EXTENDED both select CONFIG_NO_HZ 2) If CONFIG_NO_HZ is set, we default to CONFIG_NO_HZ_IDLE We might be able to support that from Kconfig/Kbuild but it may not be wise to introduce such a confusing behaviour. So to solve this, create a new CONFIG_NO_HZ_COMMON option which gathers the common code between idle and full dynticks (that common code for now is simply the idle dynticks code) and select it from their referring Kconfig. Then we'll later create CONFIG_NO_HZ_IDLE and map CONFIG_NO_HZ to it for backward compatibility. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 13 +++++++++---- kernel/time/tick-sched.c | 12 ++++++------ 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 726c33e00da2..c88fc43494c9 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -64,16 +64,21 @@ config GENERIC_CMOS_UPDATE if GENERIC_CLOCKEVENTS menu "Timers subsystem" -# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is +# Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is # only related to the tick functionality. Oneshot clockevent devices # are supported independ of this. config TICK_ONESHOT bool +config NO_HZ_COMMON + bool + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS + select TICK_ONESHOT + config NO_HZ bool "Tickless System (Dynamic Ticks)" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - select TICK_ONESHOT + select NO_HZ_COMMON help This option enables a tickless system: timer interrupts will only trigger on an as-needed basis both when the system is @@ -81,14 +86,14 @@ config NO_HZ config NO_HZ_EXTENDED bool "Full dynticks system" - # NO_HZ dependency + # NO_HZ_COMMON dependency depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS # RCU_USER_QS depends on HAVE_CONTEXT_TRACKING && SMP # RCU_NOCB_CPU dependency depends on TREE_RCU || TREE_PREEMPT_RCU depends on VIRT_CPU_ACCOUNTING_GEN - select NO_HZ + select NO_HZ_COMMON select RCU_USER_QS select RCU_NOCB_CPU select CONTEXT_TRACKING_FORCE diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 57bb3fe5aaa3..ccfc2086cd4b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -104,7 +104,7 @@ static void tick_sched_do_timer(ktime_t now) { int cpu = smp_processor_id(); -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * Check if the do_timer duty was dropped. We don't care about * concurrency: This happens only when the cpu in charge went @@ -124,7 +124,7 @@ static void tick_sched_do_timer(ktime_t now) static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) { -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * When we are idle and the tick is stopped, we have to touch * the watchdog as we might not schedule for a really long @@ -235,7 +235,7 @@ core_initcall(init_tick_nohz_extended); /* * NOHZ - aka dynamic tick functionality */ -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON /* * NO HZ enabled ? */ @@ -907,7 +907,7 @@ static inline void tick_check_nohz(int cpu) static inline void tick_nohz_switch_to_nohz(void) { } static inline void tick_check_nohz(int cpu) { } -#endif /* NO_HZ */ +#endif /* CONFIG_NO_HZ_COMMON */ /* * Called from irq_enter to notify about the possible interruption of idle() @@ -992,14 +992,14 @@ void tick_setup_sched_timer(void) now = ktime_get(); } -#ifdef CONFIG_NO_HZ +#ifdef CONFIG_NO_HZ_COMMON if (tick_nohz_enabled) ts->nohz_mode = NOHZ_MODE_HIGHRES; #endif } #endif /* HIGH_RES_TIMERS */ -#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS +#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); -- cgit v1.2.3 From 3ca277e41914ab344214ed50a41c14c48ae973f3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 10 Aug 2011 23:21:01 +0200 Subject: nohz: Pack nohz Kconfig option in a menu of choices Now the user has the choice between three implementations of the timer tick: * Static periodic tick * Idle dynticks * Full dynticks At least for now, these are mutually exclusive choices, so let's rely on the proper Kconfig feature to display these to the user. A new entry CONFIG_NO_HZ_IDLE is created and the old CONFIG_NO_HZ maps to it for config file backward compatibility. The old name was too general now that we have more granular dynticks implementations. While at it, add some explanation to help the user on his decision between the 3 entries. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index c88fc43494c9..27cc404ea187 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -75,17 +75,33 @@ config NO_HZ_COMMON depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS select TICK_ONESHOT +# Kept around for compatibility, maps to NO_HZ_IDLE config NO_HZ - bool "Tickless System (Dynamic Ticks)" + bool + +choice + prompt "Timer tick handling" + default NO_HZ_IDLE if NO_HZ + +config PERIODIC_HZ + bool "Periodic timer ticks (constant rate, no dynticks)" + help + This option keeps the tick running periodically at a constant + rate, even when the CPU doesn't need it. + +config NO_HZ_IDLE + bool "Idle dynticks system (tickless idle)" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS select NO_HZ_COMMON help - This option enables a tickless system: timer interrupts will - only trigger on an as-needed basis both when the system is - busy and when the system is idle. + This option enables a tickless idle system: timer interrupts + will only trigger on an as-needed basis when the system is idle. + This is usually interesting for energy saving. + + Most of the time you want to say Y here. config NO_HZ_EXTENDED - bool "Full dynticks system" + bool "Full dynticks system (tickless single task)" # NO_HZ_COMMON dependency depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS # RCU_USER_QS @@ -112,6 +128,8 @@ config NO_HZ_EXTENDED Say N. +endchoice + config HIGH_RES_TIMERS bool "High Resolution Timer Support" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS -- cgit v1.2.3 From 1034fc2f41aaf32f782a9362178f9a236ac5a50a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 26 Mar 2013 15:04:50 +0100 Subject: nohz: Print final full dynticks CPUs range on boot Given that we apply a few restrictions on the full dynticks CPUs range (keep an online timekeeper oustide the range, then in the future have the range be an RCU nocb CPUs subset), let's print the final resulting range of full dynticks CPUs to the user so that he knows what's really going to run. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index ccfc2086cd4b..e057d338daa4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -186,6 +186,13 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, return NOTIFY_OK; } +/* + * Worst case string length in chunks of CPU range seems 2 steps + * separations: 0,2,4,6,... + * This is NR_CPUS + sizeof('\0') + */ +static char __initdata nohz_ext_buf[NR_CPUS + 1]; + static int __init init_tick_nohz_extended(void) { cpumask_var_t online_nohz; @@ -225,6 +232,9 @@ static int __init init_tick_nohz_extended(void) put_online_cpus(); free_cpumask_var(online_nohz); + cpulist_scnprintf(nohz_ext_buf, sizeof(nohz_ext_buf), nohz_extended_mask); + pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_ext_buf); + return 0; } core_initcall(init_tick_nohz_extended); -- cgit v1.2.3 From 0644ca5c774f1a7033bfc83c7ed0660f74f28b56 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Apr 2013 16:40:12 +0200 Subject: nohz: Fix old dynticks idle Kconfig backward compatibility In order to enforce backward compatibility with older config files, we want the new dynticks-idle Kconfig entry to default its value to the one of the old CONFIG_NO_HZ symbol if present. Namely we want: config NO_HZ # old obsolete dynticks idle symbol bool config NO_HZ_IDLE # new dynticks idle symbol default NO_HZ However Kconfig prevents this to work if the old symbol is not visible. And this is currently the case because NO_HZ lacks a title in order to show it in make oldconfig and alike. To fix this, bring a minimal title and help text to the obsolete Kconfig entry that explains its purpose. This makes the "defaulting" to work. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 27cc404ea187..cbe64be17d1f 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -75,10 +75,6 @@ config NO_HZ_COMMON depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS select TICK_ONESHOT -# Kept around for compatibility, maps to NO_HZ_IDLE -config NO_HZ - bool - choice prompt "Timer tick handling" default NO_HZ_IDLE if NO_HZ @@ -130,6 +126,14 @@ config NO_HZ_EXTENDED endchoice +config NO_HZ + bool "Old Idle dynticks config" + depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS + help + This is the old config entry that enables dynticks idle. + We keep it around for a little while to enforce backward + compatibility with older config files. + config HIGH_RES_TIMERS bool "High Resolution Timer Support" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS -- cgit v1.2.3 From c5bfece2d6129131b4ade985e63bc35ddf5868d4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 12 Apr 2013 16:45:34 +0200 Subject: nohz: Switch from "extended nohz" to "full nohz" based naming "Extended nohz" was used as a naming base for the full dynticks API and Kconfig symbols. It reflects the fact the system tries to stop the tick in more places than just idle. But that "extended" name is a bit opaque and vague. Rename it to "full" makes it clearer what the system tries to do under this config: try to shutdown the tick anytime it can. The various constraints that prevent that to happen shouldn't be considered as fundamental properties of this feature but rather technical issues that may be solved in the future. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 4 ++-- kernel/time/tick-broadcast.c | 2 +- kernel/time/tick-common.c | 2 +- kernel/time/tick-sched.c | 54 ++++++++++++++++++++++---------------------- 4 files changed, 31 insertions(+), 31 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index cbe64be17d1f..4a17b5069466 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -96,7 +96,7 @@ config NO_HZ_IDLE Most of the time you want to say Y here. -config NO_HZ_EXTENDED +config NO_HZ_FULL bool "Full dynticks system (tickless single task)" # NO_HZ_COMMON dependency depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS @@ -115,7 +115,7 @@ config NO_HZ_EXTENDED task on the CPU. Chances for running tickless are maximized when the task mostly runs in userspace and has few kernel activity. - You need to fill up the nohz_extended boot parameter with the + You need to fill up the nohz_full boot parameter with the desired range of dynticks CPUs. This is implemented at the expense of some overhead in user <-> kernel diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 8a6875cc1879..a3a3123f6272 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -573,7 +573,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) bc->event_handler = tick_handle_oneshot_broadcast; /* Take the do_timer update */ - if (!tick_nohz_extended_cpu(cpu)) + if (!tick_nohz_full_cpu(cpu)) tick_do_timer_cpu = cpu; /* diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index b7dc0cbdb59b..83f2bd967161 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -163,7 +163,7 @@ static void tick_setup_device(struct tick_device *td, * this cpu: */ if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { - if (!tick_nohz_extended_cpu(cpu)) + if (!tick_nohz_full_cpu(cpu)) tick_do_timer_cpu = cpu; else tick_do_timer_cpu = TICK_DO_TIMER_NONE; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index e057d338daa4..369b5769fc97 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -113,7 +113,7 @@ static void tick_sched_do_timer(ktime_t now) * jiffies_lock. */ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE) - && !tick_nohz_extended_cpu(cpu)) + && !tick_nohz_full_cpu(cpu)) tick_do_timer_cpu = cpu; #endif @@ -143,29 +143,29 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) profile_tick(CPU_PROFILING); } -#ifdef CONFIG_NO_HZ_EXTENDED -static cpumask_var_t nohz_extended_mask; -bool have_nohz_extended_mask; +#ifdef CONFIG_NO_HZ_FULL +static cpumask_var_t nohz_full_mask; +bool have_nohz_full_mask; -int tick_nohz_extended_cpu(int cpu) +int tick_nohz_full_cpu(int cpu) { - if (!have_nohz_extended_mask) + if (!have_nohz_full_mask) return 0; - return cpumask_test_cpu(cpu, nohz_extended_mask); + return cpumask_test_cpu(cpu, nohz_full_mask); } /* Parse the boot-time nohz CPU list from the kernel parameters. */ -static int __init tick_nohz_extended_setup(char *str) +static int __init tick_nohz_full_setup(char *str) { - alloc_bootmem_cpumask_var(&nohz_extended_mask); - if (cpulist_parse(str, nohz_extended_mask) < 0) - pr_warning("NOHZ: Incorrect nohz_extended cpumask\n"); + alloc_bootmem_cpumask_var(&nohz_full_mask); + if (cpulist_parse(str, nohz_full_mask) < 0) + pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); else - have_nohz_extended_mask = true; + have_nohz_full_mask = true; return 1; } -__setup("nohz_extended=", tick_nohz_extended_setup); +__setup("nohz_full=", tick_nohz_full_setup); static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, unsigned long action, @@ -179,7 +179,7 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, * If we handle the timekeeping duty for full dynticks CPUs, * we can't safely shutdown that CPU. */ - if (have_nohz_extended_mask && tick_do_timer_cpu == cpu) + if (have_nohz_full_mask && tick_do_timer_cpu == cpu) return -EINVAL; break; } @@ -191,20 +191,20 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, * separations: 0,2,4,6,... * This is NR_CPUS + sizeof('\0') */ -static char __initdata nohz_ext_buf[NR_CPUS + 1]; +static char __initdata nohz_full_buf[NR_CPUS + 1]; -static int __init init_tick_nohz_extended(void) +static int __init init_tick_nohz_full(void) { cpumask_var_t online_nohz; int cpu; - if (!have_nohz_extended_mask) + if (!have_nohz_full_mask) return 0; cpu_notifier(tick_nohz_cpu_down_callback, 0); if (!zalloc_cpumask_var(&online_nohz, GFP_KERNEL)) { - pr_warning("NO_HZ: Not enough memory to check extended nohz mask\n"); + pr_warning("NO_HZ: Not enough memory to check full nohz mask\n"); return -ENOMEM; } @@ -215,31 +215,31 @@ static int __init init_tick_nohz_extended(void) get_online_cpus(); /* Ensure we keep a CPU outside the dynticks range for timekeeping */ - cpumask_and(online_nohz, cpu_online_mask, nohz_extended_mask); + cpumask_and(online_nohz, cpu_online_mask, nohz_full_mask); if (cpumask_equal(online_nohz, cpu_online_mask)) { pr_warning("NO_HZ: Must keep at least one online CPU " - "out of nohz_extended range\n"); + "out of nohz_full range\n"); /* * We know the current CPU doesn't have its tick stopped. * Let's use it for the timekeeping duty. */ preempt_disable(); cpu = smp_processor_id(); - pr_warning("NO_HZ: Clearing %d from nohz_extended range\n", cpu); - cpumask_clear_cpu(cpu, nohz_extended_mask); + pr_warning("NO_HZ: Clearing %d from nohz_full range\n", cpu); + cpumask_clear_cpu(cpu, nohz_full_mask); preempt_enable(); } put_online_cpus(); free_cpumask_var(online_nohz); - cpulist_scnprintf(nohz_ext_buf, sizeof(nohz_ext_buf), nohz_extended_mask); - pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_ext_buf); + cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); + pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); return 0; } -core_initcall(init_tick_nohz_extended); +core_initcall(init_tick_nohz_full); #else -#define have_nohz_extended_mask (0) +#define have_nohz_full_mask (0) #endif /* @@ -589,7 +589,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) return false; } - if (have_nohz_extended_mask) { + if (have_nohz_full_mask) { /* * Keep the tick alive to guarantee timekeeping progression * if there are full dynticks CPUs around -- cgit v1.2.3 From 5b533f4ff5b6d8f2442f2ac82ca1530ff3f55267 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 13 Apr 2013 16:53:35 +0200 Subject: nohz: Align periodic tick Kconfig with other choices' naming convention Rename CONFIG_PERIODIC_HZ to CONFIG_HZ_PERIODIC in order to stay consistent with other tick implementation entries: CONFIG_HZ_PERIODIC CONFIG_NO_HZ_IDLE CONFIG_NO_HZ_FULL Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 4a17b5069466..3b683227a10d 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -79,7 +79,7 @@ choice prompt "Timer tick handling" default NO_HZ_IDLE if NO_HZ -config PERIODIC_HZ +config HZ_PERIODIC bool "Periodic timer ticks (constant rate, no dynticks)" help This option keeps the tick running periodically at a constant -- cgit v1.2.3 From fae30dd6698ecaa93cb50213266c224bc550c32c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 13 Apr 2013 17:04:04 +0200 Subject: nohz: Improve a bit the full dynticks Kconfig documentation Remove the "single task" statement from CONFIG_NO_HZ_FULL title. The constraint can be invalidated when tasks from other sched classes than SCHED_FAIR are running. Moreover it's possible that hrtick join the party in the future. Also add a line about the dependency on SMP. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 3b683227a10d..358d601a4fec 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -97,11 +97,13 @@ config NO_HZ_IDLE Most of the time you want to say Y here. config NO_HZ_FULL - bool "Full dynticks system (tickless single task)" + bool "Full dynticks system (tickless)" # NO_HZ_COMMON dependency depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS - # RCU_USER_QS - depends on HAVE_CONTEXT_TRACKING && SMP + # We need at least one periodic CPU for timekeeping + depends on SMP + # RCU_USER_QS dependency + depends on HAVE_CONTEXT_TRACKING # RCU_NOCB_CPU dependency depends on TREE_RCU || TREE_PREEMPT_RCU depends on VIRT_CPU_ACCOUNTING_GEN -- cgit v1.2.3 From 76c24fb054b52b34af4dcde589cbb9e2b98fc74c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Apr 2013 00:15:40 +0200 Subject: nohz: New APIs to re-evaluate the tick on full dynticks CPUs Provide two new helpers in order to notify the full dynticks CPUs about some internal system changes against which they may reconsider the state of their tick. Some practical examples include: posix cpu timers, perf tick and sched clock tick. For now the notifying handler, implemented through IPIs, is a stub that will be implemented when we get the tick stop/restart infrastructure in. Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 1 + kernel/time/tick-sched.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 358d601a4fec..fbb4c7eb92a0 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -111,6 +111,7 @@ config NO_HZ_FULL select RCU_USER_QS select RCU_NOCB_CPU select CONTEXT_TRACKING_FORCE + select IRQ_WORK help Adaptively try to shutdown the tick whenever possible, even when the CPU is running tasks. Typically this requires running a single diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 369b5769fc97..2bcad5b904d8 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -147,6 +147,57 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) static cpumask_var_t nohz_full_mask; bool have_nohz_full_mask; +/* + * Re-evaluate the need for the tick on the current CPU + * and restart it if necessary. + */ +static void tick_nohz_full_check(void) +{ + /* + * STUB for now, will be filled with the full tick stop/restart + * infrastructure patches + */ +} + +static void nohz_full_kick_work_func(struct irq_work *work) +{ + tick_nohz_full_check(); +} + +static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { + .func = nohz_full_kick_work_func, +}; + +/* + * Kick the current CPU if it's full dynticks in order to force it to + * re-evaluate its dependency on the tick and restart it if necessary. + */ +void tick_nohz_full_kick(void) +{ + if (tick_nohz_full_cpu(smp_processor_id())) + irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); +} + +static void nohz_full_kick_ipi(void *info) +{ + tick_nohz_full_check(); +} + +/* + * Kick all full dynticks CPUs in order to force these to re-evaluate + * their dependency on the tick and restart it if necessary. + */ +void tick_nohz_full_kick_all(void) +{ + if (!have_nohz_full_mask) + return; + + preempt_disable(); + smp_call_function_many(nohz_full_mask, + nohz_full_kick_ipi, NULL, false); + preempt_enable(); +} + int tick_nohz_full_cpu(int cpu) { if (!have_nohz_full_mask) -- cgit v1.2.3 From 0453b435df0d69dd0d8c42eb9b3015aaf0d8a032 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 27 Mar 2013 02:18:34 +0100 Subject: nohz: Force boot CPU outside full dynticks range The timekeeping job must be able to run early on boot because there may be some pre-SMP (and thus pre-initcalls ) components that rely on it. The IO-APIC is one such users as it tests the timer health by watching jiffies progression. Given that it happens before we know the initial online set, we can't rely on it to select a timekeeper. We need one before SMP time otherwise we simply crash on boot. To fix this and keep things simple for now, force the boot CPU outside of the full dynticks range in any case and do this early on kernel parameter parsing time. We might want a trickier solution later, expecially for aSMP architectures that need to assign housekeeping tasks to arbitrary low power CPUs. But it's still first pass KISS time for now. Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 54 ++++++++++++++---------------------------------- 1 file changed, 15 insertions(+), 39 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 369b5769fc97..2bac5ea2c9af 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -158,11 +158,21 @@ int tick_nohz_full_cpu(int cpu) /* Parse the boot-time nohz CPU list from the kernel parameters. */ static int __init tick_nohz_full_setup(char *str) { + int cpu; + alloc_bootmem_cpumask_var(&nohz_full_mask); - if (cpulist_parse(str, nohz_full_mask) < 0) + if (cpulist_parse(str, nohz_full_mask) < 0) { pr_warning("NOHZ: Incorrect nohz_full cpumask\n"); - else - have_nohz_full_mask = true; + return 1; + } + + cpu = smp_processor_id(); + if (cpumask_test_cpu(cpu, nohz_full_mask)) { + pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu); + cpumask_clear_cpu(cpu, nohz_full_mask); + } + have_nohz_full_mask = true; + return 1; } __setup("nohz_full=", tick_nohz_full_setup); @@ -195,42 +205,8 @@ static char __initdata nohz_full_buf[NR_CPUS + 1]; static int __init init_tick_nohz_full(void) { - cpumask_var_t online_nohz; - int cpu; - - if (!have_nohz_full_mask) - return 0; - - cpu_notifier(tick_nohz_cpu_down_callback, 0); - - if (!zalloc_cpumask_var(&online_nohz, GFP_KERNEL)) { - pr_warning("NO_HZ: Not enough memory to check full nohz mask\n"); - return -ENOMEM; - } - - /* - * CPUs can probably not be concurrently offlined on initcall time. - * But we are paranoid, aren't we? - */ - get_online_cpus(); - - /* Ensure we keep a CPU outside the dynticks range for timekeeping */ - cpumask_and(online_nohz, cpu_online_mask, nohz_full_mask); - if (cpumask_equal(online_nohz, cpu_online_mask)) { - pr_warning("NO_HZ: Must keep at least one online CPU " - "out of nohz_full range\n"); - /* - * We know the current CPU doesn't have its tick stopped. - * Let's use it for the timekeeping duty. - */ - preempt_disable(); - cpu = smp_processor_id(); - pr_warning("NO_HZ: Clearing %d from nohz_full range\n", cpu); - cpumask_clear_cpu(cpu, nohz_full_mask); - preempt_enable(); - } - put_online_cpus(); - free_cpumask_var(online_nohz); + if (have_nohz_full_mask) + cpu_notifier(tick_nohz_cpu_down_callback, 0); cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); -- cgit v1.2.3 From d1e43fa5f8bb25f83a86a29f11fcfb57ed4d7566 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 26 Mar 2013 23:47:24 +0100 Subject: nohz: Ensure full dynticks CPUs are RCU nocbs We need full dynticks CPU to also be RCU nocb so that we don't have to keep the tick to handle RCU callbacks. Make sure the range passed to nohz_full= boot parameter is a subset of rcu_nocbs= The CPUs that fail to meet this requirement will be excluded from the nohz_full range. This is checked early in boot time, before any CPU has the opportunity to stop its tick. Suggested-by: Steven Rostedt Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 2bac5ea2c9af..d71a5f2bd7b2 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -203,17 +203,27 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, */ static char __initdata nohz_full_buf[NR_CPUS + 1]; -static int __init init_tick_nohz_full(void) +void __init tick_nohz_init(void) { - if (have_nohz_full_mask) - cpu_notifier(tick_nohz_cpu_down_callback, 0); + int cpu; + + if (!have_nohz_full_mask) + return; + + cpu_notifier(tick_nohz_cpu_down_callback, 0); + + /* Make sure full dynticks CPU are also RCU nocbs */ + for_each_cpu(cpu, nohz_full_mask) { + if (!rcu_is_nocb_cpu(cpu)) { + pr_warning("NO_HZ: CPU %d is not RCU nocb: " + "cleared from nohz_full range", cpu); + cpumask_clear_cpu(cpu, nohz_full_mask); + } + } cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); - - return 0; } -core_initcall(init_tick_nohz_full); #else #define have_nohz_full_mask (0) #endif -- cgit v1.2.3 From f98823ac758ba1aa77c6e3f8ad4ef3ad84ee0a7c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 27 Mar 2013 01:17:22 +0100 Subject: nohz: New option to default all CPUs in full dynticks range Provide a new kernel config that defaults all CPUs to be part of the full dynticks range, except the boot one for timekeeping. This default setting is overriden by the nohz_full= boot option if passed by the user. This is helpful for those who don't need a finegrained range of full dynticks CPU and also for automated testing. Suggested-by: Ingo Molnar Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 10 ++++++++++ kernel/time/tick-sched.c | 23 +++++++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 358d601a4fec..99c3f13dd478 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -128,6 +128,16 @@ config NO_HZ_FULL endchoice +config NO_HZ_FULL_ALL + bool "Full dynticks system on all CPUs by default" + depends on NO_HZ_FULL + help + If the user doesn't pass the nohz_full boot option to + define the range of full dynticks CPUs, consider that all + CPUs in the system are full dynticks by default. + Note the boot CPU will still be kept outside the range to + handle the timekeeping duty. + config NO_HZ bool "Old Idle dynticks config" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d71a5f2bd7b2..a76e09044f9f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -203,12 +203,31 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb, */ static char __initdata nohz_full_buf[NR_CPUS + 1]; +static int tick_nohz_init_all(void) +{ + int err = -1; + +#ifdef CONFIG_NO_HZ_FULL_ALL + if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) { + pr_err("NO_HZ: Can't allocate full dynticks cpumask\n"); + return err; + } + err = 0; + cpumask_setall(nohz_full_mask); + cpumask_clear_cpu(smp_processor_id(), nohz_full_mask); + have_nohz_full_mask = true; +#endif + return err; +} + void __init tick_nohz_init(void) { int cpu; - if (!have_nohz_full_mask) - return; + if (!have_nohz_full_mask) { + if (tick_nohz_init_all() < 0) + return; + } cpu_notifier(tick_nohz_cpu_down_callback, 0); -- cgit v1.2.3 From ff442c51f6543378cf23107c75b7949dc64a9119 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 20 Apr 2013 15:27:08 +0200 Subject: nohz: Re-evaluate the tick from the scheduler IPI The scheduler IPI is used by the scheduler to kick full dynticks CPUs asynchronously when more than one task are running or when a new timer list timer is enqueued. This way the destination CPU can decide to restart the tick to handle this new situation. Now let's call that kick in the scheduler IPI. (Reusing the scheduler IPI rather than implementing a new IPI was suggested by Peter Zijlstra a while ago) Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 884a9f302a06..4d74a68b2c34 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -151,7 +151,7 @@ bool have_nohz_full_mask; * Re-evaluate the need for the tick on the current CPU * and restart it if necessary. */ -static void tick_nohz_full_check(void) +void tick_nohz_full_check(void) { /* * STUB for now, will be filled with the full tick stop/restart -- cgit v1.2.3 From 9014c45d9e2dbb935498a5f1d106e220e8624643 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 20 Apr 2013 15:43:57 +0200 Subject: nohz: Implement full dynticks kick Implement the full dynticks kick that is performed from IPIs sent by various subsystems (scheduler, posix timers, ...) when they want to notify about a new event that may reconsider the dependency on the tick. Most of the time, such an event end up restarting the tick. (Part of the design with subsystems providing *_can_stop_tick() helpers suggested by Peter Zijlstra a while ago). Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 4d74a68b2c34..95d79aeb3e27 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include @@ -147,16 +149,48 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) static cpumask_var_t nohz_full_mask; bool have_nohz_full_mask; +static bool can_stop_full_tick(void) +{ + WARN_ON_ONCE(!irqs_disabled()); + + if (!sched_can_stop_tick()) + return false; + + if (!posix_cpu_timers_can_stop_tick(current)) + return false; + + if (!perf_event_can_stop_tick()) + return false; + + /* sched_clock_tick() needs us? */ +#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK + /* + * TODO: kick full dynticks CPUs when + * sched_clock_stable is set. + */ + if (!sched_clock_stable) + return false; +#endif + + return true; +} + +static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); + /* * Re-evaluate the need for the tick on the current CPU * and restart it if necessary. */ void tick_nohz_full_check(void) { - /* - * STUB for now, will be filled with the full tick stop/restart - * infrastructure patches - */ + struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + + if (tick_nohz_full_cpu(smp_processor_id())) { + if (ts->tick_stopped && !is_idle_task(current)) { + if (!can_stop_full_tick()) + tick_nohz_restart_sched_tick(ts, ktime_get()); + } + } } static void nohz_full_kick_work_func(struct irq_work *work) -- cgit v1.2.3 From 5811d9963e26146898a24b535b301f7654257f8a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 20 Apr 2013 16:40:31 +0200 Subject: nohz: Prepare to stop the tick on irq exit Interrupt exit is a natural place to stop the tick: it happens after all events happening before and during the irq which are liable to update the dependency on the tick occured. Also it makes sure that any check on tick dependency is well ordered against dynticks kick IPIs. Bring in the infrastructure that performs the tick dependency checks on irq exit and shut it down if these checks show that we can do it safely. Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 95d79aeb3e27..d0ed1905a85c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -647,6 +647,24 @@ out: return ret; } +static void tick_nohz_full_stop_tick(struct tick_sched *ts) +{ +#ifdef CONFIG_NO_HZ_FULL + int cpu = smp_processor_id(); + + if (!tick_nohz_full_cpu(cpu) || is_idle_task(current)) + return; + + if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) + return; + + if (!can_stop_full_tick()) + return; + + tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); +#endif +} + static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) { /* @@ -773,12 +791,13 @@ void tick_nohz_irq_exit(void) { struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); - if (!ts->inidle) - return; - - /* Cancel the timer because CPU already waken up from the C-states*/ - menu_hrtimer_cancel(); - __tick_nohz_idle_enter(ts); + if (ts->inidle) { + /* Cancel the timer because CPU already waken up from the C-states*/ + menu_hrtimer_cancel(); + __tick_nohz_idle_enter(ts); + } else { + tick_nohz_full_stop_tick(ts); + } } /** -- cgit v1.2.3 From 99e5ada9407cc19d7c4c05ce2165f20dc46fc093 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 20 Apr 2013 17:11:50 +0200 Subject: nohz: Re-evaluate the tick for the new task after a context switch When a task is scheduled in, it may have some properties of its own that could make the CPU reconsider the need for the tick: posix cpu timers, perf events, ... So notify the full dynticks subsystem when a task gets scheduled in and re-check the tick dependency at this stage. This is done through a self IPI to avoid messing up with any current lock scenario. Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d0ed1905a85c..12a900dbb819 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -232,6 +232,26 @@ void tick_nohz_full_kick_all(void) preempt_enable(); } +/* + * Re-evaluate the need for the tick as we switch the current task. + * It might need the tick due to per task/process properties: + * perf events, posix cpu timers, ... + */ +void tick_nohz_task_switch(struct task_struct *tsk) +{ + unsigned long flags; + + if (!tick_nohz_full_cpu(smp_processor_id())) + return; + + local_irq_save(flags); + + if (tick_nohz_tick_stopped() && !can_stop_full_tick()) + tick_nohz_full_kick(); + + local_irq_restore(flags); +} + int tick_nohz_full_cpu(int cpu) { if (!have_nohz_full_mask) -- cgit v1.2.3 From 0637e029392386e6996f5d6574aadccee8315efa Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 22 Apr 2013 21:15:59 +0200 Subject: nohz: Select wide RCU nocb for full dynticks It makes testing and implementation much easier as we know in advance that all CPUs are RCU nocbs. Also this prepares to remove the dynamic check for nohz_full= boot mask to be a subset of rcu_nocbs= Eventually this should also help removing the requirement for the boot CPU to be outside the full dynticks range. Suggested-by: Christoph Lameter Suggested-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index f6a792ab4983..e1ac129b01c9 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -110,6 +110,7 @@ config NO_HZ_FULL select NO_HZ_COMMON select RCU_USER_QS select RCU_NOCB_CPU + select RCU_NOCB_CPU_ALL select CONTEXT_TRACKING_FORCE select IRQ_WORK help -- cgit v1.2.3 From cb41a29076e9f95547da46578d5c8804f7b8845d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 20 Apr 2013 17:35:50 +0200 Subject: nohz: Add basic tracing It's not obvious to find out why the full dynticks subsystem doesn't always stop the tick: whether this is due to kthreads, posix timers, perf events, etc... These new tracepoints are here to help the user diagnose the failures and test this feature. Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 12a900dbb819..85e05ab98253 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -28,6 +28,8 @@ #include "tick-internal.h" +#include + /* * Per cpu nohz control structure */ @@ -153,14 +155,20 @@ static bool can_stop_full_tick(void) { WARN_ON_ONCE(!irqs_disabled()); - if (!sched_can_stop_tick()) + if (!sched_can_stop_tick()) { + trace_tick_stop(0, "more than 1 task in runqueue\n"); return false; + } - if (!posix_cpu_timers_can_stop_tick(current)) + if (!posix_cpu_timers_can_stop_tick(current)) { + trace_tick_stop(0, "posix timers running\n"); return false; + } - if (!perf_event_can_stop_tick()) + if (!perf_event_can_stop_tick()) { + trace_tick_stop(0, "perf events running\n"); return false; + } /* sched_clock_tick() needs us? */ #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK @@ -168,8 +176,10 @@ static bool can_stop_full_tick(void) * TODO: kick full dynticks CPUs when * sched_clock_stable is set. */ - if (!sched_clock_stable) + if (!sched_clock_stable) { + trace_tick_stop(0, "unstable sched clock\n"); return false; + } #endif return true; @@ -631,6 +641,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, ts->last_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; + trace_tick_stop(1, " "); } /* -- cgit v1.2.3 From 65e709dc0c25dbd563861924815e9a3a93878b75 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 20 Apr 2013 17:35:50 +0200 Subject: nohz: Remove full dynticks' superfluous dependency on RCU tree Remove the dependency on (TREE_RCU || TREE_PREEMPT_RCU). The full dynticks option already depends on SMP which implies (whatever flavour of) RCU tree config anyway. Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index e1ac129b01c9..1ea2bba4a686 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -104,8 +104,6 @@ config NO_HZ_FULL depends on SMP # RCU_USER_QS dependency depends on HAVE_CONTEXT_TRACKING - # RCU_NOCB_CPU dependency - depends on TREE_RCU || TREE_PREEMPT_RCU depends on VIRT_CPU_ACCOUNTING_GEN select NO_HZ_COMMON select RCU_USER_QS -- cgit v1.2.3 From 47aa8b6cbcb839efe2edaa5b50fee21df115d37b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 26 Apr 2013 10:05:59 +0200 Subject: nohz: Reduce overhead under high-freq idling patterns One testbox of mine (Intel Nehalem, 16-way) uses MWAIT for its idle routine, which apparently can break out of its idle loop rather frequently, with high frequency. In that case NO_HZ_FULL=y kernels show high ksoftirqd overhead and constant context switching, because tick_nohz_stop_sched_tick() will, if delta_jiffies == 0, mis-identify this as a timer event - activating the TIMER_SOFTIRQ, which wakes up ksoftirqd. Fix this by treating delta_jiffies == 0 the same way we treat other short wakeups, delta_jiffies == 1. Cc: Frederic Weisbecker Cc: Chris Metcalf Cc: Christoph Lameter Cc: Geoff Levand Cc: Gilad Ben Yossef Cc: Hakan Akkan Cc: Kevin Hilman Cc: Li Zhong Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 85e05ab98253..da53c8f2beb5 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -565,11 +565,12 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, delta_jiffies = rcu_delta_jiffies; } } + /* - * Do not stop the tick, if we are only one off - * or if the cpu is required for rcu + * Do not stop the tick, if we are only one off (or less) + * or if the cpu is required for RCU: */ - if (!ts->tick_stopped && delta_jiffies == 1) + if (!ts->tick_stopped && delta_jiffies <= 1) goto out; /* Schedule the tick, if we are at least one jiffie off */ -- cgit v1.2.3 From c58b0df12a6b5c497637db0676effd00e1fbab13 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 26 Apr 2013 15:16:31 +0200 Subject: nohz: Select VIRT_CPU_ACCOUNTING_GEN from full dynticks config Turn the full dynticks passive dependency on VIRT_CPU_ACCOUNTING_GEN to an active one. The full dynticks Kconfig is currently hidden behind the full dynticks cputime accounting, which is an awkward and counter-intuitive layout: the user first has to select the dynticks cputime accounting in order to make the full dynticks feature to be visible. We definetly want it the other way around. The usual way to perform this kind of active dependency is use "select" on the depended target. Now we can't use the Kconfig "select" instruction when the target is a "choice". So this patch inspires on how the RCU subsystem Kconfig interact with its dependencies on SMP and PREEMPT: we make sure that cputime accounting can't propose another option than VIRT_CPU_ACCOUNTING_GEN when NO_HZ_FULL is selected by using the right "depends on" instruction for each cputime accounting choices. v2: Keep full dynticks cputime accounting available even without full dynticks, as per Paul McKenney's suggestion. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Christoph Lameter Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 1ea2bba4a686..a2ddd650cb92 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -104,11 +104,13 @@ config NO_HZ_FULL depends on SMP # RCU_USER_QS dependency depends on HAVE_CONTEXT_TRACKING - depends on VIRT_CPU_ACCOUNTING_GEN + # VIRT_CPU_ACCOUNTING_GEN dependency + depends on 64BIT select NO_HZ_COMMON select RCU_USER_QS select RCU_NOCB_CPU select RCU_NOCB_CPU_ALL + select VIRT_CPU_ACCOUNTING_GEN select CONTEXT_TRACKING_FORCE select IRQ_WORK help -- cgit v1.2.3 From 6296ace467c8640317414ba589b124323806f7ce Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Sun, 28 Apr 2013 11:25:58 +0800 Subject: nohz: Protect smp_processor_id() in tick_nohz_task_switch() I saw following error when testing the latest nohz code on Power: [ 85.295384] BUG: using smp_processor_id() in preemptible [00000000] code: rsyslogd/3493 [ 85.295396] caller is .tick_nohz_task_switch+0x1c/0xb8 [ 85.295402] Call Trace: [ 85.295408] [c0000001fababab0] [c000000000012dc4] .show_stack+0x110/0x25c (unreliable) [ 85.295420] [c0000001fababba0] [c0000000007c4b54] .dump_stack+0x20/0x30 [ 85.295430] [c0000001fababc10] [c00000000044eb74] .debug_smp_processor_id+0xf4/0x124 [ 85.295438] [c0000001fababca0] [c0000000000d7594] .tick_nohz_task_switch+0x1c/0xb8 [ 85.295447] [c0000001fababd20] [c0000000000b9748] .finish_task_switch+0x13c/0x160 [ 85.295455] [c0000001fababdb0] [c0000000000bbe50] .schedule_tail+0x50/0x124 [ 85.295463] [c0000001fababe30] [c000000000009dc8] .ret_from_fork+0x4/0x54 The code below moves the test into local_irq_save/restore section to avoid the above complaint. Signed-off-by: Li Zhong Acked-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul McKenney Link: http://lkml.kernel.org/r/1367119558.6391.34.camel@ThinkPad-T5421.cn.ibm.com Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index da53c8f2beb5..1c9f53b2ddb7 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -251,14 +251,15 @@ void tick_nohz_task_switch(struct task_struct *tsk) { unsigned long flags; - if (!tick_nohz_full_cpu(smp_processor_id())) - return; - local_irq_save(flags); + if (!tick_nohz_full_cpu(smp_processor_id())) + goto out; + if (tick_nohz_tick_stopped() && !can_stop_full_tick()) tick_nohz_full_kick(); +out: local_irq_restore(flags); } -- cgit v1.2.3 From 73c30828771acafb0a5e3a1c4cf75e6c5dc5f98a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 3 May 2013 01:28:12 +0200 Subject: rcu: Fix full dynticks' dependency on wide RCU nocb mode Commit 0637e029392386e6996f5d6574aadccee8315efa ("nohz: Select wide RCU nocb for full dynticks") intended to force CONFIG_RCU_NOCB_CPU_ALL=y when full dynticks is enabled. However this option is part of a choice menu and Kconfig's "select" instruction has no effect on such targets. Fix this by using reverse dependencies on the targets we don't want instead. Reviewed-by: Paul E. McKenney Signed-off-by: Frederic Weisbecker Cc: Christoph Lameter Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index a2ddd650cb92..e4c07b0692bb 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -109,7 +109,6 @@ config NO_HZ_FULL select NO_HZ_COMMON select RCU_USER_QS select RCU_NOCB_CPU - select RCU_NOCB_CPU_ALL select VIRT_CPU_ACCOUNTING_GEN select CONTEXT_TRACKING_FORCE select IRQ_WORK -- cgit v1.2.3 From 265f22a975c1e4cc3a4d1f94a3ec53ffbb6f5b9f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 3 May 2013 03:39:05 +0200 Subject: sched: Keep at least 1 tick per second for active dynticks tasks The scheduler doesn't yet fully support environments with a single task running without a periodic tick. In order to ensure we still maintain the duties of scheduler_tick(), keep at least 1 tick per second. This makes sure that we keep the progression of various scheduler accounting and background maintainance even with a very low granularity. Examples include cpu load, sched average, CFS entity vruntime, avenrun and events such as load balancing, amongst other details handled in sched_class::task_tick(). This limitation will be removed in the future once we get these individual items to work in full dynticks CPUs. Suggested-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Christoph Lameter Cc: Hakan Akkan Cc: Ingo Molnar Cc: Kevin Hilman Cc: Li Zhong Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- kernel/time/tick-sched.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 1c9f53b2ddb7..07929c633570 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -600,6 +600,13 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, time_delta = KTIME_MAX; } +#ifdef CONFIG_NO_HZ_FULL + if (!ts->inidle) { + time_delta = min(time_delta, + scheduler_tick_max_deferment()); + } +#endif + /* * calculate the expiry time for the next timer wheel * timer. delta_jiffies >= NEXT_TIMER_MAX_DELTA signals -- cgit v1.2.3