From 0e7767687fdabfc58d5046e7488632bf2ecd4d0c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Apr 2018 18:58:27 +0200 Subject: time: tick-sched: Reorganize idle tick management code Prepare the scheduler tick code for reworking the idle loop to avoid stopping the tick in some cases. The idea is to split the nohz idle entry call to decouple the idle time stats accounting and preparatory work from the actual tick stop code, in order to later be able to delay the tick stop once we reach more power-knowledgeable callers. Move away the tick_nohz_start_idle() invocation from __tick_nohz_idle_enter(), rename the latter to __tick_nohz_idle_stop_tick() and define tick_nohz_idle_stop_tick() as a wrapper around it for calling it from the outside. Make tick_nohz_idle_enter() only call tick_nohz_start_idle() instead of calling the entire __tick_nohz_idle_enter(), add another wrapper disabling and enabling interrupts around tick_nohz_idle_stop_tick() and make the current callers of tick_nohz_idle_enter() call it too to retain their current functionality. Signed-off-by: Rafael J. Wysocki Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- include/linux/tick.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 7f8c9a127f5a..1d253df9ea3c 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -115,6 +115,7 @@ enum tick_dep_bits { extern bool tick_nohz_enabled; extern bool tick_nohz_tick_stopped(void); extern bool tick_nohz_tick_stopped_cpu(int cpu); +extern void tick_nohz_idle_stop_tick(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); @@ -123,10 +124,19 @@ extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); + +static inline void tick_nohz_idle_stop_tick_protected(void) +{ + local_irq_disable(); + tick_nohz_idle_stop_tick(); + local_irq_enable(); +} + #else /* !CONFIG_NO_HZ_COMMON */ #define tick_nohz_enabled (0) static inline int tick_nohz_tick_stopped(void) { return 0; } static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; } +static inline void tick_nohz_idle_stop_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } @@ -136,6 +146,8 @@ static inline ktime_t tick_nohz_get_sleep_length(void) } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } + +static inline void tick_nohz_idle_stop_tick_protected(void) { } #endif /* !CONFIG_NO_HZ_COMMON */ #ifdef CONFIG_NO_HZ_FULL -- cgit v1.2.3 From 2aaf709a518d26563b80fd7a42379d7aa7ffed4a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 15 Mar 2018 23:05:50 +0100 Subject: sched: idle: Do not stop the tick upfront in the idle loop Push the decision whether or not to stop the tick somewhat deeper into the idle loop. Stopping the tick upfront leads to unpleasant outcomes in case the idle governor doesn't agree with the nohz code on the duration of the upcoming idle period. Specifically, if the tick has been stopped and the idle governor predicts short idle, the situation is bad regardless of whether or not the prediction is accurate. If it is accurate, the tick has been stopped unnecessarily which means excessive overhead. If it is not accurate, the CPU is likely to spend too much time in the (shallow, because short idle has been predicted) idle state selected by the governor [1]. As the first step towards addressing this problem, change the code to make the tick stopping decision inside of the loop in do_idle(). In particular, do not stop the tick in the cpu_idle_poll() code path. Also don't do that in tick_nohz_irq_exit() which doesn't really have enough information on whether or not to stop the tick. Link: https://marc.info/?l=linux-pm&m=150116085925208&w=2 # [1] Link: https://tu-dresden.de/zih/forschung/ressourcen/dateien/projekte/haec/powernightmares.pdf Suggested-by: Frederic Weisbecker Signed-off-by: Rafael J. Wysocki Reviewed-by: Frederic Weisbecker Acked-by: Peter Zijlstra (Intel) --- include/linux/tick.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 1d253df9ea3c..fccebfba167e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -116,6 +116,7 @@ extern bool tick_nohz_enabled; extern bool tick_nohz_tick_stopped(void); extern bool tick_nohz_tick_stopped_cpu(int cpu); extern void tick_nohz_idle_stop_tick(void); +extern void tick_nohz_idle_restart_tick(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); @@ -137,6 +138,7 @@ static inline void tick_nohz_idle_stop_tick_protected(void) static inline int tick_nohz_tick_stopped(void) { return 0; } static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; } static inline void tick_nohz_idle_stop_tick(void) { } +static inline void tick_nohz_idle_restart_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } -- cgit v1.2.3 From efefc97736e6f3261879bc9dddcb161224a455f5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 20 Mar 2018 10:11:28 +0100 Subject: jiffies: Introduce USER_TICK_USEC and redefine TICK_USEC Since the subsequent changes will need a TICK_USEC definition analogous to TICK_NSEC, rename the existing TICK_USEC as USER_TICK_USEC, update its users and redefine TICK_USEC accordingly. Suggested-by: Peter Zijlstra Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker --- include/linux/jiffies.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 9385aa57497b..a27cf6652327 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -62,8 +62,11 @@ extern int register_refined_jiffies(long clock_tick_rate); /* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */ #define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ) -/* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ -#define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ) +/* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */ +#define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ) + +/* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ +#define USER_TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ) #ifndef __jiffy_arch_data #define __jiffy_arch_data -- cgit v1.2.3 From 45f1ff59e27ca59d33cc1a317e669d90022ccf7d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 22 Mar 2018 17:50:49 +0100 Subject: cpuidle: Return nohz hint from cpuidle_select() Add a new pointer argument to cpuidle_select() and to the ->select cpuidle governor callback to allow a boolean value indicating whether or not the tick should be stopped before entering the selected state to be returned from there. Make the ladder governor ignore that pointer (to preserve its current behavior) and make the menu governor return 'false" through it if: (1) the idle exit latency is constrained at 0, or (2) the selected state is a polling one, or (3) the expected idle period duration is within the tick period range. In addition to that, the correction factor computations in the menu governor need to take the possibility that the tick may not be stopped into account to avoid artificially small correction factor values. To that end, add a mechanism to record tick wakeups, as suggested by Peter Zijlstra, and use it to modify the menu_update() behavior when tick wakeup occurs. Namely, if the CPU is woken up by the tick and the return value of tick_nohz_get_sleep_length() is not within the tick boundary, the predicted idle duration is likely too short, so make menu_update() try to compensate for that by updating the governor statistics as though the CPU was idle for a long time. Since the value returned through the new argument pointer of cpuidle_select() is not used by its caller yet, this change by itself is not expected to alter the functionality of the code. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) --- include/linux/cpuidle.h | 8 +++++--- include/linux/tick.h | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index a806e94c482f..1eefabf1621f 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -135,7 +135,8 @@ extern bool cpuidle_not_available(struct cpuidle_driver *drv, struct cpuidle_device *dev); extern int cpuidle_select(struct cpuidle_driver *drv, - struct cpuidle_device *dev); + struct cpuidle_device *dev, + bool *stop_tick); extern int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index); extern void cpuidle_reflect(struct cpuidle_device *dev, int index); @@ -167,7 +168,7 @@ static inline bool cpuidle_not_available(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return true; } static inline int cpuidle_select(struct cpuidle_driver *drv, - struct cpuidle_device *dev) + struct cpuidle_device *dev, bool *stop_tick) {return -ENODEV; } static inline int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) @@ -250,7 +251,8 @@ struct cpuidle_governor { struct cpuidle_device *dev); int (*select) (struct cpuidle_driver *drv, - struct cpuidle_device *dev); + struct cpuidle_device *dev, + bool *stop_tick); void (*reflect) (struct cpuidle_device *dev, int index); }; diff --git a/include/linux/tick.h b/include/linux/tick.h index fccebfba167e..ef0717e5e526 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -120,6 +120,7 @@ extern void tick_nohz_idle_restart_tick(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); +extern bool tick_nohz_idle_got_tick(void); extern ktime_t tick_nohz_get_sleep_length(void); extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); @@ -141,6 +142,7 @@ static inline void tick_nohz_idle_stop_tick(void) { } static inline void tick_nohz_idle_restart_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } +static inline bool tick_nohz_idle_got_tick(void) { return false; } static inline ktime_t tick_nohz_get_sleep_length(void) { -- cgit v1.2.3 From a59855cd8c613ba4bb95147f6176360d95f75e60 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 3 Apr 2018 23:17:00 +0200 Subject: time: hrtimer: Introduce hrtimer_next_event_without() The next set of changes will need to compute the time to the next hrtimer event over all hrtimers except for the scheduler tick one. To that end introduce a new helper function, hrtimer_next_event_without(), for computing the time until the next hrtimer event over all timers except for one and modify the underlying code in __hrtimer_next_event_base() to prepare it for being called by that new function. No intentional changes in functionality. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker --- include/linux/hrtimer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index c7902ca7c9f4..3892e9c8b2de 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -426,6 +426,7 @@ static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer) } extern u64 hrtimer_get_next_event(void); +extern u64 hrtimer_next_event_without(const struct hrtimer *exclude); extern bool hrtimer_active(const struct hrtimer *timer); -- cgit v1.2.3 From 554c8aa8ecade210d58a252173bb8f2106552a44 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 3 Apr 2018 23:17:11 +0200 Subject: sched: idle: Select idle state before stopping the tick In order to address the issue with short idle duration predictions by the idle governor after the scheduler tick has been stopped, reorder the code in cpuidle_idle_call() so that the governor idle state selection runs before tick_nohz_idle_go_idle() and use the "nohz" hint returned by cpuidle_select() to decide whether or not to stop the tick. This isn't straightforward, because menu_select() invokes tick_nohz_get_sleep_length() to get the time to the next timer event and the number returned by the latter comes from __tick_nohz_idle_stop_tick(). Fortunately, however, it is possible to compute that number without actually stopping the tick and with the help of the existing code. Namely, tick_nohz_get_sleep_length() can be made call tick_nohz_next_event(), introduced earlier, to get the time to the next non-highres timer event. If that happens, tick_nohz_next_event() need not be called by __tick_nohz_idle_stop_tick() again. If it turns out that the scheduler tick cannot be stopped going forward or the next timer event is too close for the tick to be stopped, tick_nohz_get_sleep_length() can simply return the time to the next event currently programmed into the corresponding clock event device. In addition to knowing the return value of tick_nohz_next_event(), however, tick_nohz_get_sleep_length() needs to know the time to the next highres timer event, but with the scheduler tick timer excluded, which can be computed with the help of hrtimer_get_next_event(). That minimum of that number and the tick_nohz_next_event() return value is the total time to the next timer event with the assumption that the tick will be stopped. It can be returned to the idle governor which can use it for predicting idle duration (under the assumption that the tick will be stopped) and deciding whether or not it makes sense to stop the tick before putting the CPU into the selected idle state. With the above, the sleep_length field in struct tick_sched is not necessary any more, so drop it. Link: https://bugzilla.kernel.org/show_bug.cgi?id=199227 Reported-by: Doug Smythies Reported-by: Thomas Ilsche Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) Reviewed-by: Frederic Weisbecker --- include/linux/tick.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index ef0717e5e526..e8e7ff16b929 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -116,6 +116,7 @@ extern bool tick_nohz_enabled; extern bool tick_nohz_tick_stopped(void); extern bool tick_nohz_tick_stopped_cpu(int cpu); extern void tick_nohz_idle_stop_tick(void); +extern void tick_nohz_idle_retain_tick(void); extern void tick_nohz_idle_restart_tick(void); extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); @@ -139,6 +140,7 @@ static inline void tick_nohz_idle_stop_tick_protected(void) static inline int tick_nohz_tick_stopped(void) { return 0; } static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; } static inline void tick_nohz_idle_stop_tick(void) { } +static inline void tick_nohz_idle_retain_tick(void) { } static inline void tick_nohz_idle_restart_tick(void) { } static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } -- cgit v1.2.3 From 296bb1e51a4838a6488ec5ce676607093482ecbc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 5 Apr 2018 19:12:34 +0200 Subject: cpuidle: menu: Refine idle state selection for running tick If the tick isn't stopped, the target residency of the state selected by the menu governor may be greater than the actual time to the next tick and that means lost energy. To avoid that, make tick_nohz_get_sleep_length() return the current time to the next event (before stopping the tick) in addition to the estimated one via an extra pointer argument and make menu_select() use that value to refine the state selection when necessary. Signed-off-by: Rafael J. Wysocki Acked-by: Peter Zijlstra (Intel) --- include/linux/tick.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index e8e7ff16b929..55388ab45fd4 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -122,7 +122,7 @@ extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern bool tick_nohz_idle_got_tick(void); -extern ktime_t tick_nohz_get_sleep_length(void); +extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next); extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); @@ -146,9 +146,10 @@ static inline void tick_nohz_idle_enter(void) { } static inline void tick_nohz_idle_exit(void) { } static inline bool tick_nohz_idle_got_tick(void) { return false; } -static inline ktime_t tick_nohz_get_sleep_length(void) +static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next) { - return NSEC_PER_SEC / HZ; + *delta_next = TICK_NSEC; + return *delta_next; } static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } -- cgit v1.2.3