From bd624d75db21ea5402f9ecf4450b311794d80352 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 13 Feb 2015 08:54:56 +0800 Subject: clockevents: Introduce mode specific callbacks It is not possible for the clockevents core to know which modes (other than those with a corresponding feature flag) are supported by a particular implementation. And drivers are expected to handle transition to all modes elegantly, as ->set_mode() would be issued for them unconditionally. Now, adding support for a new mode complicates things a bit if we want to use the legacy ->set_mode() callback. We need to closely review all clockevents drivers to see if they would break on addition of a new mode. And after such reviews, it is found that we have to do non-trivial changes to most of the drivers [1]. Introduce mode-specific set_mode_*() callbacks, some of which the drivers may or may not implement. A missing callback would clearly convey the message that the corresponding mode isn't supported. A driver may still choose to keep supporting the legacy ->set_mode() callback, but ->set_mode() wouldn't be supporting any new modes beyond RESUME. If a driver wants to benefit from using a new mode, it would be required to migrate to the mode specific callbacks. The legacy ->set_mode() callback and the newly introduced mode-specific callbacks are mutually exclusive. Only one of them should be supported by the driver. Sanity check is done at the time of registration to distinguish between optional and required callbacks and to make error recovery and handling simpler. If the legacy ->set_mode() callback is provided, all mode specific ones would be ignored by the core but a warning is thrown if they are present. Call sites calling ->set_mode() directly are also updated to use __clockevents_set_mode() instead, as ->set_mode() may not be available anymore for few drivers. [1] https://lkml.org/lkml/2014/12/9/605 [2] https://lkml.org/lkml/2015/1/23/255 Suggested-by: Thomas Gleixner [2] Signed-off-by: Viresh Kumar Signed-off-by: Peter Zijlstra (Intel) Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: John Stultz Cc: Kevin Hilman Cc: Linus Torvalds Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Link: http://lkml.kernel.org/r/792d59a40423f0acffc9bb0bec9de1341a06fa02.1423788565.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 88 +++++++++++++++++++++++++++++++++++++++++++++-- kernel/time/timer_list.c | 32 +++++++++++++++-- 2 files changed, 115 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 55449909f114..489642b08d64 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -94,6 +94,57 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) } EXPORT_SYMBOL_GPL(clockevent_delta2ns); +static int __clockevents_set_mode(struct clock_event_device *dev, + enum clock_event_mode mode) +{ + /* Transition with legacy set_mode() callback */ + if (dev->set_mode) { + /* Legacy callback doesn't support new modes */ + if (mode > CLOCK_EVT_MODE_RESUME) + return -ENOSYS; + dev->set_mode(mode, dev); + return 0; + } + + if (dev->features & CLOCK_EVT_FEAT_DUMMY) + return 0; + + /* Transition with new mode-specific callbacks */ + switch (mode) { + case CLOCK_EVT_MODE_UNUSED: + /* + * This is an internal state, which is guaranteed to go from + * SHUTDOWN to UNUSED. No driver interaction required. 
+ */ + return 0; + + case CLOCK_EVT_MODE_SHUTDOWN: + return dev->set_mode_shutdown(dev); + + case CLOCK_EVT_MODE_PERIODIC: + /* Core internal bug */ + if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) + return -ENOSYS; + return dev->set_mode_periodic(dev); + + case CLOCK_EVT_MODE_ONESHOT: + /* Core internal bug */ + if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) + return -ENOSYS; + return dev->set_mode_oneshot(dev); + + case CLOCK_EVT_MODE_RESUME: + /* Optional callback */ + if (dev->set_mode_resume) + return dev->set_mode_resume(dev); + else + return 0; + + default: + return -ENOSYS; + } +} + /** * clockevents_set_mode - set the operating mode of a clock event device * @dev: device to modify @@ -105,7 +156,9 @@ void clockevents_set_mode(struct clock_event_device *dev, enum clock_event_mode mode) { if (dev->mode != mode) { - dev->set_mode(mode, dev); + if (__clockevents_set_mode(dev, mode)) + return; + dev->mode = mode; /* @@ -373,6 +426,35 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) } EXPORT_SYMBOL_GPL(clockevents_unbind); +/* Sanity check of mode transition callbacks */ +static int clockevents_sanity_check(struct clock_event_device *dev) +{ + /* Legacy set_mode() callback */ + if (dev->set_mode) { + /* We shouldn't be supporting new modes now */ + WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot || + dev->set_mode_shutdown || dev->set_mode_resume); + return 0; + } + + if (dev->features & CLOCK_EVT_FEAT_DUMMY) + return 0; + + /* New mode-specific callbacks */ + if (!dev->set_mode_shutdown) + return -EINVAL; + + if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && + !dev->set_mode_periodic) + return -EINVAL; + + if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) && + !dev->set_mode_oneshot) + return -EINVAL; + + return 0; +} + /** * clockevents_register_device - register a clock event device * @dev: device to register @@ -382,6 +464,8 @@ void clockevents_register_device(struct clock_event_device *dev) unsigned long flags; BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + BUG_ON(clockevents_sanity_check(dev)); + if (!dev->cpumask) { WARN_ON(num_possible_cpus() > 1); dev->cpumask = cpumask_of(smp_processor_id()); @@ -449,7 +533,7 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq) return clockevents_program_event(dev, dev->next_event, false); if (dev->mode == CLOCK_EVT_MODE_PERIODIC) - dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev); + return __clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); return 0; } diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 61ed862cdd37..2cfd19485824 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) print_name_offset(m, dev->set_next_event); SEQ_printf(m, "\n"); - SEQ_printf(m, " set_mode: "); - print_name_offset(m, dev->set_mode); - SEQ_printf(m, "\n"); + if (dev->set_mode) { + SEQ_printf(m, " set_mode: "); + print_name_offset(m, dev->set_mode); + SEQ_printf(m, "\n"); + } else { + if (dev->set_mode_shutdown) { + SEQ_printf(m, " shutdown: "); + print_name_offset(m, dev->set_mode_shutdown); + SEQ_printf(m, "\n"); + } + + if (dev->set_mode_periodic) { + SEQ_printf(m, " periodic: "); + print_name_offset(m, dev->set_mode_periodic); + SEQ_printf(m, "\n"); + } + + if (dev->set_mode_oneshot) { + SEQ_printf(m, " oneshot: "); + print_name_offset(m, dev->set_mode_oneshot); + SEQ_printf(m, "\n"); + } + + if (dev->set_mode_resume) { + SEQ_printf(m, " resume: "); + print_name_offset(m, 
dev->set_mode_resume);
+			SEQ_printf(m, "\n");
+		}
+	}

 	SEQ_printf(m, " event_handler:  ");
 	print_name_offset(m, dev->event_handler);
--
cgit v1.2.3

From 6086e346fdea1ae64d974c94c1acacc2605567ae Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 11 Mar 2015 21:16:29 -0700
Subject: clocksource: Simplify the clocks_calc_max_nsecs() logic

The previous clocks_calc_max_nsecs() code had some unnecessarily complex
bit logic to find the max interval that could cause multiplication
overflows. Since this is not in the hot path, just do the divide to make
it easier to read.

The previous implementation also had a subtle issue: it avoided overflows
with signed 64-bit values, whereas the intervals are always unsigned.
This resulted in overly conservative intervals, which other safety
margins were then added to, reducing the intended interval length.

Signed-off-by: John Stultz
Cc: Dave Jones
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Prarit Bhargava
Cc: Richard Cochran
Cc: Stephen Boyd
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1426133800-29329-2-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar
---
 kernel/time/clocksource.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4892352f0e49..2148f413256c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -476,19 +476,10 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)

 	/*
 	 * Calculate the maximum number of cycles that we can pass to the
-	 * cyc2ns function without overflowing a 64-bit signed result. The
-	 * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
-	 * which is equivalent to the below.
-	 * max_cycles < (2^63)/(mult + maxadj)
-	 * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
-	 * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
-	 * max_cycles < 2^(63 - log2(mult + maxadj))
-	 * max_cycles < 1 << (63 - log2(mult + maxadj))
-	 * Please note that we add 1 to the result of the log2 to account for
-	 * any rounding errors, ensure the above inequality is satisfied and
-	 * no overflow will occur.
+	 * cyc2ns() function without overflowing a 64-bit result.
 	 */
-	max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
+	max_cycles = ULLONG_MAX;
+	do_div(max_cycles, mult+maxadj);

 	/*
 	 * The actual maximum number of cycles we can defer the clocksource is
--
cgit v1.2.3

From 362fde0410377e468ca00ad363fdf3e3ec42eb6a Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 11 Mar 2015 21:16:30 -0700
Subject: clocksource: Simplify the logic around clocksource wrapping safety margins

The clocksource logic has a number of places where we try to include a
safety margin. Most of these are 12.5% safety margins, but they are
inconsistently applied and sometimes are applied on top of each other.

Additionally, in the previous patch, we corrected an issue where we
unintentionally in effect created a 50% safety margin, which these 12.5%
margins were then added to.

So to simplify the logic here, this patch removes the various 12.5%
margins, and consolidates adding the margin in one place:
clocks_calc_max_nsecs().

Additionally, Linus prefers a 50% safety margin, as it allows bad clock
values to be more easily caught. This should really have no net effect,
due to the corrected issue earlier which caused greater than 50% margins
to be used without issue.
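
[ Editor's illustration: the consolidated margin math reduces to the
  sketch below -- a simplified userspace rendering, not the in-tree code.
  The function name and casts are made up for illustration, but the steps
  mirror clocks_calc_max_nsecs() after this patch:

	#include <stdint.h>

	/* Largest cycle delta cyc2ns() can take without a 64-bit
	 * overflow, halved so that bad clock values stand out. */
	static uint64_t max_nsecs_sketch(uint32_t mult, uint32_t maxadj,
					 uint32_t shift, uint64_t mask)
	{
		/* widen before adding so mult + maxadj cannot wrap */
		uint64_t max_cycles = UINT64_MAX / ((uint64_t)mult + maxadj);

		if (max_cycles > mask)		/* the counter wraps first */
			max_cycles = mask;
		/* convert with the smallest possible mult, to be safe */
		return ((max_cycles * (mult - maxadj)) >> shift) >> 1;
	}

  The final shift right by one is the new, single 50% margin. ]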
Signed-off-by: John Stultz Acked-by: Stephen Boyd (for the sched_clock.c bit) Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 26 ++++++++++++-------------- kernel/time/sched_clock.c | 4 ++-- 2 files changed, 14 insertions(+), 16 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 2148f413256c..ace95763b3a6 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -469,6 +469,9 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) * @shift: cycle to nanosecond divisor (power of two) * @maxadj: maximum adjustment value to mult (~11%) * @mask: bitmask for two's complement subtraction of non 64 bit counters + * + * NOTE: This function includes a safety margin of 50%, so that bad clock values + * can be detected. */ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) { @@ -490,11 +493,14 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) max_cycles = min(max_cycles, mask); max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift); + /* Return 50% of the actual maximum, so we can detect bad values */ + max_nsecs >>= 1; + return max_nsecs; } /** - * clocksource_max_deferment - Returns max time the clocksource can be deferred + * clocksource_max_deferment - Returns max time the clocksource should be deferred * @cs: Pointer to clocksource * */ @@ -504,13 +510,7 @@ static u64 clocksource_max_deferment(struct clocksource *cs) max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj, cs->mask); - /* - * To ensure that the clocksource does not wrap whilst we are idle, - * limit the time the clocksource can be deferred by 12.5%. Please - * note a margin of 12.5% is used because this can be computed with - * a shift, versus say 10% which would require division. - */ - return max_nsecs - (max_nsecs >> 3); + return max_nsecs; } #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET @@ -659,10 +659,9 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) * conversion precision. 10 minutes is still a reasonable * amount. That results in a shift value of 24 for a * clocksource with mask >= 40bit and f >= 4GHz. That maps to - * ~ 0.06ppm granularity for NTP. We apply the same 12.5% - * margin as we do in clocksource_max_deferment() + * ~ 0.06ppm granularity for NTP. */ - sec = (cs->mask - (cs->mask >> 3)); + sec = cs->mask; do_div(sec, freq); do_div(sec, scale); if (!sec) @@ -674,9 +673,8 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) NSEC_PER_SEC / scale, sec * scale); /* - * for clocksources that have large mults, to avoid overflow. - * Since mult may be adjusted by ntp, add an safety extra margin - * + * Ensure clocksources that have large 'mult' values don't overflow + * when adjusted. 
*/ cs->maxadj = clocksource_max_adjustment(cs); while ((cs->mult + cs->maxadj < cs->mult) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 01d2d15aa662..3b8ae45020c1 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -125,9 +125,9 @@ void __init sched_clock_register(u64 (*read)(void), int bits, new_mask = CLOCKSOURCE_MASK(bits); - /* calculate how many ns until we wrap */ + /* calculate how many nanosecs until we risk wrapping */ wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask); - new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3)); + new_wrap_kt = ns_to_ktime(wrap); /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); -- cgit v1.2.3 From fb82fe2fe8588745edd73aa3a6229facac5c1e15 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:31 -0700 Subject: clocksource: Add 'max_cycles' to 'struct clocksource' In order to facilitate clocksource validation, add a 'max_cycles' field to the clocksource structure which will hold the maximum cycle value that can safely be multiplied without potentially causing an overflow. Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 28 ++++++++++++++++------------ kernel/time/sched_clock.c | 2 +- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index ace95763b3a6..fc2a9de43ca1 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -469,11 +469,13 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) * @shift: cycle to nanosecond divisor (power of two) * @maxadj: maximum adjustment value to mult (~11%) * @mask: bitmask for two's complement subtraction of non 64 bit counters + * @max_cyc: maximum cycle value before potential overflow (does not include + * any safety margin) * * NOTE: This function includes a safety margin of 50%, so that bad clock values * can be detected. 
*/
-u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
+u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
 {
 	u64 max_nsecs, max_cycles;
@@ -493,6 +495,10 @@
 	max_cycles = min(max_cycles, mask);
 	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);

+	/* return the max_cycles value as well if requested */
+	if (max_cyc)
+		*max_cyc = max_cycles;
+
 	/* Return 50% of the actual maximum, so we can detect bad values */
 	max_nsecs >>= 1;

@@ -500,17 +506,15 @@
 }

 /**
- * clocksource_max_deferment - Returns max time the clocksource should be deferred
- * @cs: Pointer to clocksource
+ * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
+ * @cs: Pointer to clocksource to be updated
  *
  */
-static u64 clocksource_max_deferment(struct clocksource *cs)
+static inline void clocksource_update_max_deferment(struct clocksource *cs)
 {
-	u64 max_nsecs;
-
-	max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
-					  cs->mask);
-	return max_nsecs;
+	cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
+						cs->maxadj, cs->mask,
+						&cs->max_cycles);
 }

 #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
@@ -684,7 +688,7 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
 		cs->maxadj = clocksource_max_adjustment(cs);
 	}

-	cs->max_idle_ns = clocksource_max_deferment(cs);
+	clocksource_update_max_deferment(cs);
 }
 EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);

@@ -730,8 +734,8 @@ int clocksource_register(struct clocksource *cs)
 		"Clocksource %s might overflow on 11%% adjustment\n",
 		cs->name);

-	/* calculate max idle time permitted for this clocksource */
-	cs->max_idle_ns = clocksource_max_deferment(cs);
+	/* Update max idle time permitted for this clocksource */
+	clocksource_update_max_deferment(cs);

 	mutex_lock(&clocksource_mutex);
 	clocksource_enqueue(cs);

diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 3b8ae45020c1..ca3bc5c7027c 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -126,7 +126,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 	new_mask = CLOCKSOURCE_MASK(bits);

 	/* calculate how many nanosecs until we risk wrapping */
-	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
+	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
 	new_wrap_kt = ns_to_ktime(wrap);

 	/* update epoch for new counter and update epoch_ns from old counter*/
 	new_epoch = read();
--
cgit v1.2.3

From 3c17ad19f0697ffe5ef7438cdafc2d2b7757d8a5 Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 11 Mar 2015 21:16:32 -0700
Subject: timekeeping: Add debugging checks to warn if we see delays

Recently there have been requests for better sanity checking in the
time code, so that it's clearer when something is going wrong, since
timekeeping issues could manifest in a large number of strange ways in
various subsystems.

Thus, this patch adds some extra infrastructure to add a check to
update_wall_time() to print two new warnings:

 1) if we see the call delayed beyond the 'max_cycles' overflow point, or

 2) if we see the call delayed beyond the clocksource's 'max_idle_ns'
    value, which is currently 50% of the overflow point.

This extra infrastructure is conditional on a new
CONFIG_DEBUG_TIMEKEEPING option, also added in this patch - default off.
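
[ Editor's note: the checks are compiled out unless the new option is
  switched on explicitly, e.g. via this illustrative .config fragment:

	CONFIG_DEBUG_TIMEKEEPING=y

  With the option disabled, timekeeping_check_update() collapses to an
  empty inline (see the #else branch in the diff below) and costs
  nothing. ]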
Tested this a bit by halting qemu for specified lengths of time to
trigger the warnings.

Signed-off-by: John Stultz
Cc: Dave Jones
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Prarit Bhargava
Cc: Richard Cochran
Cc: Stephen Boyd
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1426133800-29329-5-git-send-email-john.stultz@linaro.org
[ Improved the changelog and the messages a bit. ]
Signed-off-by: Ingo Molnar
---
 kernel/time/jiffies.c     |  1 +
 kernel/time/timekeeping.c | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'kernel/time')

diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a6a5bf53e86d..7e413902aa6a 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -71,6 +71,7 @@ static struct clocksource clocksource_jiffies = {
 	.mask		= 0xffffffff, /*32bits*/
 	.mult		= NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
 	.shift		= JIFFIES_SHIFT,
+	.max_cycles	= 10,
 };

 __cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 91db94136c10..acf049144cf6 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -118,6 +118,31 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 	tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }

+#ifdef CONFIG_DEBUG_TIMEKEEPING
+static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+{
+
+	cycle_t max_cycles = tk->tkr.clock->max_cycles;
+	const char *name = tk->tkr.clock->name;
+
+	if (offset > max_cycles) {
+		printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow\n",
+				offset, name, max_cycles);
+		printk_deferred("         timekeeping: Your kernel is sick, but tries to cope\n");
+	} else {
+		if (offset > (max_cycles >> 1)) {
+			printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n",
+					offset, name, max_cycles >> 1);
+			printk_deferred("      timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
+		}
+	}
+}
+#else
+static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+{
+}
+#endif
+
 /**
  * tk_setup_internals - Set up internals to use clocksource clock.
  *
@@ -1630,6 +1655,9 @@ void update_wall_time(void)
 	if (offset < real_tk->cycle_interval)
 		goto out;

+	/* Do some additional sanity checking */
+	timekeeping_check_update(real_tk, offset);
+
 	/*
 	 * With NO_HZ we may have to accumulate many cycle_intervals
 	 * (think "ticks") worth of time at once. To do this efficiently,
--
cgit v1.2.3

From a558cd021d83b65c47ee5b9bec1fcfe5298a769f Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 11 Mar 2015 21:16:33 -0700
Subject: timekeeping: Add checks to cap clocksource reads to the 'max_cycles' value

When calculating the current delta since the last tick, we currently
have no hard protections to prevent a multiplication overflow from
occurring.

This patch introduces infrastructure to allow a cap that limits the
clocksource read delta value to the 'max_cycles' value, which is where
an overflow would occur. Since this is in the hotpath, the extra
checking is done only under CONFIG_DEBUG_TIMEKEEPING=y.

There was some concern that capping time like this could cause
problems as we may stop expiring timers, which could go circular if
the timer that triggers time accumulation were mis-scheduled too far
in the future, which would cause time to stop.
However, since the mult overflow would result in a smaller time value, we would effectively have the same problem there. Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-6-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 49 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 14 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index acf049144cf6..657414cf2e46 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -126,9 +126,9 @@ static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) const char *name = tk->tkr.clock->name; if (offset > max_cycles) { - printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow\n", + printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n", offset, name, max_cycles); - printk_deferred(" timekeeping: Your kernel is sick, but tries to cope\n"); + printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n"); } else { if (offset > (max_cycles >> 1)) { printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n", @@ -137,10 +137,39 @@ static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) } } } + +static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) +{ + cycle_t cycle_now, delta; + + /* read clocksource */ + cycle_now = tkr->read(tkr->clock); + + /* calculate the delta since the last update_wall_time */ + delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + + /* Cap delta value to the max_cycles values to avoid mult overflows */ + if (unlikely(delta > tkr->clock->max_cycles)) + delta = tkr->clock->max_cycles; + + return delta; +} #else static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) { } +static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) +{ + cycle_t cycle_now, delta; + + /* read clocksource */ + cycle_now = tkr->read(tkr->clock); + + /* calculate the delta since the last update_wall_time */ + delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + + return delta; +} #endif /** @@ -218,14 +247,10 @@ static inline u32 arch_gettimeoffset(void) { return 0; } static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) { - cycle_t cycle_now, delta; + cycle_t delta; s64 nsec; - /* read clocksource: */ - cycle_now = tkr->read(tkr->clock); - - /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + delta = timekeeping_get_delta(tkr); nsec = delta * tkr->mult + tkr->xtime_nsec; nsec >>= tkr->shift; @@ -237,14 +262,10 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) { struct clocksource *clock = tk->tkr.clock; - cycle_t cycle_now, delta; + cycle_t delta; s64 nsec; - /* read clocksource: */ - cycle_now = tk->tkr.read(clock); - - /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); + delta = timekeeping_get_delta(&tk->tkr); /* 
convert delta to nanoseconds. */
 	nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
--
cgit v1.2.3

From 057b87e3161d1194a095718f9918c01b2c389e74 Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 11 Mar 2015 21:16:34 -0700
Subject: timekeeping: Try to catch clocksource delta underflows

In the case where there is a broken clocksource, where multiple actual
clocks aren't perfectly aligned, we may see small "negative" deltas
when we subtract 'cycle_last' from 'now'.

The values are actually negative with respect to the clocksource mask
value, not necessarily negative if cast to an s64, so we can detect
them by checking whether the delta is a small (relative to the mask)
negative value. If so, we assume we jumped backwards somehow and
instead use zero for our delta.

Signed-off-by: John Stultz
Cc: Dave Jones
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Prarit Bhargava
Cc: Richard Cochran
Cc: Stephen Boyd
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1426133800-29329-7-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar
---
 kernel/time/timekeeping.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 657414cf2e46..187149be83ea 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -148,6 +148,13 @@ static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
 	/* calculate the delta since the last update_wall_time */
 	delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);

+	/*
+	 * Try to catch underflows by checking if we are seeing small
+	 * mask-relative negative values.
+	 */
+	if (unlikely((~delta & tkr->mask) < (tkr->mask >> 3)))
+		delta = 0;
+
 	/* Cap delta value to the max_cycles values to avoid mult overflows */
 	if (unlikely(delta > tkr->clock->max_cycles))
 		delta = tkr->clock->max_cycles;
--
cgit v1.2.3

From 4ca22c2648f9c1cec0b242f58d7302136f5a4cbb Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 11 Mar 2015 21:16:35 -0700
Subject: timekeeping: Add warnings when overflows or underflows are observed

It was suggested that the underflow/overflow protection should probably
throw some sort of warning out, rather than just silently fixing the
issue. So this patch adds some warnings here.

The flag variables used are not protected by locks, but since we can't
print from the reading functions, just being able to say we saw an
issue in the update interval is useful enough, and can be slightly
racy without real consequence.

The big complication is that we're only under a read seqlock, so the
data could shift under us during our calculation to see if there was a
problem. This patch avoids this issue by nesting another seqlock which
allows us to snapshot just the required values atomically. So we
shouldn't see false positives.

I also added some basic rate-limiting here, since on one build machine
w/ skewed TSCs it was fairly noisy at bootup.
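
[ Editor's illustration: the nested-snapshot pattern described above, in
  minimal form (the names are taken from the diff below; declarations
  and the checks themselves are elided):

	do {
		seq  = read_seqcount_begin(&tk_core.seq);
		now  = tkr->read(tkr->clock);
		last = tkr->cycle_last;
		mask = tkr->mask;
		max  = tkr->clock->max_cycles;
	} while (read_seqcount_retry(&tk_core.seq, seq));

  A concurrent update restarts the loop, so the sampled values are
  always checked as a consistent set and false positives are avoided. ]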
Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-8-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 64 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 187149be83ea..892f6cbf1e67 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -119,6 +119,20 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) } #ifdef CONFIG_DEBUG_TIMEKEEPING +#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ +/* + * These simple flag variables are managed + * without locks, which is racy, but ok since + * we don't really care about being super + * precise about how many events were seen, + * just that a problem was observed. + */ +static int timekeeping_underflow_seen; +static int timekeeping_overflow_seen; + +/* last_warning is only modified under the timekeeping lock */ +static long timekeeping_last_warning; + static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) { @@ -136,28 +150,64 @@ static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n"); } } + + if (timekeeping_underflow_seen) { + if (jiffies - timekeeping_last_warning > WARNING_FREQ) { + printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name); + printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); + printk_deferred(" Your kernel is probably still fine.\n"); + timekeeping_last_warning = jiffies; + } + timekeeping_underflow_seen = 0; + } + + if (timekeeping_overflow_seen) { + if (jiffies - timekeeping_last_warning > WARNING_FREQ) { + printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name); + printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); + printk_deferred(" Your kernel is probably still fine.\n"); + timekeeping_last_warning = jiffies; + } + timekeeping_overflow_seen = 0; + } } static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) { - cycle_t cycle_now, delta; + cycle_t now, last, mask, max, delta; + unsigned int seq; - /* read clocksource */ - cycle_now = tkr->read(tkr->clock); + /* + * Since we're called holding a seqlock, the data may shift + * under us while we're doing the calculation. This can cause + * false positives, since we'd note a problem but throw the + * results away. So nest another seqlock here to atomically + * grab the points we are checking with. + */ + do { + seq = read_seqcount_begin(&tk_core.seq); + now = tkr->read(tkr->clock); + last = tkr->cycle_last; + mask = tkr->mask; + max = tkr->clock->max_cycles; + } while (read_seqcount_retry(&tk_core.seq, seq)); - /* calculate the delta since the last update_wall_time */ - delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + delta = clocksource_delta(now, last, mask); /* * Try to catch underflows by checking if we are seeing small * mask-relative negative values. 
*/
-	if (unlikely((~delta & tkr->mask) < (tkr->mask >> 3)))
+	if (unlikely((~delta & mask) < (mask >> 3))) {
+		timekeeping_underflow_seen = 1;
 		delta = 0;
+	}

 	/* Cap delta value to the max_cycles values to avoid mult overflows */
-	if (unlikely(delta > tkr->clock->max_cycles))
+	if (unlikely(delta > max)) {
+		timekeeping_overflow_seen = 1;
 		delta = tkr->clock->max_cycles;
+	}

 	return delta;
 }
--
cgit v1.2.3

From 0b046b217ad4c64fbbeaaac24d0648cb1fa49ad8 Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 11 Mar 2015 21:16:36 -0700
Subject: clocksource: Improve clocksource watchdog reporting

The clocksource watchdog reporting has been less helpful than desired,
as it just printed the delta between the two clocksources. This
prevented any useful analysis of why the skew occurred.

Thus this patch tries to improve the output when we mark a clocksource
as unstable, printing out the last and current cycle values for both
the current clocksource and the watchdog clocksource. This will allow
us to see if the result was due to a false positive caused by a
problematic watchdog.

Signed-off-by: John Stultz
Cc: Dave Jones
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Prarit Bhargava
Cc: Richard Cochran
Cc: Stephen Boyd
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1426133800-29329-9-git-send-email-john.stultz@linaro.org
[ Minor cleanups of kernel messages. ]
Signed-off-by: Ingo Molnar
---
 kernel/time/clocksource.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index fc2a9de43ca1..c4cc04bec698 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs)
 	schedule_work(&watchdog_work);
 }

-static void clocksource_unstable(struct clocksource *cs, int64_t delta)
-{
-	printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
-	       cs->name, delta);
-	__clocksource_unstable(cs);
-}
-
 /**
  * clocksource_mark_unstable - mark clocksource unstable via watchdog
  * @cs: clocksource to be marked unstable
@@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
 static void clocksource_watchdog(unsigned long data)
 {
 	struct clocksource *cs;
-	cycle_t csnow, wdnow, delta;
+	cycle_t csnow, wdnow, cslast, wdlast, delta;
 	int64_t wd_nsec, cs_nsec;
 	int next_cpu, reset_pending;

@@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data)
 	delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
 	cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);

+	wdlast = cs->wd_last; /* save these in case we print them */
+	cslast = cs->cs_last;
 	cs->cs_last = csnow;
 	cs->wd_last = wdnow;

@@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data)

 	/* Check the deviation from the watchdog clocksource.
*/ if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { - clocksource_unstable(cs, cs_nsec - wd_nsec); + pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name); + pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", + watchdog->name, wdnow, wdlast, watchdog->mask); + pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n", + cs->name, csnow, cslast, cs->mask); + __clocksource_unstable(cs); continue; } -- cgit v1.2.3 From f8935983f110505daa38e8d36ee406807f83a069 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:37 -0700 Subject: clocksource: Mostly kill clocksource_register() A long running project has been to clean up remaining uses of clocksource_register(), replacing it with the simpler clocksource_register_khz/hz() functions. However, there are a few cases where we need to self-define our mult/shift values, so switch the function to a more obviously internal __clocksource_register() name, and consolidate much of the internal logic so we don't have duplication. Signed-off-by: John Stultz Cc: Dave Jones Cc: David S. Miller Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-10-git-send-email-john.stultz@linaro.org [ Minor cleanups. ] Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 81 ++++++++++++++++++++--------------------------- kernel/time/jiffies.c | 4 +-- 2 files changed, 36 insertions(+), 49 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c4cc04bec698..5cdf17eb4fa6 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -656,38 +656,52 @@ static void clocksource_enqueue(struct clocksource *cs) void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) { u64 sec; + /* - * Calc the maximum number of seconds which we can run before - * wrapping around. For clocksources which have a mask > 32bit - * we need to limit the max sleep time to have a good - * conversion precision. 10 minutes is still a reasonable - * amount. That results in a shift value of 24 for a - * clocksource with mask >= 40bit and f >= 4GHz. That maps to - * ~ 0.06ppm granularity for NTP. + * Default clocksources are *special* and self-define their mult/shift. + * But, you're not special, so you should specify a freq value. */ - sec = cs->mask; - do_div(sec, freq); - do_div(sec, scale); - if (!sec) - sec = 1; - else if (sec > 600 && cs->mask > UINT_MAX) - sec = 600; - - clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, - NSEC_PER_SEC / scale, sec * scale); - + if (freq) { + /* + * Calc the maximum number of seconds which we can run before + * wrapping around. For clocksources which have a mask > 32-bit + * we need to limit the max sleep time to have a good + * conversion precision. 10 minutes is still a reasonable + * amount. That results in a shift value of 24 for a + * clocksource with mask >= 40-bit and f >= 4GHz. That maps to + * ~ 0.06ppm granularity for NTP. + */ + sec = cs->mask; + do_div(sec, freq); + do_div(sec, scale); + if (!sec) + sec = 1; + else if (sec > 600 && cs->mask > UINT_MAX) + sec = 600; + + clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, + NSEC_PER_SEC / scale, sec * scale); + } /* * Ensure clocksources that have large 'mult' values don't overflow * when adjusted. 
*/ cs->maxadj = clocksource_max_adjustment(cs); - while ((cs->mult + cs->maxadj < cs->mult) - || (cs->mult - cs->maxadj > cs->mult)) { + while (freq && ((cs->mult + cs->maxadj < cs->mult) + || (cs->mult - cs->maxadj > cs->mult))) { cs->mult >>= 1; cs->shift--; cs->maxadj = clocksource_max_adjustment(cs); } + /* + * Only warn for *special* clocksources that self-define + * their mult/shift values and don't specify a freq. + */ + WARN_ONCE(cs->mult + cs->maxadj < cs->mult, + "timekeeping: Clocksource %s might overflow on 11%% adjustment\n", + cs->name); + clocksource_update_max_deferment(cs); } EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); @@ -719,33 +733,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) } EXPORT_SYMBOL_GPL(__clocksource_register_scale); - -/** - * clocksource_register - Used to install new clocksources - * @cs: clocksource to be registered - * - * Returns -EBUSY if registration fails, zero otherwise. - */ -int clocksource_register(struct clocksource *cs) -{ - /* calculate max adjustment for given mult/shift */ - cs->maxadj = clocksource_max_adjustment(cs); - WARN_ONCE(cs->mult + cs->maxadj < cs->mult, - "Clocksource %s might overflow on 11%% adjustment\n", - cs->name); - - /* Update max idle time permitted for this clocksource */ - clocksource_update_max_deferment(cs); - - mutex_lock(&clocksource_mutex); - clocksource_enqueue(cs); - clocksource_enqueue_watchdog(cs); - clocksource_select(); - mutex_unlock(&clocksource_mutex); - return 0; -} -EXPORT_SYMBOL(clocksource_register); - static void __clocksource_change_rating(struct clocksource *cs, int rating) { list_del(&cs->list); diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 7e413902aa6a..c4bb518725b5 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -95,7 +95,7 @@ EXPORT_SYMBOL(jiffies); static int __init init_jiffies_clocksource(void) { - return clocksource_register(&clocksource_jiffies); + return __clocksource_register(&clocksource_jiffies); } core_initcall(init_jiffies_clocksource); @@ -131,6 +131,6 @@ int register_refined_jiffies(long cycles_per_second) refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT; - clocksource_register(&refined_jiffies); + __clocksource_register(&refined_jiffies); return 0; } -- cgit v1.2.3 From 8cc8c525ad4e7b581cacf84119e1a28dcb4044db Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:39 -0700 Subject: clocksource: Add some debug info about clocksources being registered Print the mask, max_cycles, and max_idle_ns values for clocksources being registered. 
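
[ Editor's note: one such line is emitted per registered clocksource.
  The format comes from the pr_info() in the diff below, but the
  clocksource name and the values here are invented for illustration:

	clocksource tsc: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
]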
Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-12-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 5cdf17eb4fa6..1977ebabd922 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -703,6 +703,9 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) cs->name); clocksource_update_max_deferment(cs); + + pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n", + cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns); } EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); -- cgit v1.2.3 From fba9e07208c0f9d92d9f73761c99c8612039da44 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:40 -0700 Subject: clocksource: Rename __clocksource_updatefreq_*() to __clocksource_update_freq_*() Ingo requested this function be renamed to improve readability, so I've renamed __clocksource_updatefreq_scale() as well as the __clocksource_updatefreq_hz/khz() functions to avoid squishedtogethernames. This touches some of the sh clocksources, which I've not tested. The arch/arm/plat-omap change is just a comment change for consistency. Signed-off-by: John Stultz Cc: Daniel Lezcano Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-13-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1977ebabd922..c3be3c71bbad 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -643,7 +643,7 @@ static void clocksource_enqueue(struct clocksource *cs) } /** - * __clocksource_updatefreq_scale - Used update clocksource with new freq + * __clocksource_update_freq_scale - Used update clocksource with new freq * @cs: clocksource to be registered * @scale: Scale factor multiplied against freq to get clocksource hz * @freq: clocksource frequency (cycles per second) divided by scale @@ -651,9 +651,10 @@ static void clocksource_enqueue(struct clocksource *cs) * This should only be called from the clocksource->enable() method. * * This *SHOULD NOT* be called directly! Please use the - * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions. + * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper + * functions. 
*/
-void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
+void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
 	u64 sec;
@@ -707,7 +708,7 @@
 	pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
 			cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
 }
-EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
+EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);

 /**
  * __clocksource_register_scale - Used to install new clocksources
@@ -724,7 +725,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 {

 	/* Initialize mult/shift and max_idle_ns */
-	__clocksource_updatefreq_scale(cs, scale, freq);
+	__clocksource_update_freq_scale(cs, scale, freq);

 	/* Add clocksource to the clocksource list */
 	mutex_lock(&clocksource_mutex);
--
cgit v1.2.3

From 8710e914027e4f64058ebbf0501cc6db3cc8454f Mon Sep 17 00:00:00 2001
From: Daniel Thompson
Date: Thu, 26 Mar 2015 12:23:22 -0700
Subject: timers, sched/clock: Match scope of read and write seqcounts

Currently the scope of the raw_write_seqcount_begin/end() in
sched_clock_register() far exceeds the scope of the read section in
sched_clock(). This gives the impression of safety during cursory
review but achieves little.

Note that this is likely to be a latent issue at present because
sched_clock_register() is typically called before we enable interrupts;
however, the issue does risk bugs being needlessly introduced as the
code evolves.

This patch fixes the problem by increasing the scope of the read
locking performed by sched_clock() to cover all data modified by
sched_clock_register().

We also improve clarity by moving writes to struct clock_data that do
not impact sched_clock() outside of the critical section.
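
[ Editor's illustration: the rule this patch enforces, reduced to a
  minimal sketch (field names are placeholders and the declarations are
  elided; this is not the real clock_data layout):

	/* Writer: publish all related fields in one write section */
	raw_write_seqcount_begin(&cd.seq);
	cd.mult  = new_mult;
	cd.shift = new_shift;
	raw_write_seqcount_end(&cd.seq);

	/* Reader: every field written above must be sampled inside
	 * the retry loop, or a torn read can slip through */
	do {
		seq   = raw_read_seqcount_begin(&cd.seq);
		mult  = cd.mult;
		shift = cd.shift;
	} while (read_seqcount_retry(&cd.seq, seq));
]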
Signed-off-by: Daniel Thompson [ Reworked it slightly to apply to tip/timers/core] Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index ca3bc5c7027c..1751e956add9 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -58,23 +58,21 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) unsigned long long notrace sched_clock(void) { - u64 epoch_ns; - u64 epoch_cyc; - u64 cyc; + u64 cyc, res; unsigned long seq; - if (cd.suspended) - return cd.epoch_ns; - do { seq = raw_read_seqcount_begin(&cd.seq); - epoch_cyc = cd.epoch_cyc; - epoch_ns = cd.epoch_ns; + + res = cd.epoch_ns; + if (!cd.suspended) { + cyc = read_sched_clock(); + cyc = (cyc - cd.epoch_cyc) & sched_clock_mask; + res += cyc_to_ns(cyc, cd.mult, cd.shift); + } } while (read_seqcount_retry(&cd.seq, seq)); - cyc = read_sched_clock(); - cyc = (cyc - epoch_cyc) & sched_clock_mask; - return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift); + return res; } /* @@ -111,7 +109,6 @@ void __init sched_clock_register(u64 (*read)(void), int bits, { u64 res, wrap, new_mask, new_epoch, cyc, ns; u32 new_mult, new_shift; - ktime_t new_wrap_kt; unsigned long r; char r_unit; @@ -124,10 +121,11 @@ void __init sched_clock_register(u64 (*read)(void), int bits, clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); new_mask = CLOCKSOURCE_MASK(bits); + cd.rate = rate; /* calculate how many nanosecs until we risk wrapping */ wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL); - new_wrap_kt = ns_to_ktime(wrap); + cd.wrap_kt = ns_to_ktime(wrap); /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); @@ -138,8 +136,6 @@ void __init sched_clock_register(u64 (*read)(void), int bits, raw_write_seqcount_begin(&cd.seq); read_sched_clock = read; sched_clock_mask = new_mask; - cd.rate = rate; - cd.wrap_kt = new_wrap_kt; cd.mult = new_mult; cd.shift = new_shift; cd.epoch_cyc = new_epoch; -- cgit v1.2.3 From cf7c9c170787d6870af54684822f58acc00a966c Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 26 Mar 2015 12:23:23 -0700 Subject: timers, sched/clock: Optimize cache line usage Currently sched_clock(), a very hot code path, is not optimized to minimise its cache profile. In particular: 1. cd is not ____cacheline_aligned, 2. struct clock_data does not distinguish between hotpath and coldpath data, reducing locality of reference in the hotpath, 3. Some hotpath data is missing from struct clock_data and is marked __read_mostly (which more or less guarantees it will not share a cache line with cd). This patch corrects these problems by extracting all hotpath data into a separate structure and using ____cacheline_aligned to ensure the hotpath uses a single (64 byte) cache line. 
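
[ Editor's note: the 64-byte budget stated above could be pinned down
  at build time with a hypothetical assertion (not part of the patch):

	_Static_assert(sizeof(struct clock_read_data) + sizeof(seqcount_t) <= 64,
		       "sched_clock() hot data must fit in one cache line");

  so that later growth of clock_read_data would fail the build rather
  than silently spill onto a second cache line. ]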
Signed-off-by: Daniel Thompson Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 112 +++++++++++++++++++++++++++++++--------------- 1 file changed, 77 insertions(+), 35 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 1751e956add9..872e0685d1fb 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -18,28 +18,59 @@ #include #include -struct clock_data { - ktime_t wrap_kt; +/** + * struct clock_read_data - data required to read from sched_clock + * + * @epoch_ns: sched_clock value at last update + * @epoch_cyc: Clock cycle value at last update + * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit + * clocks + * @read_sched_clock: Current clock source (or dummy source when suspended) + * @mult: Multipler for scaled math conversion + * @shift: Shift value for scaled math conversion + * @suspended: Flag to indicate if the clock is suspended (stopped) + * + * Care must be taken when updating this structure; it is read by + * some very hot code paths. It occupies <=48 bytes and, when combined + * with the seqcount used to synchronize access, comfortably fits into + * a 64 byte cache line. + */ +struct clock_read_data { u64 epoch_ns; u64 epoch_cyc; - seqcount_t seq; - unsigned long rate; + u64 sched_clock_mask; + u64 (*read_sched_clock)(void); u32 mult; u32 shift; bool suspended; }; +/** + * struct clock_data - all data needed for sched_clock (including + * registration of a new clock source) + * + * @seq: Sequence counter for protecting updates. + * @read_data: Data required to read from sched_clock. + * @wrap_kt: Duration for which clock can run before wrapping + * @rate: Tick rate of the registered clock + * @actual_read_sched_clock: Registered clock read function + * + * The ordering of this structure has been chosen to optimize cache + * performance. In particular seq and read_data (combined) should fit + * into a single 64 byte cache line. 
+ */ +struct clock_data { + seqcount_t seq; + struct clock_read_data read_data; + ktime_t wrap_kt; + unsigned long rate; +}; + static struct hrtimer sched_clock_timer; static int irqtime = -1; core_param(irqtime, irqtime, int, 0400); -static struct clock_data cd = { - .mult = NSEC_PER_SEC / HZ, -}; - -static u64 __read_mostly sched_clock_mask; - static u64 notrace jiffy_sched_clock_read(void) { /* @@ -49,7 +80,10 @@ static u64 notrace jiffy_sched_clock_read(void) return (u64)(jiffies - INITIAL_JIFFIES); } -static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; +static struct clock_data cd ____cacheline_aligned = { + .read_data = { .mult = NSEC_PER_SEC / HZ, + .read_sched_clock = jiffy_sched_clock_read, }, +}; static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) { @@ -60,15 +94,16 @@ unsigned long long notrace sched_clock(void) { u64 cyc, res; unsigned long seq; + struct clock_read_data *rd = &cd.read_data; do { seq = raw_read_seqcount_begin(&cd.seq); - res = cd.epoch_ns; - if (!cd.suspended) { - cyc = read_sched_clock(); - cyc = (cyc - cd.epoch_cyc) & sched_clock_mask; - res += cyc_to_ns(cyc, cd.mult, cd.shift); + res = rd->epoch_ns; + if (!rd->suspended) { + cyc = rd->read_sched_clock(); + cyc = (cyc - rd->epoch_cyc) & rd->sched_clock_mask; + res += cyc_to_ns(cyc, rd->mult, rd->shift); } } while (read_seqcount_retry(&cd.seq, seq)); @@ -83,16 +118,17 @@ static void notrace update_sched_clock(void) unsigned long flags; u64 cyc; u64 ns; + struct clock_read_data *rd = &cd.read_data; - cyc = read_sched_clock(); - ns = cd.epoch_ns + - cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, - cd.mult, cd.shift); + cyc = rd->read_sched_clock(); + ns = rd->epoch_ns + + cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, + rd->mult, rd->shift); raw_local_irq_save(flags); raw_write_seqcount_begin(&cd.seq); - cd.epoch_ns = ns; - cd.epoch_cyc = cyc; + rd->epoch_ns = ns; + rd->epoch_cyc = cyc; raw_write_seqcount_end(&cd.seq); raw_local_irq_restore(flags); } @@ -111,6 +147,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits, u32 new_mult, new_shift; unsigned long r; char r_unit; + struct clock_read_data *rd = &cd.read_data; if (cd.rate > rate) return; @@ -129,17 +166,18 @@ void __init sched_clock_register(u64 (*read)(void), int bits, /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); - cyc = read_sched_clock(); - ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, - cd.mult, cd.shift); + cyc = rd->read_sched_clock(); + ns = rd->epoch_ns + + cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, + rd->mult, rd->shift); raw_write_seqcount_begin(&cd.seq); - read_sched_clock = read; - sched_clock_mask = new_mask; - cd.mult = new_mult; - cd.shift = new_shift; - cd.epoch_cyc = new_epoch; - cd.epoch_ns = ns; + rd->read_sched_clock = read; + rd->sched_clock_mask = new_mask; + rd->mult = new_mult; + rd->shift = new_shift; + rd->epoch_cyc = new_epoch; + rd->epoch_ns = ns; raw_write_seqcount_end(&cd.seq); r = rate; @@ -171,7 +209,7 @@ void __init sched_clock_postinit(void) * If no sched_clock function has been provided at that point, * make it the final one one. 
*/ - if (read_sched_clock == jiffy_sched_clock_read) + if (cd.read_data.read_sched_clock == jiffy_sched_clock_read) sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ); update_sched_clock(); @@ -187,17 +225,21 @@ void __init sched_clock_postinit(void) static int sched_clock_suspend(void) { + struct clock_read_data *rd = &cd.read_data; + update_sched_clock(); hrtimer_cancel(&sched_clock_timer); - cd.suspended = true; + rd->suspended = true; return 0; } static void sched_clock_resume(void) { - cd.epoch_cyc = read_sched_clock(); + struct clock_read_data *rd = &cd.read_data; + + rd->epoch_cyc = rd->read_sched_clock(); hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); - cd.suspended = false; + rd->suspended = false; } static struct syscore_ops sched_clock_ops = { -- cgit v1.2.3 From 13dbeb384d2d3aa555ea48d511e8cb110bd172e0 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 26 Mar 2015 12:23:24 -0700 Subject: timers, sched/clock: Remove suspend from clock_read_data() Currently cd.read_data.suspended is read by the hotpath function sched_clock(). This variable need not be accessed on the hotpath. In fact, once it is removed, we can remove the conditional branches from sched_clock() and install a dummy read_sched_clock function to suspend the clock. The new master copy of the function pointer (actual_read_sched_clock) is introduced and is used for all reads of the clock hardware except those within sched_clock itself. Suggested-by: Thomas Gleixner Signed-off-by: Daniel Thompson Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 872e0685d1fb..52ea5d976393 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -28,10 +28,9 @@ * @read_sched_clock: Current clock source (or dummy source when suspended) * @mult: Multipler for scaled math conversion * @shift: Shift value for scaled math conversion - * @suspended: Flag to indicate if the clock is suspended (stopped) * * Care must be taken when updating this structure; it is read by - * some very hot code paths. It occupies <=48 bytes and, when combined + * some very hot code paths. It occupies <=40 bytes and, when combined * with the seqcount used to synchronize access, comfortably fits into * a 64 byte cache line. 
*/ @@ -42,7 +41,6 @@ struct clock_read_data { u64 (*read_sched_clock)(void); u32 mult; u32 shift; - bool suspended; }; /** @@ -64,6 +62,7 @@ struct clock_data { struct clock_read_data read_data; ktime_t wrap_kt; unsigned long rate; + u64 (*actual_read_sched_clock)(void); }; static struct hrtimer sched_clock_timer; @@ -83,6 +82,8 @@ static u64 notrace jiffy_sched_clock_read(void) static struct clock_data cd ____cacheline_aligned = { .read_data = { .mult = NSEC_PER_SEC / HZ, .read_sched_clock = jiffy_sched_clock_read, }, + .actual_read_sched_clock = jiffy_sched_clock_read, + }; static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) @@ -99,12 +100,9 @@ unsigned long long notrace sched_clock(void) do { seq = raw_read_seqcount_begin(&cd.seq); - res = rd->epoch_ns; - if (!rd->suspended) { - cyc = rd->read_sched_clock(); - cyc = (cyc - rd->epoch_cyc) & rd->sched_clock_mask; - res += cyc_to_ns(cyc, rd->mult, rd->shift); - } + cyc = (rd->read_sched_clock() - rd->epoch_cyc) & + rd->sched_clock_mask; + res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift); } while (read_seqcount_retry(&cd.seq, seq)); return res; @@ -120,7 +118,7 @@ static void notrace update_sched_clock(void) u64 ns; struct clock_read_data *rd = &cd.read_data; - cyc = rd->read_sched_clock(); + cyc = cd.actual_read_sched_clock(); ns = rd->epoch_ns + cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, rd->mult, rd->shift); @@ -166,10 +164,11 @@ void __init sched_clock_register(u64 (*read)(void), int bits, /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); - cyc = rd->read_sched_clock(); + cyc = cd.actual_read_sched_clock(); ns = rd->epoch_ns + cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, rd->mult, rd->shift); + cd.actual_read_sched_clock = read; raw_write_seqcount_begin(&cd.seq); rd->read_sched_clock = read; @@ -209,7 +208,7 @@ void __init sched_clock_postinit(void) * If no sched_clock function has been provided at that point, * make it the final one one. */ - if (cd.read_data.read_sched_clock == jiffy_sched_clock_read) + if (cd.actual_read_sched_clock == jiffy_sched_clock_read) sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ); update_sched_clock(); @@ -223,13 +222,24 @@ void __init sched_clock_postinit(void) hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); } +/* + * Clock read function for use when the clock is suspended. + * + * This function makes it appear to sched_clock() as if the clock + * stopped counting at its last update. 
+ */ +static u64 notrace suspended_sched_clock_read(void) +{ + return cd.read_data.epoch_cyc; +} + static int sched_clock_suspend(void) { struct clock_read_data *rd = &cd.read_data; update_sched_clock(); hrtimer_cancel(&sched_clock_timer); - rd->suspended = true; + rd->read_sched_clock = suspended_sched_clock_read; return 0; } @@ -237,9 +247,9 @@ static void sched_clock_resume(void) { struct clock_read_data *rd = &cd.read_data; - rd->epoch_cyc = rd->read_sched_clock(); + rd->epoch_cyc = cd.actual_read_sched_clock(); hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); - rd->suspended = false; + rd->read_sched_clock = cd.actual_read_sched_clock; } static struct syscore_ops sched_clock_ops = { -- cgit v1.2.3 From 9fee69a8c8070b38b558161a3f18bd5e2b664682 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 26 Mar 2015 12:23:25 -0700 Subject: timers, sched/clock: Remove redundant notrace from update function Currently update_sched_clock() is marked as notrace but this function is not called by ftrace. This is trivially fixed by removing the mark up. Signed-off-by: Daniel Thompson Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-5-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 52ea5d976393..8adb9d0c969a 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -111,7 +111,7 @@ unsigned long long notrace sched_clock(void) /* * Atomically update the sched_clock epoch. */ -static void notrace update_sched_clock(void) +static void update_sched_clock(void) { unsigned long flags; u64 cyc; -- cgit v1.2.3 From 1809bfa44e1019e397fabaa6f2349bb7237e57a4 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 26 Mar 2015 12:23:26 -0700 Subject: timers, sched/clock: Avoid deadlock during read from NMI Currently it is possible for an NMI (or FIQ on ARM) to come in and read sched_clock() whilst update_sched_clock() has locked the seqcount for writing. This results in the NMI handler locking up when it calls raw_read_seqcount_begin(). This patch fixes the NMI safety issues by providing banked clock data. This is a similar approach to the one used in Thomas Gleixner's 4396e058c52e("timekeeping: Provide fast and NMI safe access to CLOCK_MONOTONIC"). Suggested-by: Stephen Boyd Signed-off-by: Daniel Thompson Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-6-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 103 ++++++++++++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 35 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 8adb9d0c969a..eeea1e950b72 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -47,19 +47,20 @@ struct clock_read_data { * struct clock_data - all data needed for sched_clock (including * registration of a new clock source) * - * @seq: Sequence counter for protecting updates. + * @seq: Sequence counter for protecting updates. 
The lowest + * bit is the index for @read_data. * @read_data: Data required to read from sched_clock. * @wrap_kt: Duration for which clock can run before wrapping * @rate: Tick rate of the registered clock * @actual_read_sched_clock: Registered clock read function * * The ordering of this structure has been chosen to optimize cache - * performance. In particular seq and read_data (combined) should fit + * performance. In particular seq and read_data[0] (combined) should fit * into a single 64 byte cache line. */ struct clock_data { seqcount_t seq; - struct clock_read_data read_data; + struct clock_read_data read_data[2]; ktime_t wrap_kt; unsigned long rate; u64 (*actual_read_sched_clock)(void); @@ -80,10 +81,9 @@ static u64 notrace jiffy_sched_clock_read(void) } static struct clock_data cd ____cacheline_aligned = { - .read_data = { .mult = NSEC_PER_SEC / HZ, - .read_sched_clock = jiffy_sched_clock_read, }, + .read_data[0] = { .mult = NSEC_PER_SEC / HZ, + .read_sched_clock = jiffy_sched_clock_read, }, .actual_read_sched_clock = jiffy_sched_clock_read, - }; static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) @@ -95,10 +95,11 @@ unsigned long long notrace sched_clock(void) { u64 cyc, res; unsigned long seq; - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data *rd; do { - seq = raw_read_seqcount_begin(&cd.seq); + seq = raw_read_seqcount(&cd.seq); + rd = cd.read_data + (seq & 1); cyc = (rd->read_sched_clock() - rd->epoch_cyc) & rd->sched_clock_mask; @@ -108,27 +109,51 @@ unsigned long long notrace sched_clock(void) return res; } +/* + * Updating the data required to read the clock. + * + * sched_clock will never observe mis-matched data even if called from + * an NMI. We do this by maintaining an odd/even copy of the data and + * steering sched_clock to one or the other using a sequence counter. + * In order to preserve the data cache profile of sched_clock as much + * as possible the system reverts back to the even copy when the update + * completes; the odd copy is used *only* during an update. + */ +static void update_clock_read_data(struct clock_read_data *rd) +{ + /* update the backup (odd) copy with the new data */ + cd.read_data[1] = *rd; + + /* steer readers towards the odd copy */ + raw_write_seqcount_latch(&cd.seq); + + /* now its safe for us to update the normal (even) copy */ + cd.read_data[0] = *rd; + + /* switch readers back to the even copy */ + raw_write_seqcount_latch(&cd.seq); +} + /* * Atomically update the sched_clock epoch. 
*/ static void update_sched_clock(void) { - unsigned long flags; u64 cyc; u64 ns; - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data rd; + + rd = cd.read_data[0]; cyc = cd.actual_read_sched_clock(); - ns = rd->epoch_ns + - cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, - rd->mult, rd->shift); - - raw_local_irq_save(flags); - raw_write_seqcount_begin(&cd.seq); - rd->epoch_ns = ns; - rd->epoch_cyc = cyc; - raw_write_seqcount_end(&cd.seq); - raw_local_irq_restore(flags); + ns = rd.epoch_ns + + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, + rd.mult, rd.shift); + + rd.epoch_ns = ns; + rd.epoch_cyc = cyc; + + update_clock_read_data(&rd); } static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) @@ -145,7 +170,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits, u32 new_mult, new_shift; unsigned long r; char r_unit; - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data rd; if (cd.rate > rate) return; @@ -162,22 +187,23 @@ void __init sched_clock_register(u64 (*read)(void), int bits, wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL); cd.wrap_kt = ns_to_ktime(wrap); + rd = cd.read_data[0]; + /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); cyc = cd.actual_read_sched_clock(); - ns = rd->epoch_ns + - cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, - rd->mult, rd->shift); + ns = rd.epoch_ns + + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, + rd.mult, rd.shift); cd.actual_read_sched_clock = read; - raw_write_seqcount_begin(&cd.seq); - rd->read_sched_clock = read; - rd->sched_clock_mask = new_mask; - rd->mult = new_mult; - rd->shift = new_shift; - rd->epoch_cyc = new_epoch; - rd->epoch_ns = ns; - raw_write_seqcount_end(&cd.seq); + rd.read_sched_clock = read; + rd.sched_clock_mask = new_mask; + rd.mult = new_mult; + rd.shift = new_shift; + rd.epoch_cyc = new_epoch; + rd.epoch_ns = ns; + update_clock_read_data(&rd); r = rate; if (r >= 4000000) { @@ -227,15 +253,22 @@ void __init sched_clock_postinit(void) * * This function makes it appear to sched_clock() as if the clock * stopped counting at its last update. + * + * This function must only be called from the critical + * section in sched_clock(). It relies on the read_seqcount_retry() + * at the end of the critical section to be sure we observe the + * correct copy of epoch_cyc. */ static u64 notrace suspended_sched_clock_read(void) { - return cd.read_data.epoch_cyc; + unsigned long seq = raw_read_seqcount(&cd.seq); + + return cd.read_data[seq & 1].epoch_cyc; } static int sched_clock_suspend(void) { - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data *rd = &cd.read_data[0]; update_sched_clock(); hrtimer_cancel(&sched_clock_timer); @@ -245,7 +278,7 @@ static int sched_clock_suspend(void) static void sched_clock_resume(void) { - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data *rd = &cd.read_data[0]; rd->epoch_cyc = cd.actual_read_sched_clock(); hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); -- cgit v1.2.3 From 32fea568aec5b73ae27253125522b5c2a970a1f0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 27 Mar 2015 07:08:06 +0100 Subject: timers, sched/clock: Clean up the code a bit Trivial cleanups, to improve the readability of the generic sched_clock() code: - Improve and standardize comments - Standardize the coding style - Use vertical spacing where appropriate - etc. 
No code changed: md5: 19a053b31e0c54feaeff1492012b019a sched_clock.o.before.asm 19a053b31e0c54feaeff1492012b019a sched_clock.o.after.asm Cc: Catalin Marinas Cc: Daniel Thompson Cc: John Stultz Cc: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Russell King Cc: Stephen Boyd Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 107 ++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 51 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index eeea1e950b72..a26036d37a38 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -1,5 +1,6 @@ /* - * sched_clock.c: support for extending counters to full 64-bit ns counter + * sched_clock.c: Generic sched_clock() support, to extend low level + * hardware time counters to full 64-bit ns values. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,15 +20,15 @@ #include /** - * struct clock_read_data - data required to read from sched_clock + * struct clock_read_data - data required to read from sched_clock() * - * @epoch_ns: sched_clock value at last update - * @epoch_cyc: Clock cycle value at last update + * @epoch_ns: sched_clock() value at last update + * @epoch_cyc: Clock cycle value at last update. * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit - * clocks - * @read_sched_clock: Current clock source (or dummy source when suspended) - * @mult: Multipler for scaled math conversion - * @shift: Shift value for scaled math conversion + * clocks. + * @read_sched_clock: Current clock source (or dummy source when suspended). + * @mult: Multipler for scaled math conversion. + * @shift: Shift value for scaled math conversion. * * Care must be taken when updating this structure; it is read by * some very hot code paths. It occupies <=40 bytes and, when combined @@ -44,25 +45,26 @@ struct clock_read_data { }; /** - * struct clock_data - all data needed for sched_clock (including + * struct clock_data - all data needed for sched_clock() (including * registration of a new clock source) * * @seq: Sequence counter for protecting updates. The lowest * bit is the index for @read_data. * @read_data: Data required to read from sched_clock. - * @wrap_kt: Duration for which clock can run before wrapping - * @rate: Tick rate of the registered clock - * @actual_read_sched_clock: Registered clock read function + * @wrap_kt: Duration for which clock can run before wrapping. + * @rate: Tick rate of the registered clock. + * @actual_read_sched_clock: Registered hardware level clock read function. * * The ordering of this structure has been chosen to optimize cache - * performance. In particular seq and read_data[0] (combined) should fit - * into a single 64 byte cache line. + * performance. In particular 'seq' and 'read_data[0]' (combined) should fit + * into a single 64-byte cache line. */ struct clock_data { - seqcount_t seq; - struct clock_read_data read_data[2]; - ktime_t wrap_kt; - unsigned long rate; + seqcount_t seq; + struct clock_read_data read_data[2]; + ktime_t wrap_kt; + unsigned long rate; + u64 (*actual_read_sched_clock)(void); }; @@ -112,10 +114,10 @@ unsigned long long notrace sched_clock(void) /* * Updating the data required to read the clock. * - * sched_clock will never observe mis-matched data even if called from + * sched_clock() will never observe mis-matched data even if called from * an NMI. 
We do this by maintaining an odd/even copy of the data and - * steering sched_clock to one or the other using a sequence counter. - * In order to preserve the data cache profile of sched_clock as much + * steering sched_clock() to one or the other using a sequence counter. + * In order to preserve the data cache profile of sched_clock() as much * as possible the system reverts back to the even copy when the update * completes; the odd copy is used *only* during an update. */ @@ -135,7 +137,7 @@ static void update_clock_read_data(struct clock_read_data *rd) } /* - * Atomically update the sched_clock epoch. + * Atomically update the sched_clock() epoch. */ static void update_sched_clock(void) { @@ -146,9 +148,7 @@ static void update_sched_clock(void) rd = cd.read_data[0]; cyc = cd.actual_read_sched_clock(); - ns = rd.epoch_ns + - cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, - rd.mult, rd.shift); + ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift); rd.epoch_ns = ns; rd.epoch_cyc = cyc; @@ -160,11 +160,12 @@ static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) { update_sched_clock(); hrtimer_forward_now(hrt, cd.wrap_kt); + return HRTIMER_RESTART; } -void __init sched_clock_register(u64 (*read)(void), int bits, - unsigned long rate) +void __init +sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) { u64 res, wrap, new_mask, new_epoch, cyc, ns; u32 new_mult, new_shift; @@ -177,51 +178,53 @@ void __init sched_clock_register(u64 (*read)(void), int bits, WARN_ON(!irqs_disabled()); - /* calculate the mult/shift to convert counter ticks to ns. */ + /* Calculate the mult/shift to convert counter ticks to ns. */ clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); new_mask = CLOCKSOURCE_MASK(bits); cd.rate = rate; - /* calculate how many nanosecs until we risk wrapping */ + /* Calculate how many nanosecs until we risk wrapping */ wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL); cd.wrap_kt = ns_to_ktime(wrap); rd = cd.read_data[0]; - /* update epoch for new counter and update epoch_ns from old counter*/ + /* Update epoch for new counter and update 'epoch_ns' from old counter*/ new_epoch = read(); cyc = cd.actual_read_sched_clock(); - ns = rd.epoch_ns + - cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, - rd.mult, rd.shift); + ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift); cd.actual_read_sched_clock = read; - rd.read_sched_clock = read; - rd.sched_clock_mask = new_mask; - rd.mult = new_mult; - rd.shift = new_shift; - rd.epoch_cyc = new_epoch; - rd.epoch_ns = ns; + rd.read_sched_clock = read; + rd.sched_clock_mask = new_mask; + rd.mult = new_mult; + rd.shift = new_shift; + rd.epoch_cyc = new_epoch; + rd.epoch_ns = ns; + update_clock_read_data(&rd); r = rate; if (r >= 4000000) { r /= 1000000; r_unit = 'M'; - } else if (r >= 1000) { - r /= 1000; - r_unit = 'k'; - } else - r_unit = ' '; - - /* calculate the ns resolution of this counter */ + } else { + if (r >= 1000) { + r /= 1000; + r_unit = 'k'; + } else { + r_unit = ' '; + } + } + + /* Calculate the ns resolution of this counter */ res = cyc_to_ns(1ULL, new_mult, new_shift); pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n", bits, r, r_unit, res, wrap); - /* Enable IRQ time accounting if we have a fast enough sched_clock */ + /* Enable IRQ time accounting if we have a fast enough sched_clock() */ if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) 
enable_sched_clock_irqtime(); @@ -231,7 +234,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits, void __init sched_clock_postinit(void) { /* - * If no sched_clock function has been provided at that point, + * If no sched_clock() function has been provided at that point, * make it the final one one. */ if (cd.actual_read_sched_clock == jiffy_sched_clock_read) @@ -257,7 +260,7 @@ void __init sched_clock_postinit(void) * This function must only be called from the critical * section in sched_clock(). It relies on the read_seqcount_retry() * at the end of the critical section to be sure we observe the - * correct copy of epoch_cyc. + * correct copy of 'epoch_cyc'. */ static u64 notrace suspended_sched_clock_read(void) { @@ -273,6 +276,7 @@ static int sched_clock_suspend(void) update_sched_clock(); hrtimer_cancel(&sched_clock_timer); rd->read_sched_clock = suspended_sched_clock_read; + return 0; } @@ -286,13 +290,14 @@ static void sched_clock_resume(void) } static struct syscore_ops sched_clock_ops = { - .suspend = sched_clock_suspend, - .resume = sched_clock_resume, + .suspend = sched_clock_suspend, + .resume = sched_clock_resume, }; static int __init sched_clock_syscore_init(void) { register_syscore_ops(&sched_clock_ops); + return 0; } device_initcall(sched_clock_syscore_init); -- cgit v1.2.3 From 876e78818def2983be55878b21f7152fbaebbd36 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 10:09:06 +0100 Subject: time: Rename timekeeper::tkr to timekeeper::tkr_mono In preparation of adding another tkr field, rename this one to tkr_mono. Also rename tk_read_base::base_mono to tk_read_base::base, since the structure is not specific to CLOCK_MONOTONIC and the mono name got added to the tk_read_base instance. Lots of trivial churn. 
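For reference, a minimal sketch of the layout after the rename (only the members relevant here are shown; the full definitions live in include/linux/timekeeper_internal.h):

	struct tk_read_base {
		struct clocksource	*clock;
		cycle_t			(*read)(struct clocksource *cs);
		cycle_t			mask;
		cycle_t			cycle_last;
		u32			mult;
		u32			shift;
		u64			xtime_nsec;
		ktime_t			base;		/* was: base_mono */
	};

	struct timekeeper {
		struct tk_read_base	tkr_mono;	/* was: tkr */
		/* ... */
	};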
Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.344679419@infradead.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 150 +++++++++++++++++++++++----------------------- 1 file changed, 75 insertions(+), 75 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 892f6cbf1e67..1405091f3acb 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -68,8 +68,8 @@ bool __read_mostly persistent_clock_exist = false; static inline void tk_normalize_xtime(struct timekeeper *tk) { - while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) { - tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift; + while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) { + tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift; tk->xtime_sec++; } } @@ -79,20 +79,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk) struct timespec64 ts; ts.tv_sec = tk->xtime_sec; - ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); + ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); return ts; } static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec = ts->tv_sec; - tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift; + tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift; } static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec += ts->tv_sec; - tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift; + tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift; tk_normalize_xtime(tk); } @@ -136,8 +136,8 @@ static long timekeeping_last_warning; static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) { - cycle_t max_cycles = tk->tkr.clock->max_cycles; - const char *name = tk->tkr.clock->name; + cycle_t max_cycles = tk->tkr_mono.clock->max_cycles; + const char *name = tk->tkr_mono.clock->name; if (offset > max_cycles) { printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n", @@ -246,11 +246,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) u64 tmp, ntpinterval; struct clocksource *old_clock; - old_clock = tk->tkr.clock; - tk->tkr.clock = clock; - tk->tkr.read = clock->read; - tk->tkr.mask = clock->mask; - tk->tkr.cycle_last = tk->tkr.read(clock); + old_clock = tk->tkr_mono.clock; + tk->tkr_mono.clock = clock; + tk->tkr_mono.read = clock->read; + tk->tkr_mono.mask = clock->mask; + tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock); /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; @@ -274,11 +274,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) if (old_clock) { int shift_change = clock->shift - old_clock->shift; if (shift_change < 0) - tk->tkr.xtime_nsec >>= -shift_change; + tk->tkr_mono.xtime_nsec >>= -shift_change; else - tk->tkr.xtime_nsec <<= shift_change; + tk->tkr_mono.xtime_nsec <<= shift_change; } - tk->tkr.shift = clock->shift; + tk->tkr_mono.shift = clock->shift; tk->ntp_error = 0; tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; @@ -289,7 +289,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) * active clocksource. These value will be adjusted via NTP * to counteract clock drifting. 
*/ - tk->tkr.mult = clock->mult; + tk->tkr_mono.mult = clock->mult; tk->ntp_err_mult = 0; } @@ -318,11 +318,11 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) { - struct clocksource *clock = tk->tkr.clock; + struct clocksource *clock = tk->tkr_mono.clock; cycle_t delta; s64 nsec; - delta = timekeeping_get_delta(&tk->tkr); + delta = timekeeping_get_delta(&tk->tkr_mono); /* convert delta to nanoseconds. */ nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); @@ -428,7 +428,7 @@ u64 notrace ktime_get_mono_fast_ns(void) do { seq = raw_read_seqcount(&tk_fast_mono.seq); tkr = tk_fast_mono.base + (seq & 0x01); - now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr); + now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr); } while (read_seqcount_retry(&tk_fast_mono.seq, seq)); return now; @@ -456,7 +456,7 @@ static cycle_t dummy_clock_read(struct clocksource *cs) static void halt_fast_timekeeper(struct timekeeper *tk) { static struct tk_read_base tkr_dummy; - struct tk_read_base *tkr = &tk->tkr; + struct tk_read_base *tkr = &tk->tkr_mono; memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); cycles_at_suspend = tkr->read(tkr->clock); @@ -472,8 +472,8 @@ static inline void update_vsyscall(struct timekeeper *tk) xt = timespec64_to_timespec(tk_xtime(tk)); wm = timespec64_to_timespec(tk->wall_to_monotonic); - update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult, - tk->tkr.cycle_last); + update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult, + tk->tkr_mono.cycle_last); } static inline void old_vsyscall_fixup(struct timekeeper *tk) @@ -490,11 +490,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk) * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD * users are removed, this can be killed. */ - remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1); - tk->tkr.xtime_nsec -= remainder; - tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift; + remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1); + tk->tkr_mono.xtime_nsec -= remainder; + tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift; tk->ntp_error += remainder << tk->ntp_error_shift; - tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift; + tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift; } #else #define old_vsyscall_fixup(tk) @@ -559,7 +559,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) */ seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); nsec = (u32) tk->wall_to_monotonic.tv_nsec; - tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); + tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); /* Update the monotonic raw base */ tk->base_raw = timespec64_to_ktime(tk->raw_time); @@ -569,7 +569,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) * wall_to_monotonic can be greater/equal one second. Take * this into account before updating tk->ktime_sec. 
*/ - nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift); + nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); if (nsec >= NSEC_PER_SEC) seconds++; tk->ktime_sec = seconds; @@ -592,7 +592,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) memcpy(&shadow_timekeeper, &tk_core.timekeeper, sizeof(tk_core.timekeeper)); - update_fast_timekeeper(&tk->tkr); + update_fast_timekeeper(&tk->tkr_mono); } /** @@ -604,18 +604,18 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) */ static void timekeeping_forward_now(struct timekeeper *tk) { - struct clocksource *clock = tk->tkr.clock; + struct clocksource *clock = tk->tkr_mono.clock; cycle_t cycle_now, delta; s64 nsec; - cycle_now = tk->tkr.read(clock); - delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); - tk->tkr.cycle_last = cycle_now; + cycle_now = tk->tkr_mono.read(clock); + delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); + tk->tkr_mono.cycle_last = cycle_now; - tk->tkr.xtime_nsec += delta * tk->tkr.mult; + tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult; /* If arch requires, add in get_arch_timeoffset() */ - tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift; + tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift; tk_normalize_xtime(tk); @@ -640,7 +640,7 @@ int __getnstimeofday64(struct timespec64 *ts) seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; - nsecs = timekeeping_get_ns(&tk->tkr); + nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -680,8 +680,8 @@ ktime_t ktime_get(void) do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->tkr.base_mono; - nsecs = timekeeping_get_ns(&tk->tkr); + base = tk->tkr_mono.base; + nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -706,8 +706,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs) do { seq = read_seqcount_begin(&tk_core.seq); - base = ktime_add(tk->tkr.base_mono, *offset); - nsecs = timekeeping_get_ns(&tk->tkr); + base = ktime_add(tk->tkr_mono.base, *offset); + nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -777,7 +777,7 @@ void ktime_get_ts64(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; - nsec = timekeeping_get_ns(&tk->tkr); + nsec = timekeeping_get_ns(&tk->tkr_mono); tomono = tk->wall_to_monotonic; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -863,7 +863,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) ts_real->tv_nsec = 0; nsecs_raw = timekeeping_get_ns_raw(tk); - nsecs_real = timekeeping_get_ns(&tk->tkr); + nsecs_real = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1046,7 +1046,7 @@ static int change_clocksource(void *data) */ if (try_module_get(new->owner)) { if (!new->enable || new->enable(new) == 0) { - old = tk->tkr.clock; + old = tk->tkr_mono.clock; tk_setup_internals(tk, new); if (old->disable) old->disable(old); @@ -1074,11 +1074,11 @@ int timekeeping_notify(struct clocksource *clock) { struct timekeeper *tk = &tk_core.timekeeper; - if (tk->tkr.clock == clock) + if (tk->tkr_mono.clock == clock) return 0; stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); - return tk->tkr.clock == clock ? 0 : -1; + return tk->tkr_mono.clock == clock ? 
0 : -1; } /** @@ -1119,7 +1119,7 @@ int timekeeping_valid_for_hres(void) do { seq = read_seqcount_begin(&tk_core.seq); - ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; + ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1138,7 +1138,7 @@ u64 timekeeping_max_deferment(void) do { seq = read_seqcount_begin(&tk_core.seq); - ret = tk->tkr.clock->max_idle_ns; + ret = tk->tkr_mono.clock->max_idle_ns; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1303,7 +1303,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) void timekeeping_resume(void) { struct timekeeper *tk = &tk_core.timekeeper; - struct clocksource *clock = tk->tkr.clock; + struct clocksource *clock = tk->tkr_mono.clock; unsigned long flags; struct timespec64 ts_new, ts_delta; struct timespec tmp; @@ -1331,16 +1331,16 @@ void timekeeping_resume(void) * The less preferred source will only be tried if there is no better * usable source. The rtc part is handled separately in rtc core code. */ - cycle_now = tk->tkr.read(clock); + cycle_now = tk->tkr_mono.read(clock); if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && - cycle_now > tk->tkr.cycle_last) { + cycle_now > tk->tkr_mono.cycle_last) { u64 num, max = ULLONG_MAX; u32 mult = clock->mult; u32 shift = clock->shift; s64 nsec = 0; - cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, - tk->tkr.mask); + cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, + tk->tkr_mono.mask); /* * "cycle_delta * mutl" may cause 64 bits overflow, if the @@ -1366,7 +1366,7 @@ void timekeeping_resume(void) __timekeeping_inject_sleeptime(tk, &ts_delta); /* Re-base the last cycle value */ - tk->tkr.cycle_last = cycle_now; + tk->tkr_mono.cycle_last = cycle_now; tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); @@ -1519,15 +1519,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, * * XXX - TODO: Doc ntp_error calculation. */ - if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) { + if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) { /* NTP adjustment caused clocksource mult overflow */ WARN_ON_ONCE(1); return; } - tk->tkr.mult += mult_adj; + tk->tkr_mono.mult += mult_adj; tk->xtime_interval += interval; - tk->tkr.xtime_nsec -= offset; + tk->tkr_mono.xtime_nsec -= offset; tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; } @@ -1589,13 +1589,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) tk->ntp_err_mult = 0; } - if (unlikely(tk->tkr.clock->maxadj && - (abs(tk->tkr.mult - tk->tkr.clock->mult) - > tk->tkr.clock->maxadj))) { + if (unlikely(tk->tkr_mono.clock->maxadj && + (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult) + > tk->tkr_mono.clock->maxadj))) { printk_once(KERN_WARNING "Adjusting %s more than 11%% (%ld vs %ld)\n", - tk->tkr.clock->name, (long)tk->tkr.mult, - (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); + tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult, + (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj); } /* @@ -1612,9 +1612,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) * We'll correct this error next time through this function, when * xtime_nsec is not as small. 
*/ - if (unlikely((s64)tk->tkr.xtime_nsec < 0)) { - s64 neg = -(s64)tk->tkr.xtime_nsec; - tk->tkr.xtime_nsec = 0; + if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) { + s64 neg = -(s64)tk->tkr_mono.xtime_nsec; + tk->tkr_mono.xtime_nsec = 0; tk->ntp_error += neg << tk->ntp_error_shift; } } @@ -1629,13 +1629,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) */ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) { - u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift; + u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift; unsigned int clock_set = 0; - while (tk->tkr.xtime_nsec >= nsecps) { + while (tk->tkr_mono.xtime_nsec >= nsecps) { int leap; - tk->tkr.xtime_nsec -= nsecps; + tk->tkr_mono.xtime_nsec -= nsecps; tk->xtime_sec++; /* Figure out if its a leap sec and apply if needed */ @@ -1680,9 +1680,9 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, /* Accumulate one shifted interval */ offset -= interval; - tk->tkr.cycle_last += interval; + tk->tkr_mono.cycle_last += interval; - tk->tkr.xtime_nsec += tk->xtime_interval << shift; + tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift; *clock_set |= accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ @@ -1725,8 +1725,8 @@ void update_wall_time(void) #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET offset = real_tk->cycle_interval; #else - offset = clocksource_delta(tk->tkr.read(tk->tkr.clock), - tk->tkr.cycle_last, tk->tkr.mask); + offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock), + tk->tkr_mono.cycle_last, tk->tkr_mono.mask); #endif /* Check if there's really nothing to do */ @@ -1890,8 +1890,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->tkr.base_mono; - nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift; + base = tk->tkr_mono.base; + nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; @@ -1922,8 +1922,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->tkr.base_mono; - nsecs = timekeeping_get_ns(&tk->tkr); + base = tk->tkr_mono.base; + nsecs = timekeeping_get_ns(&tk->tkr_mono); *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; -- cgit v1.2.3 From 4a4ad80d32cea69ee93bd4589f24dc478804cd80 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 09:28:44 +0100 Subject: time: Add timerkeeper::tkr_raw Introduce tkr_raw and make use of it. base_raw -> tkr_raw.base clock->{mult,shift} -> tkr_raw.{mult.shift} Kill timekeeping_get_ns_raw() in favour of timekeeping_get_ns(&tkr_raw), this removes all mono_raw special casing. Duplicate the updates to tkr_mono.cycle_last into tkr_raw.cycle_last, both need the same value. 
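Condensed from the timekeeping_forward_now() hunk below, the invariant this patch establishes is that both read bases share one hardware clocksource and therefore must share one cycle epoch:

	cycle_now = tk->tkr_mono.read(clock);
	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
				  tk->tkr_mono.mask);
	tk->tkr_mono.cycle_last = cycle_now;
	tk->tkr_raw.cycle_last = cycle_now;	/* keep raw in lock-step */

If the two epochs ever diverged, timekeeping_get_ns(&tk->tkr_raw) would account some cycles twice (or drop them), so every place that advances tkr_mono.cycle_last now advances tkr_raw.cycle_last as well.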
Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.422589590@infradead.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 1405091f3acb..cbb612ee813f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -252,6 +252,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) tk->tkr_mono.mask = clock->mask; tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock); + tk->tkr_raw.clock = clock; + tk->tkr_raw.read = clock->read; + tk->tkr_raw.mask = clock->mask; + tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last; + /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; tmp <<= clock->shift; @@ -278,7 +283,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) else tk->tkr_mono.xtime_nsec <<= shift_change; } + tk->tkr_raw.xtime_nsec = 0; + tk->tkr_mono.shift = clock->shift; + tk->tkr_raw.shift = clock->shift; tk->ntp_error = 0; tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; @@ -290,6 +298,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) * to counteract clock drifting. */ tk->tkr_mono.mult = clock->mult; + tk->tkr_raw.mult = clock->mult; tk->ntp_err_mult = 0; } @@ -316,21 +325,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) return nsec + arch_gettimeoffset(); } -static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) -{ - struct clocksource *clock = tk->tkr_mono.clock; - cycle_t delta; - s64 nsec; - - delta = timekeeping_get_delta(&tk->tkr_mono); - - /* convert delta to nanoseconds. */ - nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); - - /* If arch requires, add in get_arch_timeoffset() */ - return nsec + arch_gettimeoffset(); -} - /** * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. 
* @tkr: Timekeeping readout base from which we take the update @@ -562,7 +556,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); /* Update the monotonic raw base */ - tk->base_raw = timespec64_to_ktime(tk->raw_time); + tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time); /* * The sum of the nanoseconds portions of xtime and @@ -611,6 +605,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) cycle_now = tk->tkr_mono.read(clock); delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); tk->tkr_mono.cycle_last = cycle_now; + tk->tkr_raw.cycle_last = cycle_now; tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult; @@ -619,7 +614,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) tk_normalize_xtime(tk); - nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); + nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift); timespec64_add_ns(&tk->raw_time, nsec); } @@ -748,8 +743,8 @@ ktime_t ktime_get_raw(void) do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->base_raw; - nsecs = timekeeping_get_ns_raw(tk); + base = tk->tkr_raw.base; + nsecs = timekeeping_get_ns(&tk->tkr_raw); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -862,7 +857,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) ts_real->tv_sec = tk->xtime_sec; ts_real->tv_nsec = 0; - nsecs_raw = timekeeping_get_ns_raw(tk); + nsecs_raw = timekeeping_get_ns(&tk->tkr_raw); nsecs_real = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1096,7 +1091,7 @@ void getrawmonotonic64(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); - nsecs = timekeeping_get_ns_raw(tk); + nsecs = timekeeping_get_ns(&tk->tkr_raw); ts64 = tk->raw_time; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1217,7 +1212,6 @@ void __init timekeeping_init(void) tk_set_xtime(tk, &now); tk->raw_time.tv_sec = 0; tk->raw_time.tv_nsec = 0; - tk->base_raw.tv64 = 0; if (boot.tv_sec == 0 && boot.tv_nsec == 0) boot = tk_xtime(tk); @@ -1367,6 +1361,8 @@ void timekeeping_resume(void) /* Re-base the last cycle value */ tk->tkr_mono.cycle_last = cycle_now; + tk->tkr_raw.cycle_last = cycle_now; + tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); @@ -1681,6 +1677,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, /* Accumulate one shifted interval */ offset -= interval; tk->tkr_mono.cycle_last += interval; + tk->tkr_raw.cycle_last += interval; tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift; *clock_set |= accumulate_nsecs_to_secs(tk); -- cgit v1.2.3 From 4498e7467e9e441c18ca12f1ca08460356e0508a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 09:36:19 +0100 Subject: time: Parametrize all tk_fast_mono users In preparation for more tk_fast instances, remove all hard-coded tk_fast_mono references. 
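To illustrate what the parametrization buys: adding a further NMI-safe clock (a hypothetical tk_fast_foo, not part of this patch) now needs only a new instance plus one extra call in timekeeping_update():

	static struct tk_fast tk_fast_foo ____cacheline_aligned;	/* hypothetical */

	u64 ktime_get_foo_fast_ns(void)
	{
		return __ktime_get_fast_ns(&tk_fast_foo);
	}

	/* in timekeeping_update(): */
	update_fast_timekeeper(&tk->tkr_foo, &tk_fast_foo);	/* hypothetical base */

The next patch in this series does exactly this for the raw clock.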
Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.484279927@infradead.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cbb612ee813f..278373edb472 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -364,18 +364,18 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) * slightly wrong timestamp (a few nanoseconds). See * @ktime_get_mono_fast_ns. */ -static void update_fast_timekeeper(struct tk_read_base *tkr) +static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf) { - struct tk_read_base *base = tk_fast_mono.base; + struct tk_read_base *base = tkf->base; /* Force readers off to base[1] */ - raw_write_seqcount_latch(&tk_fast_mono.seq); + raw_write_seqcount_latch(&tkf->seq); /* Update base[0] */ memcpy(base, tkr, sizeof(*base)); /* Force readers back to base[0] */ - raw_write_seqcount_latch(&tk_fast_mono.seq); + raw_write_seqcount_latch(&tkf->seq); /* Update base[1] */ memcpy(base + 1, base, sizeof(*base)); @@ -413,20 +413,25 @@ static void update_fast_timekeeper(struct tk_read_base *tkr) * of the following timestamps. Callers need to be aware of that and * deal with it. */ -u64 notrace ktime_get_mono_fast_ns(void) +static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) { struct tk_read_base *tkr; unsigned int seq; u64 now; do { - seq = raw_read_seqcount(&tk_fast_mono.seq); - tkr = tk_fast_mono.base + (seq & 0x01); + seq = raw_read_seqcount(&tkf->seq); + tkr = tkf->base + (seq & 0x01); now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr); + } while (read_seqcount_retry(&tkf->seq, seq)); - } while (read_seqcount_retry(&tk_fast_mono.seq, seq)); return now; } + +u64 ktime_get_mono_fast_ns(void) +{ + return __ktime_get_fast_ns(&tk_fast_mono); +} EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); /* Suspend-time cycles value for halted fast timekeeper. */ @@ -455,7 +460,7 @@ static void halt_fast_timekeeper(struct timekeeper *tk) memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); cycles_at_suspend = tkr->read(tkr->clock); tkr_dummy.read = dummy_clock_read; - update_fast_timekeeper(&tkr_dummy); + update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); } #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD @@ -586,7 +591,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) memcpy(&shadow_timekeeper, &tk_core.timekeeper, sizeof(tk_core.timekeeper)); - update_fast_timekeeper(&tk->tkr_mono); + update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); } /** -- cgit v1.2.3 From f09cb9a1808e35ad7502ea39b6bfb443c7fa0f19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 09:39:08 +0100 Subject: time: Introduce tk_fast_raw Add the NMI safe CLOCK_MONOTONIC_RAW accessor.. 
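A sketch of the intended use; the handler and sink below are made-up names, but the accessor is the one added by this patch. Because the latch always leaves one copy of the readout base consistent, the call never spins, even if the NMI landed in the middle of an update:

	/* hypothetical NMI/FIQ-context tracer */
	static void my_nmi_trace_event(void)
	{
		u64 ts = ktime_get_raw_fast_ns();	/* lock-free, monotonic-raw ns */

		record_sample(ts);			/* hypothetical sink */
	}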
Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.562746929@infradead.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 278373edb472..c3fcff06d30a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -59,6 +59,7 @@ struct tk_fast { }; static struct tk_fast tk_fast_mono ____cacheline_aligned; +static struct tk_fast tk_fast_raw ____cacheline_aligned; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -434,6 +435,12 @@ u64 ktime_get_mono_fast_ns(void) } EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); +u64 ktime_get_raw_fast_ns(void) +{ + return __ktime_get_fast_ns(&tk_fast_raw); +} +EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); + /* Suspend-time cycles value for halted fast timekeeper. */ static cycle_t cycles_at_suspend; @@ -461,6 +468,11 @@ static void halt_fast_timekeeper(struct timekeeper *tk) cycles_at_suspend = tkr->read(tkr->clock); tkr_dummy.read = dummy_clock_read; update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); + + tkr = &tk->tkr_raw; + memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); + tkr_dummy.read = dummy_clock_read; + update_fast_timekeeper(&tkr_dummy, &tk_fast_raw); } #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD @@ -592,6 +604,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) sizeof(tk_core.timekeeper)); update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); + update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); } /** -- cgit v1.2.3 From 554ef3876c6acdff1331feab10275e9e9e0adb84 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 27 Feb 2015 17:21:32 +0530 Subject: clockevents: Handle tick device's resume separately Upcoming patch will redefine possible states of a clockevent device. The RESUME mode is a special case only for tick's clockevent devices. In future it can be replaced by ->resume() callback already available for clockevent devices. Lets handle it separately so that clockevents_set_mode() only handles states valid across all devices. This also renames set_mode_resume() to tick_resume() to make it more explicit. 
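From a driver's point of view the change looks roughly like this (the foo_* names are illustrative, not a real driver):

	static int foo_tick_resume(struct clock_event_device *evt)
	{
		foo_hw_reinit();	/* hypothetical: reprogram timer after resume */
		return 0;
	}

	static struct clock_event_device foo_clockevent = {
		/* ... */
		.tick_resume	= foo_tick_resume,	/* was: .set_mode_resume */
	};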
Signed-off-by: Viresh Kumar Acked-by: Peter Zijlstra Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: Kevin Hilman Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/c1b0112410870f49e7bf06958e1483eac6c15e20.1425037853.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 30 +++++++++++++++++++++--------- kernel/time/tick-broadcast.c | 2 +- kernel/time/tick-common.c | 2 +- kernel/time/tick-internal.h | 1 + kernel/time/timer_list.c | 4 ++-- 5 files changed, 26 insertions(+), 13 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 489642b08d64..1b0ea63de69c 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -100,7 +100,7 @@ static int __clockevents_set_mode(struct clock_event_device *dev, /* Transition with legacy set_mode() callback */ if (dev->set_mode) { /* Legacy callback doesn't support new modes */ - if (mode > CLOCK_EVT_MODE_RESUME) + if (mode > CLOCK_EVT_MODE_ONESHOT) return -ENOSYS; dev->set_mode(mode, dev); return 0; @@ -133,13 +133,6 @@ static int __clockevents_set_mode(struct clock_event_device *dev, return -ENOSYS; return dev->set_mode_oneshot(dev); - case CLOCK_EVT_MODE_RESUME: - /* Optional callback */ - if (dev->set_mode_resume) - return dev->set_mode_resume(dev); - else - return 0; - default: return -ENOSYS; } @@ -184,6 +177,25 @@ void clockevents_shutdown(struct clock_event_device *dev) dev->next_event.tv64 = KTIME_MAX; } +/** + * clockevents_tick_resume - Resume the tick device before using it again + * @dev: device to resume + */ +int clockevents_tick_resume(struct clock_event_device *dev) +{ + int ret = 0; + + if (dev->set_mode) + dev->set_mode(CLOCK_EVT_MODE_RESUME, dev); + else if (dev->tick_resume) + ret = dev->tick_resume(dev); + + if (likely(!ret)) + dev->mode = CLOCK_EVT_MODE_RESUME; + + return ret; +} + #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST /* Limit min_delta to a jiffie */ @@ -433,7 +445,7 @@ static int clockevents_sanity_check(struct clock_event_device *dev) if (dev->set_mode) { /* We shouldn't be supporting new modes now */ WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot || - dev->set_mode_shutdown || dev->set_mode_resume); + dev->set_mode_shutdown || dev->tick_resume); return 0; } diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 066f0ec05e48..542d5bb5c13d 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -464,7 +464,7 @@ int tick_resume_broadcast(void) bc = tick_broadcast_device.evtdev; if (bc) { - clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME); + clockevents_tick_resume(bc); switch (tick_broadcast_device.mode) { case TICKDEV_MODE_PERIODIC: diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index f7c515595b42..5c50664c21d7 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -384,7 +384,7 @@ void tick_resume(void) struct tick_device *td = this_cpu_ptr(&tick_cpu_device); int broadcast = tick_resume_broadcast(); - clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); + clockevents_tick_resume(td->evtdev); if (!broadcast) { if (td->mode == TICKDEV_MODE_PERIODIC) diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 366aeb4f2c66..98700e4a2000 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -32,6 +32,7 @@ extern bool tick_check_replacement(struct 
clock_event_device *curdev, extern void tick_install_replacement(struct clock_event_device *dev); extern void clockevents_shutdown(struct clock_event_device *dev); +extern int clockevents_tick_resume(struct clock_event_device *dev); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 2cfd19485824..2b3e9393034d 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -251,9 +251,9 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) SEQ_printf(m, "\n"); } - if (dev->set_mode_resume) { + if (dev->tick_resume) { SEQ_printf(m, " resume: "); - print_name_offset(m, dev->set_mode_resume); + print_name_offset(m, dev->tick_resume); SEQ_printf(m, "\n"); } } -- cgit v1.2.3 From 77e32c89a7117614ab3d66d20c1088de721abfaa Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 27 Feb 2015 17:21:33 +0530 Subject: clockevents: Manage device's state separately for the core 'enum clock_event_mode' is used for two purposes today: - to pass the mode to the clockevent device driver via ::set_mode(). - to manage the state of the device for the clockevents core. For supporting new modes/states we have moved away from the legacy set_mode() callback to new per-mode/state callbacks. New modes/states shouldn't be exposed to the legacy (now OBSOLETE) callbacks, and so we shouldn't add new states to 'enum clock_event_mode'. Let's have separate enums for the two use cases mentioned above. Keep using the earlier enum for the legacy set_mode() callback and mark it OBSOLETE. And add another enum to clearly specify the possible states of a clockevent device. This also renames the newly added per-mode callbacks to reflect state changes. We haven't got rid of the 'mode' member of 'struct clock_event_device' as it is used by some of the clockevent drivers and it will automatically die out once we migrate those drivers to the new interface. It ('mode') is now only updated for the drivers using the legacy interface.
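The resulting split, reconstructed from the state names used below (the clockchips.h change itself is outside the 'kernel/time' slice shown here); the ordering matters, since the two enums keep a 1-to-1 mapping up to *_ONESHOT so the legacy path can cast between them:

	/* legacy, passed to ->set_mode(); now OBSOLETE */
	enum clock_event_mode {
		CLOCK_EVT_MODE_UNUSED,
		CLOCK_EVT_MODE_SHUTDOWN,
		CLOCK_EVT_MODE_PERIODIC,
		CLOCK_EVT_MODE_ONESHOT,
		CLOCK_EVT_MODE_RESUME,
	};

	/* core-internal device state; new states are added here only */
	enum clock_event_state {
		CLOCK_EVT_STATE_DETACHED,	/* replaces MODE_UNUSED */
		CLOCK_EVT_STATE_SHUTDOWN,
		CLOCK_EVT_STATE_PERIODIC,
		CLOCK_EVT_STATE_ONESHOT,
	};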
Suggested-by: Peter Zijlstra Suggested-by: Ingo Molnar Signed-off-by: Viresh Kumar Acked-by: Peter Zijlstra Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: Kevin Hilman Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/b6b0143a8a57bd58352ad35e08c25424c879c0cb.1425037853.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 99 ++++++++++++++++++++++++-------------------- kernel/time/tick-broadcast.c | 20 ++++----- kernel/time/tick-common.c | 7 ++-- kernel/time/tick-oneshot.c | 6 +-- kernel/time/timer_list.c | 12 +++--- 5 files changed, 76 insertions(+), 68 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 1b0ea63de69c..6e53e9a0c2e8 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -94,44 +94,49 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) } EXPORT_SYMBOL_GPL(clockevent_delta2ns); -static int __clockevents_set_mode(struct clock_event_device *dev, - enum clock_event_mode mode) +static int __clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state) { /* Transition with legacy set_mode() callback */ if (dev->set_mode) { /* Legacy callback doesn't support new modes */ - if (mode > CLOCK_EVT_MODE_ONESHOT) + if (state > CLOCK_EVT_STATE_ONESHOT) return -ENOSYS; - dev->set_mode(mode, dev); + /* + * 'clock_event_state' and 'clock_event_mode' have 1-to-1 + * mapping until *_ONESHOT, and so a simple cast will work. + */ + dev->set_mode((enum clock_event_mode)state, dev); + dev->mode = (enum clock_event_mode)state; return 0; } if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; - /* Transition with new mode-specific callbacks */ - switch (mode) { - case CLOCK_EVT_MODE_UNUSED: + /* Transition with new state-specific callbacks */ + switch (state) { + case CLOCK_EVT_STATE_DETACHED: /* * This is an internal state, which is guaranteed to go from - * SHUTDOWN to UNUSED. No driver interaction required. + * SHUTDOWN to DETACHED. No driver interaction required. */ return 0; - case CLOCK_EVT_MODE_SHUTDOWN: - return dev->set_mode_shutdown(dev); + case CLOCK_EVT_STATE_SHUTDOWN: + return dev->set_state_shutdown(dev); - case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_STATE_PERIODIC: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) return -ENOSYS; - return dev->set_mode_periodic(dev); + return dev->set_state_periodic(dev); - case CLOCK_EVT_MODE_ONESHOT: + case CLOCK_EVT_STATE_ONESHOT: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) return -ENOSYS; - return dev->set_mode_oneshot(dev); + return dev->set_state_oneshot(dev); default: return -ENOSYS; @@ -139,26 +144,26 @@ static int __clockevents_set_mode(struct clock_event_device *dev, } /** - * clockevents_set_mode - set the operating mode of a clock event device + * clockevents_set_state - set the operating state of a clock event device * @dev: device to modify - * @mode: new mode + * @state: new state * * Must be called with interrupts disabled ! 
*/ -void clockevents_set_mode(struct clock_event_device *dev, - enum clock_event_mode mode) +void clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state) { - if (dev->mode != mode) { - if (__clockevents_set_mode(dev, mode)) + if (dev->state != state) { + if (__clockevents_set_state(dev, state)) return; - dev->mode = mode; + dev->state = state; /* * A nsec2cyc multiplicator of 0 is invalid and we'd crash * on it, so fix it up and emit a warning: */ - if (mode == CLOCK_EVT_MODE_ONESHOT) { + if (state == CLOCK_EVT_STATE_ONESHOT) { if (unlikely(!dev->mult)) { dev->mult = 1; WARN_ON(1); @@ -173,7 +178,7 @@ void clockevents_set_mode(struct clock_event_device *dev, */ void clockevents_shutdown(struct clock_event_device *dev) { - clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); + clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); dev->next_event.tv64 = KTIME_MAX; } @@ -185,13 +190,12 @@ int clockevents_tick_resume(struct clock_event_device *dev) { int ret = 0; - if (dev->set_mode) + if (dev->set_mode) { dev->set_mode(CLOCK_EVT_MODE_RESUME, dev); - else if (dev->tick_resume) - ret = dev->tick_resume(dev); - - if (likely(!ret)) dev->mode = CLOCK_EVT_MODE_RESUME; + } else if (dev->tick_resume) { + ret = dev->tick_resume(dev); + } return ret; } @@ -248,7 +252,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev) delta = dev->min_delta_ns; dev->next_event = ktime_add_ns(ktime_get(), delta); - if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) return 0; dev->retries++; @@ -285,7 +289,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev) delta = dev->min_delta_ns; dev->next_event = ktime_add_ns(ktime_get(), delta); - if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) return 0; dev->retries++; @@ -317,7 +321,7 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, dev->next_event = expires; - if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) return 0; /* Shortcut for clockevent devices that can deal with ktime. */ @@ -362,7 +366,7 @@ static int clockevents_replace(struct clock_event_device *ced) struct clock_event_device *dev, *newdev = NULL; list_for_each_entry(dev, &clockevent_devices, list) { - if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED) + if (dev == ced || dev->state != CLOCK_EVT_STATE_DETACHED) continue; if (!tick_check_replacement(newdev, dev)) @@ -388,7 +392,7 @@ static int clockevents_replace(struct clock_event_device *ced) static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu) { /* Fast track. 
Device is unused */ - if (ced->mode == CLOCK_EVT_MODE_UNUSED) { + if (ced->state == CLOCK_EVT_STATE_DETACHED) { list_del_init(&ced->list); return 0; } @@ -438,30 +442,30 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) } EXPORT_SYMBOL_GPL(clockevents_unbind); -/* Sanity check of mode transition callbacks */ +/* Sanity check of state transition callbacks */ static int clockevents_sanity_check(struct clock_event_device *dev) { /* Legacy set_mode() callback */ if (dev->set_mode) { /* We shouldn't be supporting new modes now */ - WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot || - dev->set_mode_shutdown || dev->tick_resume); + WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || + dev->set_state_shutdown || dev->tick_resume); return 0; } if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; - /* New mode-specific callbacks */ - if (!dev->set_mode_shutdown) + /* New state-specific callbacks */ + if (!dev->set_state_shutdown) return -EINVAL; if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && - !dev->set_mode_periodic) + !dev->set_state_periodic) return -EINVAL; if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) && - !dev->set_mode_oneshot) + !dev->set_state_oneshot) return -EINVAL; return 0; @@ -478,6 +482,9 @@ void clockevents_register_device(struct clock_event_device *dev) BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); BUG_ON(clockevents_sanity_check(dev)); + /* Initialize state to DETACHED */ + dev->state = CLOCK_EVT_STATE_DETACHED; + if (!dev->cpumask) { WARN_ON(num_possible_cpus() > 1); dev->cpumask = cpumask_of(smp_processor_id()); @@ -541,11 +548,11 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq) { clockevents_config(dev, freq); - if (dev->mode == CLOCK_EVT_MODE_ONESHOT) + if (dev->state == CLOCK_EVT_STATE_ONESHOT) return clockevents_program_event(dev, dev->next_event, false); - if (dev->mode == CLOCK_EVT_MODE_PERIODIC) - return __clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); + if (dev->state == CLOCK_EVT_STATE_PERIODIC) + return __clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC); return 0; } @@ -601,13 +608,13 @@ void clockevents_exchange_device(struct clock_event_device *old, */ if (old) { module_put(old->owner); - clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); + clockevents_set_state(old, CLOCK_EVT_STATE_DETACHED); list_del(&old->list); list_add(&old->list, &clockevents_released); } if (new) { - BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED); + BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED); clockevents_shutdown(new); } local_irq_restore(flags); @@ -693,7 +700,7 @@ int clockevents_notify(unsigned long reason, void *arg) if (cpumask_test_cpu(cpu, dev->cpumask) && cpumask_weight(dev->cpumask) == 1 && !tick_is_broadcast_device(dev)) { - BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED); list_del(&dev->list); } } diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 542d5bb5c13d..f0f8ee9dbc28 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -303,7 +303,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) /* * The device is in periodic mode. 
No reprogramming necessary: */ - if (dev->mode == CLOCK_EVT_MODE_PERIODIC) + if (dev->state == CLOCK_EVT_STATE_PERIODIC) goto unlock; /* @@ -532,8 +532,8 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu, { int ret; - if (bc->mode != CLOCK_EVT_MODE_ONESHOT) - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + if (bc->state != CLOCK_EVT_STATE_ONESHOT) + clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); ret = clockevents_program_event(bc, expires, force); if (!ret) @@ -543,7 +543,7 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu, int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); return 0; } @@ -562,8 +562,8 @@ void tick_check_oneshot_broadcast_this_cpu(void) * switched over, leave the device alone. */ if (td->mode == TICKDEV_MODE_ONESHOT) { - clockevents_set_mode(td->evtdev, - CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(td->evtdev, + CLOCK_EVT_STATE_ONESHOT); } } } @@ -666,7 +666,7 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, if (dev->next_event.tv64 < bc->next_event.tv64) return; } - clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); + clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } static void broadcast_move_bc(int deadcpu) @@ -741,7 +741,7 @@ int tick_broadcast_oneshot_control(unsigned long reason) cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); /* * The cpu which was handling the broadcast * timer marked this cpu in the broadcast @@ -842,7 +842,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) /* Set it up only once ! 
*/ if (bc->event_handler != tick_handle_oneshot_broadcast) { - int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; + int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC; bc->event_handler = tick_handle_oneshot_broadcast; @@ -858,7 +858,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) tick_broadcast_oneshot_mask, tmpmask); if (was_periodic && !cpumask_empty(tmpmask)) { - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); tick_broadcast_init_next_event(tmpmask, tick_next_period); tick_broadcast_set_event(bc, cpu, tick_next_period, 1); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 5c50664c21d7..a5b877130ae9 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -102,7 +102,7 @@ void tick_handle_periodic(struct clock_event_device *dev) tick_periodic(cpu); - if (dev->mode != CLOCK_EVT_MODE_ONESHOT) + if (dev->state != CLOCK_EVT_STATE_ONESHOT) return; for (;;) { /* @@ -140,7 +140,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && !tick_broadcast_oneshot_active()) { - clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); + clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC); } else { unsigned long seq; ktime_t next; @@ -150,7 +150,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) next = tick_next_period; } while (read_seqretry(&jiffies_lock, seq)); - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); for (;;) { if (!clockevents_program_event(dev, next, false)) @@ -365,6 +365,7 @@ void tick_shutdown(unsigned int *cpup) * Prevent that the clock events layer tries to call * the set mode function! 
*/ + dev->state = CLOCK_EVT_STATE_DETACHED; dev->mode = CLOCK_EVT_MODE_UNUSED; clockevents_exchange_device(dev, NULL); dev->event_handler = clockevents_handle_noop; diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 7ce740e78e1b..67a64b1670bf 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -38,7 +38,7 @@ void tick_resume_oneshot(void) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); clockevents_program_event(dev, ktime_get(), true); } @@ -50,7 +50,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev, ktime_t next_event) { newdev->event_handler = handler; - clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(newdev, CLOCK_EVT_STATE_ONESHOT); clockevents_program_event(newdev, next_event, true); } @@ -81,7 +81,7 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) td->mode = TICKDEV_MODE_ONESHOT; dev->event_handler = handler; - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); tick_broadcast_switch_to_oneshot(); return 0; } diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 2b3e9393034d..05aa5590106a 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -233,21 +233,21 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) print_name_offset(m, dev->set_mode); SEQ_printf(m, "\n"); } else { - if (dev->set_mode_shutdown) { + if (dev->set_state_shutdown) { SEQ_printf(m, " shutdown: "); - print_name_offset(m, dev->set_mode_shutdown); + print_name_offset(m, dev->set_state_shutdown); SEQ_printf(m, "\n"); } - if (dev->set_mode_periodic) { + if (dev->set_state_periodic) { SEQ_printf(m, " periodic: "); - print_name_offset(m, dev->set_mode_periodic); + print_name_offset(m, dev->set_state_periodic); SEQ_printf(m, "\n"); } - if (dev->set_mode_oneshot) { + if (dev->set_state_oneshot) { SEQ_printf(m, " oneshot: "); - print_name_offset(m, dev->set_mode_oneshot); + print_name_offset(m, dev->set_state_oneshot); SEQ_printf(m, "\n"); } -- cgit v1.2.3 From de81e64b250d3865a75d221a80b4311e3273670a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 27 Feb 2015 17:21:34 +0530 Subject: clockevents: Don't validate dev->mode against CLOCK_EVT_MODE_UNUSED for new interface It was a requirement of the legacy interface that drivers initialize the ->mode field to 'CLOCK_EVT_MODE_UNUSED'. This field isn't used by the new interface anymore, so it should only be checked for the legacy interface. It could probably be dropped altogether, as the core doesn't rely on it anymore, but let's keep it to support the legacy interface.
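For illustration, the split between the two driver styles looks roughly like this; a minimal sketch with hypothetical 'my_timer' names, not code from this series. Only the legacy path initializes ->mode and has it validated by the core; drivers using the per-state callbacks can ignore the field entirely.

#include <linux/clockchips.h>

/*
 * Legacy style: one multiplexed ->set_mode() callback. The driver is
 * expected to start out with ->mode = CLOCK_EVT_MODE_UNUSED, which the
 * core still validates at registration time.
 */
static void my_timer_set_mode(enum clock_event_mode mode,
			      struct clock_event_device *evt)
{
	switch (mode) {
	case CLOCK_EVT_MODE_PERIODIC:
		/* program the periodic interval */
		break;
	case CLOCK_EVT_MODE_ONESHOT:
		/* switch the hardware to one-shot operation */
		break;
	case CLOCK_EVT_MODE_SHUTDOWN:
	case CLOCK_EVT_MODE_UNUSED:
		/* stop the timer */
		break;
	default:
		break;
	}
}

static struct clock_event_device my_legacy_evt = {
	.name		= "my-timer",
	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.set_mode	= my_timer_set_mode,
	.mode		= CLOCK_EVT_MODE_UNUSED,	/* still checked */
};

/*
 * New style: one callback per state, each returning an error code.
 * ->mode is ignored by the core, so no initialization is needed.
 */
static int my_timer_shutdown(struct clock_event_device *evt)
{
	/* stop the timer */
	return 0;
}

static int my_timer_set_oneshot(struct clock_event_device *evt)
{
	/* switch the hardware to one-shot operation */
	return 0;
}

static struct clock_event_device my_state_evt = {
	.name			= "my-timer",
	.features		= CLOCK_EVT_FEAT_ONESHOT,
	.set_state_shutdown	= my_timer_shutdown,
	.set_state_oneshot	= my_timer_set_oneshot,
};

Either device would then be handed to clockevents_register_device(), where clockevents_sanity_check() applies the matching set of rules.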
Signed-off-by: Viresh Kumar Acked-by: Peter Zijlstra Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: Kevin Hilman Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/c6604fa1a77fe1fc8dcab87769857228fb1dadd5.1425037853.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 6e53e9a0c2e8..73689df1e4b8 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -450,6 +450,8 @@ static int clockevents_sanity_check(struct clock_event_device *dev) /* We shouldn't be supporting new modes now */ WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || dev->set_state_shutdown || dev->tick_resume); + + BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); return 0; } @@ -479,7 +481,6 @@ void clockevents_register_device(struct clock_event_device *dev) { unsigned long flags; - BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); BUG_ON(clockevents_sanity_check(dev)); /* Initialize state to DETACHED */ -- cgit v1.2.3 From 9f083b74df3a7eaa100b456f2dc195512daf728e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:05:19 +0100 Subject: clockevents: Remove CONFIG_GENERIC_CLOCKEVENTS_BUILD This option was meant to simplify the migration to the clock events code. Most architectures have been converted and the option has been dysfunctional as a standalone option for quite some time. Remove it. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5021859.jl9OC1medj@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/Kconfig | 6 ------ kernel/time/Makefile | 6 ++---- kernel/time/clockevents.c | 3 --- kernel/time/tick-internal.h | 4 ++-- 4 files changed, 4 insertions(+), 15 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index d626dc98e8df..579ce1b929af 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -33,12 +33,6 @@ config ARCH_USES_GETTIMEOFFSET config GENERIC_CLOCKEVENTS bool -# Migration helper. 
Builds, but does not invoke -config GENERIC_CLOCKEVENTS_BUILD - bool - default y - depends on GENERIC_CLOCKEVENTS - # Architecture can handle broadcast in a driver-agnostic way config ARCH_HAS_TICK_BROADCAST bool diff --git a/kernel/time/Makefile b/kernel/time/Makefile index c09c07817d7a..01f0312419b3 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -2,15 +2,13 @@ obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o obj-y += timeconv.o timecounter.o posix-clock.o alarmtimer.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o +obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o tick-common.o ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y) obj-y += tick-broadcast.o obj-$(CONFIG_TICK_ONESHOT) += tick-broadcast-hrtimer.o endif obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o -obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o -obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o +obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o obj-$(CONFIG_TIMER_STATS) += timer_stats.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 73689df1e4b8..3531beecbe95 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -645,7 +645,6 @@ void clockevents_resume(void) dev->resume(dev); } -#ifdef CONFIG_GENERIC_CLOCKEVENTS /** * clockevents_notify - notification about relevant events * Returns 0 on success, any other value on error @@ -831,5 +830,3 @@ static int __init clockevents_init_sysfs(void) } device_initcall(clockevents_init_sysfs); #endif /* SYSFS */ - -#endif /* GENERIC_CLOCK_EVENTS */ diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 98700e4a2000..c7b75bec27f2 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -10,7 +10,7 @@ extern seqlock_t jiffies_lock; #define CS_NAME_LEN 32 -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD +#ifdef CONFIG_GENERIC_CLOCKEVENTS #define TICK_DO_TIMER_NONE -1 #define TICK_DO_TIMER_BOOT -2 @@ -167,7 +167,7 @@ static inline int tick_device_is_functional(struct clock_event_device *dev) int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); -#endif +#endif /* GENERIC_CLOCKEVENTS */ extern void do_timer(unsigned long ticks); extern void update_wall_time(void); -- cgit v1.2.3 From bfb83b27519aa7ed9510f601a8f825a2c1484bc2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:06:04 +0100 Subject: tick: Move clocksource related stuff to timekeeping.h Move clocksource related stuff to timekeeping.h and remove the pointless include from ntp.c Signed-off-by: Thomas Gleixner [ rjw: Subject ] Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/2714218.nM5AEfAHj0@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 2 +- kernel/time/jiffies.c | 2 +- kernel/time/ntp.c | 1 - kernel/time/tick-internal.h | 6 ------ kernel/time/timekeeping.h | 7 +++++++ 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c3be3c71bbad..8b4010f0b1b4 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -31,7 +31,7 @@ #include #include -#include "tick-internal.h" +#include "timekeeping.h" #include "timekeeping_internal.h" /** diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index c4bb518725b5..347fecf86a3f 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -25,7 +25,7 @@ #include #include -#include "tick-internal.h" +#include "timekeeping.h" /* The Jiffies based clocksource is the lowest common * denominator clock source which should function on diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 0f60b08a4f07..9ad60d028508 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -17,7 +17,6 @@ #include #include -#include "tick-internal.h" #include "ntp_internal.h" /* diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index c7b75bec27f2..cba52140a298 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -6,10 +6,6 @@ #include "timekeeping.h" -extern seqlock_t jiffies_lock; - -#define CS_NAME_LEN 32 - #ifdef CONFIG_GENERIC_CLOCKEVENTS #define TICK_DO_TIMER_NONE -1 @@ -169,5 +165,3 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); #endif /* GENERIC_CLOCKEVENTS */ -extern void do_timer(unsigned long ticks); -extern void update_wall_time(void); diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 1d91416055d5..ead8794b9a4e 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -19,4 +19,11 @@ extern void timekeeping_clocktai(struct timespec *ts); extern int timekeeping_suspend(void); extern void timekeeping_resume(void); +extern void do_timer(unsigned long ticks); +extern void update_wall_time(void); + +extern seqlock_t jiffies_lock; + +#define CS_NAME_LEN 32 + #endif -- cgit v1.2.3 From b7475eb599ddb2e8cab2dc86ff38a9507463ad6b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:06:47 +0100 Subject: tick: Simplify tick-internal.h tick-internal.h is pretty confusing as a lot of the stub inlines are there several times. Disentangle the maze and create clear functional sections. Signed-off-by: Thomas Gleixner [ rjw: Subject ] Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/16068264.vcNp79HLaT@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/tick-internal.h | 145 +++++++++++++++----------------------------- 1 file changed, 49 insertions(+), 96 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index cba52140a298..d86eb8d485e9 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -27,14 +27,19 @@ extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); -extern void clockevents_shutdown(struct clock_event_device *dev); extern int clockevents_tick_resume(struct clock_event_device *dev); +/* Check, if the device is functional or a dummy for broadcast */ +static inline int tick_device_is_functional(struct clock_event_device *dev) +{ + return !(dev->features & CLOCK_EVT_FEAT_DUMMY); +} +extern void clockevents_shutdown(struct clock_event_device *dev); +extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); +#endif /* GENERIC_CLOCKEVENTS */ -/* - * NO_HZ / high resolution timer shared code - */ +/* Oneshot related functions */ #ifdef CONFIG_TICK_ONESHOT extern void tick_setup_oneshot(struct clock_event_device *newdev, void (*handler)(struct clock_event_device *), @@ -43,69 +48,19 @@ extern int tick_program_event(ktime_t expires, int force); extern void tick_oneshot_notify(void); extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); extern void tick_resume_oneshot(void); -# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); -extern int tick_broadcast_oneshot_control(unsigned long reason); -extern void tick_broadcast_switch_to_oneshot(void); -extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); -extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); -extern int tick_broadcast_oneshot_active(void); -extern void tick_check_oneshot_broadcast_this_cpu(void); -bool tick_broadcast_oneshot_available(void); -# else /* BROADCAST */ -static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) -{ - BUG(); -} -static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } -static inline void tick_broadcast_switch_to_oneshot(void) { } -static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } -static inline int tick_broadcast_oneshot_active(void) { return 0; } -static inline void tick_check_oneshot_broadcast_this_cpu(void) { } -static inline bool tick_broadcast_oneshot_available(void) { return true; } -# endif /* !BROADCAST */ - +static inline bool tick_oneshot_possible(void) { return true; } #else /* !ONESHOT */ static inline void tick_setup_oneshot(struct clock_event_device *newdev, void (*handler)(struct clock_event_device *), - ktime_t nextevt) -{ - BUG(); -} -static inline void tick_resume_oneshot(void) -{ - BUG(); -} -static inline int tick_program_event(ktime_t expires, int force) -{ - return 0; -} + ktime_t nextevt) { BUG(); } +static inline void tick_resume_oneshot(void) { BUG(); } +static inline int tick_program_event(ktime_t expires, int force) { return 0; } static inline void tick_oneshot_notify(void) { } -static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) -{ - BUG(); -} -static inline int 
tick_broadcast_oneshot_control(unsigned long reason) { return 0; } -static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } -static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) -{ - return 0; -} -static inline int tick_broadcast_oneshot_active(void) { return 0; } -static inline bool tick_broadcast_oneshot_available(void) { return false; } +static inline bool tick_oneshot_possible(void) { return false; } #endif /* !TICK_ONESHOT */ -/* NO_HZ_FULL internal */ -#ifdef CONFIG_NO_HZ_FULL -extern void tick_nohz_init(void); -# else -static inline void tick_nohz_init(void) { } -#endif - -/* - * Broadcasting support - */ +/* Broadcasting support */ #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); extern void tick_install_broadcast_device(struct clock_event_device *dev); @@ -115,53 +70,51 @@ extern void tick_shutdown_broadcast(unsigned int *cpup); extern void tick_suspend_broadcast(void); extern int tick_resume_broadcast(void); extern void tick_broadcast_init(void); -extern void -tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); -int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); - +extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); +extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); #else /* !BROADCAST */ - -static inline void tick_install_broadcast_device(struct clock_event_device *dev) -{ -} - -static inline int tick_is_broadcast_device(struct clock_event_device *dev) -{ - return 0; -} -static inline int tick_device_uses_broadcast(struct clock_event_device *dev, - int cpu) -{ - return 0; -} +static inline void tick_install_broadcast_device(struct clock_event_device *dev) { } +static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; } +static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; } static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { } static inline void tick_shutdown_broadcast(unsigned int *cpup) { } static inline void tick_suspend_broadcast(void) { } static inline int tick_resume_broadcast(void) { return 0; } static inline void tick_broadcast_init(void) { } -static inline int tick_broadcast_update_freq(struct clock_event_device *dev, - u32 freq) { return -ENODEV; } +static inline int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) { return -ENODEV; } -/* - * Set the periodic handler in non broadcast mode - */ -static inline void tick_set_periodic_handler(struct clock_event_device *dev, - int broadcast) +/* Set the periodic handler in non broadcast mode */ +static inline void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) { dev->event_handler = tick_handle_periodic; } #endif /* !BROADCAST */ -/* - * Check, if the device is functional or a dummy for broadcast - */ -static inline int tick_device_is_functional(struct clock_event_device *dev) -{ - return !(dev->features & CLOCK_EVT_FEAT_DUMMY); -} - -int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); - -#endif /* GENERIC_CLOCKEVENTS */ +/* Functions related to oneshot broadcasting */ +#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); +extern int 
tick_broadcast_oneshot_control(unsigned long reason); +extern void tick_broadcast_switch_to_oneshot(void); +extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); +extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); +extern int tick_broadcast_oneshot_active(void); +extern void tick_check_oneshot_broadcast_this_cpu(void); +bool tick_broadcast_oneshot_available(void); +#else /* BROADCAST && ONESHOT */ +static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } +static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } +static inline void tick_broadcast_switch_to_oneshot(void) { } +static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } +static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { return 0; } +static inline int tick_broadcast_oneshot_active(void) { return 0; } +static inline void tick_check_oneshot_broadcast_this_cpu(void) { } +static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } +#endif /* !BROADCAST && ONESHOT */ +/* NO_HZ_FULL internal */ +#ifdef CONFIG_NO_HZ_FULL +extern void tick_nohz_init(void); +# else +static inline void tick_nohz_init(void) { } +#endif -- cgit v1.2.3 From c1797baf6880174f899ce3960d0598f5bbeeb7ff Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:07:37 +0100 Subject: tick: Move core only declarations and functions to core No point to expose everything to the world. People just believe such functions can be abused for whatever purposes. Sigh. Signed-off-by: Thomas Gleixner [ Rebased on top of 4.0-rc5 ] Signed-off-by: Rafael J. Wysocki Cc: Nicolas Pitre Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/28017337.VbCUc39Gme@vostro.rjw.lan [ Merged to latest timers/core ] Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 2 +- kernel/time/hrtimer.c | 2 +- kernel/time/tick-internal.h | 15 +++++++++++ kernel/time/tick-sched.c | 7 ++++- kernel/time/tick-sched.h | 64 +++++++++++++++++++++++++++++++++++++++++++++ kernel/time/timer_list.c | 2 +- 6 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 kernel/time/tick-sched.h (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 8b4010f0b1b4..c3be3c71bbad 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -31,7 +31,7 @@ #include #include -#include "timekeeping.h" +#include "tick-internal.h" #include "timekeeping_internal.h" /** diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index bee0c1f78091..721d29b99d10 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -54,7 +54,7 @@ #include -#include "timekeeping.h" +#include "tick-internal.h" /* * The timer bases: diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index d86eb8d485e9..dd2c45d057b9 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -5,6 +5,7 @@ #include #include "timekeeping.h" +#include "tick-sched.h" #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -26,6 +27,7 @@ extern void tick_resume(void); extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); +extern int tick_is_oneshot_available(void); extern int clockevents_tick_resume(struct clock_event_device *dev); /* Check, if the device is functional or a dummy for broadcast */ @@ -35,6 +37,9 @@ static inline int tick_device_is_functional(struct 
clock_event_device *dev) } extern void clockevents_shutdown(struct clock_event_device *dev); +extern void clockevents_exchange_device(struct clock_event_device *old, + struct clock_event_device *new); +extern void clockevents_handle_noop(struct clock_event_device *dev); extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); #endif /* GENERIC_CLOCKEVENTS */ @@ -49,6 +54,10 @@ extern void tick_oneshot_notify(void); extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); extern void tick_resume_oneshot(void); static inline bool tick_oneshot_possible(void) { return true; } +extern int tick_oneshot_mode_active(void); +extern void tick_clock_notify(void); +extern int tick_check_oneshot_change(int allow_nohz); +extern int tick_init_highres(void); #else /* !ONESHOT */ static inline void tick_setup_oneshot(struct clock_event_device *newdev, @@ -58,6 +67,9 @@ static inline void tick_resume_oneshot(void) { BUG(); } static inline int tick_program_event(ktime_t expires, int force) { return 0; } static inline void tick_oneshot_notify(void) { } static inline bool tick_oneshot_possible(void) { return false; } +static inline int tick_oneshot_mode_active(void) { return 0; } +static inline void tick_clock_notify(void) { } +static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } #endif /* !TICK_ONESHOT */ /* Broadcasting support */ @@ -72,6 +84,8 @@ extern int tick_resume_broadcast(void); extern void tick_broadcast_init(void); extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); +extern struct tick_device *tick_get_broadcast_device(void); +extern struct cpumask *tick_get_broadcast_mask(void); #else /* !BROADCAST */ static inline void tick_install_broadcast_device(struct clock_event_device *dev) { } static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; } @@ -101,6 +115,7 @@ extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); +extern struct cpumask *tick_get_broadcast_oneshot_mask(void); #else /* BROADCAST && ONESHOT */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a4c4edac4528..914259128145 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -34,7 +34,7 @@ /* * Per cpu nohz control structure */ -DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); +static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); /* * The time, when the last jiffy update happened. Protected by jiffies_lock. 
@@ -416,6 +416,11 @@ static int __init setup_tick_nohz(char *str) __setup("nohz=", setup_tick_nohz); +int tick_nohz_tick_stopped(void) +{ + return __this_cpu_read(tick_cpu_sched.tick_stopped); +} + /** * tick_nohz_update_jiffies - update jiffies when idle was interrupted * diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h new file mode 100644 index 000000000000..930743249127 --- /dev/null +++ b/kernel/time/tick-sched.h @@ -0,0 +1,64 @@ +#ifndef _TICK_SCHED_H +#define _TICK_SCHED_H + +#include + +enum tick_nohz_mode { + NOHZ_MODE_INACTIVE, + NOHZ_MODE_LOWRES, + NOHZ_MODE_HIGHRES, +}; + +/** + * struct tick_sched - sched tick emulation and no idle tick control/stats + * @sched_timer: hrtimer to schedule the periodic tick in high + * resolution mode + * @last_tick: Store the last tick expiry time when the tick + * timer is modified for nohz sleeps. This is necessary + * to resume the tick timer operation in the timeline + * when the CPU returns from nohz sleep. + * @tick_stopped: Indicator that the idle tick has been stopped + * @idle_jiffies: jiffies at the entry to idle for idle time accounting + * @idle_calls: Total number of idle calls + * @idle_sleeps: Number of idle calls, where the sched tick was stopped + * @idle_entrytime: Time when the idle call was entered + * @idle_waketime: Time when the idle was interrupted + * @idle_exittime: Time when the idle state was left + * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped + * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding + * @sleep_length: Duration of the current idle sleep + * @do_timer_lst: CPU was the last one doing do_timer before going idle + */ +struct tick_sched { + struct hrtimer sched_timer; + unsigned long check_clocks; + enum tick_nohz_mode nohz_mode; + ktime_t last_tick; + int inidle; + int tick_stopped; + unsigned long idle_jiffies; + unsigned long idle_calls; + unsigned long idle_sleeps; + int idle_active; + ktime_t idle_entrytime; + ktime_t idle_waketime; + ktime_t idle_exittime; + ktime_t idle_sleeptime; + ktime_t iowait_sleeptime; + ktime_t sleep_length; + unsigned long last_jiffies; + unsigned long next_jiffies; + ktime_t idle_expires; + int do_timer_last; +}; + +extern struct tick_sched *tick_get_tick_sched(int cpu); + +extern void tick_setup_sched_timer(void); +#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS +extern void tick_cancel_sched_timer(int cpu); +#else +static inline void tick_cancel_sched_timer(int cpu) { } +#endif + +#endif diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 05aa5590106a..e878c2e0ba45 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -16,10 +16,10 @@ #include #include #include -#include #include +#include "tick-internal.h" struct timer_list_iter { int cpu; -- cgit v1.2.3 From db6f672ef11d7a3c5aa128a3c3e57c92580a25f7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:08:27 +0100 Subject: clockevents: Remove extra local_irq_save() in clockevents_exchange_device() Called with 'clockevents_lock' held and interrupts disabled already. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/51005827.yXt5tjZMBs@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 3531beecbe95..b73002718536 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -595,14 +595,12 @@ void clockevents_handle_noop(struct clock_event_device *dev) * @old: device to release (can be NULL) * @new: device to request (can be NULL) * - * Called from the notifier chain. clockevents_lock is held already + * Called from various tick functions with clockevents_lock held and + * interrupts disabled. */ void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new) { - unsigned long flags; - - local_irq_save(flags); /* * Caller releases a clock event device. We queue it into the * released list and do a notify add later. @@ -618,7 +616,6 @@ void clockevents_exchange_device(struct clock_event_device *old, BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED); clockevents_shutdown(new); } - local_irq_restore(flags); } /** -- cgit v1.2.3 From 4ffee521f36390c7720d493591b764ca35c8030b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:09:16 +0100 Subject: clockevents: Make suspend/resume calls explicit clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the suspend/resume() calls and invoke them directly from the call sites. No locking required at this point because these calls happen with interrupts disabled and a single cpu online. Signed-off-by: Thomas Gleixner [ Rebased on top of 4.0-rc5. ] Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/713674030.jVm1qaHuPf@vostro.rjw.lan [ Rebased on top of latest timers/core. ] Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 9 --------- kernel/time/tick-common.c | 26 +++++++++++++++++++++++--- kernel/time/tick-internal.h | 3 ++- kernel/time/timekeeping.c | 6 ++---- 4 files changed, 27 insertions(+), 17 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index b73002718536..7af614829da1 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -670,15 +670,6 @@ int clockevents_notify(unsigned long reason, void *arg) tick_handover_do_timer(arg); break; - case CLOCK_EVT_NOTIFY_SUSPEND: - tick_suspend(); - tick_suspend_broadcast(); - break; - - case CLOCK_EVT_NOTIFY_RESUME: - tick_resume(); - break; - case CLOCK_EVT_NOTIFY_CPU_DEAD: tick_shutdown_broadcast_oneshot(arg); tick_shutdown_broadcast(arg); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index a5b877130ae9..1a60c2ae96a8 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -373,18 +373,39 @@ void tick_shutdown(unsigned int *cpup) } } +/** + * tick_suspend - Suspend the tick and the broadcast device + * + * Called from syscore_suspend() via timekeeping_suspend with only one + * CPU online and interrupts disabled or from tick_unfreeze() under + * tick_freeze_lock. + * + * No locks required. Nothing can change the per cpu device. 
+ */ void tick_suspend(void) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); clockevents_shutdown(td->evtdev); + tick_suspend_broadcast(); } +/** + * tick_resume - Resume the tick and the broadcast device + * + * Called from syscore_resume() via timekeeping_resume with only one + * CPU online and interrupts disabled or from tick_unfreeze() under + * tick_freeze_lock. + * + * No locks required. Nothing can change the per cpu device. + */ void tick_resume(void) { - struct tick_device *td = this_cpu_ptr(&tick_cpu_device); - int broadcast = tick_resume_broadcast(); + struct tick_device *td; + int broadcast; + broadcast = tick_resume_broadcast(); + td = this_cpu_ptr(&tick_cpu_device); clockevents_tick_resume(td->evtdev); if (!broadcast) { @@ -416,7 +437,6 @@ void tick_freeze(void) timekeeping_suspend(); } else { tick_suspend(); - tick_suspend_broadcast(); } raw_spin_unlock(&tick_freeze_lock); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index dd2c45d057b9..85a957195bf6 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -23,7 +23,6 @@ extern void tick_check_new_device(struct clock_event_device *dev); extern void tick_handover_do_timer(int *cpup); extern void tick_shutdown(unsigned int *cpup); extern void tick_suspend(void); -extern void tick_resume(void); extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); @@ -42,6 +41,8 @@ extern void clockevents_exchange_device(struct clock_event_device *old, extern void clockevents_handle_noop(struct clock_event_device *dev); extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); +#else +static inline void tick_suspend(void) { } #endif /* GENERIC_CLOCKEVENTS */ /* Oneshot related functions */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c3fcff06d30a..5b12292b343a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1389,9 +1389,7 @@ void timekeeping_resume(void) touch_softlockup_watchdog(); - clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); - - /* Resume hrtimers */ + tick_resume(); hrtimers_resume(); } @@ -1444,7 +1442,7 @@ int timekeeping_suspend(void) write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); - clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); + tick_suspend(); clocksource_suspend(); clockevents_suspend(); -- cgit v1.2.3 From 080873ce2d1abd8c0a2b8c87bfa0762546a6b713 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:09:55 +0100 Subject: tick: Make tick_resume_broadcast_oneshot() static Solely used in tick-broadcast.c and the return value is hardcoded 0. Make it static and void. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1689058.QkHYDJSRKu@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/tick-broadcast.c | 7 ++++--- kernel/time/tick-internal.h | 2 -- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f0f8ee9dbc28..60e6c23ce1c7 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -37,8 +37,10 @@ static int tick_broadcast_force; #ifdef CONFIG_TICK_ONESHOT static void tick_broadcast_clear_oneshot(int cpu); +static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); #else static inline void tick_broadcast_clear_oneshot(int cpu) { } +static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { } #endif /* @@ -475,7 +477,7 @@ int tick_resume_broadcast(void) break; case TICKDEV_MODE_ONESHOT: if (!cpumask_empty(tick_broadcast_mask)) - broadcast = tick_resume_broadcast_oneshot(bc); + tick_resume_broadcast_oneshot(bc); break; } } @@ -541,10 +543,9 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu, return ret; } -int tick_resume_broadcast_oneshot(struct clock_event_device *bc) +static void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); - return 0; } /* diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 85a957195bf6..5c9f0eec56b2 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -112,7 +112,6 @@ extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); extern int tick_broadcast_oneshot_control(unsigned long reason); extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); -extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); @@ -122,7 +121,6 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } -static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { return 0; } static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline void tick_check_oneshot_broadcast_this_cpu(void) { } static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } -- cgit v1.2.3 From f46481d0a7cb942b84145acb80ad43bdb1ff8eb4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:11:04 +0100 Subject: tick/xen: Provide and use tick_suspend_local() and tick_resume_local() Xen calls on every cpu into tick_resume() which is just wrong. tick_resume() is for the syscore global suspend/resume invocation. What XEN really wants is a per cpu local resume function. Provide a tick_resume_local() function and use it in XEN. Also provide a complementary tick_suspend_local() and modify tick_unfreeze() and tick_freeze(), respectively, to use the new local tick resume/suspend functions. Signed-off-by: Thomas Gleixner [ Combined two patches, rebased, modified subject/changelog. ] Signed-off-by: Rafael J. 
Wysocki Cc: Boris Ostrovsky Cc: David Vrabel Cc: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1698741.eezk9tnXtG@vostro.rjw.lan [ Merged to latest timers/core. ] Signed-off-by: Ingo Molnar --- kernel/time/tick-broadcast.c | 24 +++++++++++++------ kernel/time/tick-common.c | 55 ++++++++++++++++++++++++++++++-------------- kernel/time/tick-internal.h | 8 +++++-- 3 files changed, 61 insertions(+), 26 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 60e6c23ce1c7..19cfb381faa9 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -455,11 +455,26 @@ void tick_suspend_broadcast(void) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -int tick_resume_broadcast(void) +/* + * This is called from tick_resume_local() on a resuming CPU. That's + * called from the core resume function, tick_unfreeze() and the magic XEN + * resume hackery. + * + * In none of these cases the broadcast device mode can change and the + * bit of the resuming CPU in the broadcast mask is safe as well. + */ +bool tick_resume_check_broadcast(void) +{ + if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) + return false; + else + return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask); +} + +void tick_resume_broadcast(void) { struct clock_event_device *bc; unsigned long flags; - int broadcast = 0; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -472,8 +487,6 @@ int tick_resume_broadcast(void) case TICKDEV_MODE_PERIODIC: if (!cpumask_empty(tick_broadcast_mask)) tick_broadcast_start_periodic(bc); - broadcast = cpumask_test_cpu(smp_processor_id(), - tick_broadcast_mask); break; case TICKDEV_MODE_ONESHOT: if (!cpumask_empty(tick_broadcast_mask)) @@ -482,11 +495,8 @@ int tick_resume_broadcast(void) } } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); - - return broadcast; } - #ifdef CONFIG_TICK_ONESHOT static cpumask_var_t tick_broadcast_oneshot_mask; diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 1a60c2ae96a8..da796d65d1fb 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -374,40 +374,32 @@ void tick_shutdown(unsigned int *cpup) } /** - * tick_suspend - Suspend the tick and the broadcast device + * tick_suspend_local - Suspend the local tick device * - * Called from syscore_suspend() via timekeeping_suspend with only one - * CPU online and interrupts disabled or from tick_unfreeze() under - * tick_freeze_lock. + * Called from the local cpu for freeze with interrupts disabled. * * No locks required. Nothing can change the per cpu device. */ -void tick_suspend(void) +static void tick_suspend_local(void) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); clockevents_shutdown(td->evtdev); - tick_suspend_broadcast(); } /** - * tick_resume - Resume the tick and the broadcast device + * tick_resume_local - Resume the local tick device * - * Called from syscore_resume() via timekeeping_resume with only one - * CPU online and interrupts disabled or from tick_unfreeze() under - * tick_freeze_lock. + * Called from the local CPU for unfreeze or XEN resume magic. * * No locks required. Nothing can change the per cpu device. 
*/ -void tick_resume(void) +void tick_resume_local(void) { - struct tick_device *td; - int broadcast; + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + bool broadcast = tick_resume_check_broadcast(); - broadcast = tick_resume_broadcast(); - td = this_cpu_ptr(&tick_cpu_device); clockevents_tick_resume(td->evtdev); - if (!broadcast) { if (td->mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(td->evtdev, 0); @@ -416,6 +408,35 @@ void tick_resume(void) } } +/** + * tick_suspend - Suspend the tick and the broadcast device + * + * Called from syscore_suspend() via timekeeping_suspend with only one + * CPU online and interrupts disabled or from tick_unfreeze() under + * tick_freeze_lock. + * + * No locks required. Nothing can change the per cpu device. + */ +void tick_suspend(void) +{ + tick_suspend_local(); + tick_suspend_broadcast(); +} + +/** + * tick_resume - Resume the tick and the broadcast device + * + * Called from syscore_resume() via timekeeping_resume with only one + * CPU online and interrupts disabled. + * + * No locks required. Nothing can change the per cpu device. + */ +void tick_resume(void) +{ + tick_resume_broadcast(); + tick_resume_local(); +} + static DEFINE_RAW_SPINLOCK(tick_freeze_lock); static unsigned int tick_freeze_depth; @@ -436,7 +457,7 @@ void tick_freeze(void) if (tick_freeze_depth == num_online_cpus()) { timekeeping_suspend(); } else { - tick_suspend(); + tick_suspend_local(); } raw_spin_unlock(&tick_freeze_lock); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 5c9f0eec56b2..6ba7bce732f2 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -23,6 +23,7 @@ extern void tick_check_new_device(struct clock_event_device *dev); extern void tick_handover_do_timer(int *cpup); extern void tick_shutdown(unsigned int *cpup); extern void tick_suspend(void); +extern void tick_resume(void); extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); @@ -43,6 +44,7 @@ extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); #else static inline void tick_suspend(void) { } +static inline void tick_resume(void) { } #endif /* GENERIC_CLOCKEVENTS */ /* Oneshot related functions */ @@ -81,7 +83,8 @@ extern int tick_is_broadcast_device(struct clock_event_device *dev); extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); extern void tick_shutdown_broadcast(unsigned int *cpup); extern void tick_suspend_broadcast(void); -extern int tick_resume_broadcast(void); +extern void tick_resume_broadcast(void); +extern bool tick_resume_check_broadcast(void); extern void tick_broadcast_init(void); extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); @@ -95,7 +98,8 @@ static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { } static inline void tick_shutdown_broadcast(unsigned int *cpup) { } static inline void tick_suspend_broadcast(void) { } -static inline int tick_resume_broadcast(void) { return 0; } +static inline void tick_resume_broadcast(void) { } +static inline bool tick_resume_check_broadcast(void) { return false; } static inline void tick_broadcast_init(void) { } static inline int 
tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) { return -ENODEV; } -- cgit v1.2.3 From 7270d11c56f594af4d166b2988421cd8ed933dc1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:11:52 +0100 Subject: arm/bL_switcher: Kill tick suspend hackery Use the new tick_suspend/resume_local() and get rid of the homebrewed implementation of these in the ARM bL switcher. The check for the cpumask is completely pointless. There is no harm in suspending a per cpu tick device unconditionally. If that's a real issue then we fix it properly at the core level and not with some completely undocumented hacks in some random core code. Move the tick internals to the core code, now that this nuisance is gone. Signed-off-by: Thomas Gleixner [ rjw: Rebase, changelog ] Signed-off-by: Rafael J. Wysocki Cc: Nicolas Pitre Cc: Peter Zijlstra Cc: Russell King Link: http://lkml.kernel.org/r/1655112.Ws17YsMfN7@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/tick-common.c | 2 +- kernel/time/tick-internal.h | 5 +++++ kernel/time/tick-sched.h | 10 ++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index da796d65d1fb..e28ba5c044c5 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -380,7 +380,7 @@ void tick_shutdown(unsigned int *cpup) * * No locks required. Nothing can change the per cpu device. */ -static void tick_suspend_local(void) +void tick_suspend_local(void) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 6ba7bce732f2..5fc2dafabd58 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -28,6 +28,7 @@ extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); extern int tick_is_oneshot_available(void); +extern struct tick_device *tick_get_device(int cpu); extern int clockevents_tick_resume(struct clock_event_device *dev); /* Check, if the device is functional or a dummy for broadcast */ @@ -39,6 +40,10 @@ static inline int tick_device_is_functional(struct clock_event_device *dev) extern void clockevents_shutdown(struct clock_event_device *dev); extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); +extern void clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state); +extern int clockevents_program_event(struct clock_event_device *dev, + ktime_t expires, bool force); extern void clockevents_handle_noop(struct clock_event_device *dev); extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 930743249127..28b5da3e1a17 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -3,6 +3,16 @@ #include +enum tick_device_mode { + TICKDEV_MODE_PERIODIC, + TICKDEV_MODE_ONESHOT, +}; + +struct tick_device { + struct clock_event_device *evtdev; + enum tick_device_mode mode; +}; + enum tick_nohz_mode { NOHZ_MODE_INACTIVE, NOHZ_MODE_LOWRES, NOHZ_MODE_HIGHRES, }; -- cgit v1.2.3 From 3ae7a939165c6159afb3c09e1d7405b6d1807f2b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Apr 2015 11:26:06 +0200 Subject: tick: Further simplify tick-internal.h Move the broadcasting related section to the 
GENERIC_CLOCKEVENTS=y section - this also solves build failures on architectures that don't use generic clockevents yet. Also standardize include file style to make it easier to read, and use nesting depth aware preprocessor directives to make future merges easier. Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/time/tick-internal.h | 79 +++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 39 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 5fc2dafabd58..b6ba0a44e740 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -9,8 +9,8 @@ #ifdef CONFIG_GENERIC_CLOCKEVENTS -#define TICK_DO_TIMER_NONE -1 -#define TICK_DO_TIMER_BOOT -2 +# define TICK_DO_TIMER_NONE -1 +# define TICK_DO_TIMER_BOOT -2 DECLARE_PER_CPU(struct tick_device, tick_cpu_device); extern ktime_t tick_next_period; @@ -47,41 +47,9 @@ extern int clockevents_program_event(struct clock_event_device *dev, extern void clockevents_handle_noop(struct clock_event_device *dev); extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); -#else -static inline void tick_suspend(void) { } -static inline void tick_resume(void) { } -#endif /* GENERIC_CLOCKEVENTS */ - -/* Oneshot related functions */ -#ifdef CONFIG_TICK_ONESHOT -extern void tick_setup_oneshot(struct clock_event_device *newdev, - void (*handler)(struct clock_event_device *), - ktime_t nextevt); -extern int tick_program_event(ktime_t expires, int force); -extern void tick_oneshot_notify(void); -extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); -extern void tick_resume_oneshot(void); -static inline bool tick_oneshot_possible(void) { return true; } -extern int tick_oneshot_mode_active(void); -extern void tick_clock_notify(void); -extern int tick_check_oneshot_change(int allow_nohz); -extern int tick_init_highres(void); -#else /* !ONESHOT */ -static inline -void tick_setup_oneshot(struct clock_event_device *newdev, - void (*handler)(struct clock_event_device *), - ktime_t nextevt) { BUG(); } -static inline void tick_resume_oneshot(void) { BUG(); } -static inline int tick_program_event(ktime_t expires, int force) { return 0; } -static inline void tick_oneshot_notify(void) { } -static inline bool tick_oneshot_possible(void) { return false; } -static inline int tick_oneshot_mode_active(void) { return 0; } -static inline void tick_clock_notify(void) { } -static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } -#endif /* !TICK_ONESHOT */ /* Broadcasting support */ -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); extern void tick_install_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); @@ -95,7 +63,7 @@ extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadc extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); extern struct tick_device *tick_get_broadcast_device(void); extern struct cpumask *tick_get_broadcast_mask(void); -#else /* !BROADCAST */ +# else /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST: */ static inline void tick_install_broadcast_device(struct clock_event_device *dev) { } static inline int tick_is_broadcast_device(struct 
clock_event_device *dev) { return 0; } static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; } @@ -113,7 +81,40 @@ static inline void tick_set_periodic_handler(struct clock_event_device *dev, int { dev->event_handler = tick_handle_periodic; } -#endif /* !BROADCAST */ +# endif /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */ + +#else /* !GENERIC_CLOCKEVENTS: */ +static inline void tick_suspend(void) { } +static inline void tick_resume(void) { } +#endif /* !GENERIC_CLOCKEVENTS */ + +/* Oneshot related functions */ +#ifdef CONFIG_TICK_ONESHOT +extern void tick_setup_oneshot(struct clock_event_device *newdev, + void (*handler)(struct clock_event_device *), + ktime_t nextevt); +extern int tick_program_event(ktime_t expires, int force); +extern void tick_oneshot_notify(void); +extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); +extern void tick_resume_oneshot(void); +static inline bool tick_oneshot_possible(void) { return true; } +extern int tick_oneshot_mode_active(void); +extern void tick_clock_notify(void); +extern int tick_check_oneshot_change(int allow_nohz); +extern int tick_init_highres(void); +#else /* !CONFIG_TICK_ONESHOT: */ +static inline +void tick_setup_oneshot(struct clock_event_device *newdev, + void (*handler)(struct clock_event_device *), + ktime_t nextevt) { BUG(); } +static inline void tick_resume_oneshot(void) { BUG(); } +static inline int tick_program_event(ktime_t expires, int force) { return 0; } +static inline void tick_oneshot_notify(void) { } +static inline bool tick_oneshot_possible(void) { return false; } +static inline int tick_oneshot_mode_active(void) { return 0; } +static inline void tick_clock_notify(void) { } +static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } +#endif /* !CONFIG_TICK_ONESHOT */ /* Functions related to oneshot broadcasting */ #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) @@ -125,7 +126,7 @@ extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); extern struct cpumask *tick_get_broadcast_oneshot_mask(void); -#else /* BROADCAST && ONESHOT */ +#else /* !(BROADCAST && ONESHOT): */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } static inline void tick_broadcast_switch_to_oneshot(void) { } @@ -133,7 +134,7 @@ static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline void tick_check_oneshot_broadcast_this_cpu(void) { } static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } -#endif /* !(BROADCAST && ONESHOT) */ /* NO_HZ_FULL internal */ #ifdef CONFIG_NO_HZ_FULL -- cgit v1.2.3 From 345527b1edce8df719e0884500c76832a18211c3 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Mon, 30 Mar 2015 14:59:19 +0530 Subject: clockevents: Fix cpu_down() race for hrtimer based broadcasting It was found when doing a hotplug stress test on POWER that the machine either hit softlockups or rcu_sched stall warnings.
The issue was traced to commit: 7cba160ad789 ("powernv/cpuidle: Redesign idle states management") which exposed the cpu_down() race with hrtimer based broadcast mode: 5d1638acb9f6 ("tick: Introduce hrtimer based broadcast") The race is the following: Assume CPU1 is the CPU which holds the hrtimer broadcasting duty before it is taken down. CPU0 CPU1 cpu_down() take_cpu_down() disable_interrupts() cpu_die() while (CPU1 != CPU_DEAD) { msleep(100); switch_to_idle(); stop_cpu_timer(); schedule_broadcast(); } tick_cleanup_cpu_dead() take_over_broadcast() So after CPU1 disabled interrupts it cannot handle the broadcast hrtimer anymore, so CPU0 will be stuck forever. Fix this by explicitly taking over broadcast duty before cpu_die(). This is a temporary workaround. What we really want is a callback in the clockevent device which allows us to do that from the dying CPU by pushing the hrtimer onto a different cpu. That might involve an IPI and is definitely more complex than this immediate fix. Changelog was picked up from: https://lkml.org/lkml/2015/2/16/213 Suggested-by: Thomas Gleixner Tested-by: Nicolas Pitre Signed-off-by: Preeti U. Murthy Cc: linuxppc-dev@lists.ozlabs.org Cc: mpe@ellerman.id.au Cc: nicolas.pitre@linaro.org Cc: peterz@infradead.org Cc: rjw@rjwysocki.net Fixes: http://linuxppc.10917.n7.nabble.com/offlining-cpus-breakage-td88619.html Link: http://lkml.kernel.org/r/20150330092410.24979.59887.stgit@preeti.in.ibm.com [ Merged it to the latest timer tree, renamed the callback, tidied up the changelog. ] Signed-off-by: Ingo Molnar --- kernel/time/tick-broadcast.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 19cfb381faa9..f5e0fd5652dc 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -680,14 +680,19 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } -static void broadcast_move_bc(int deadcpu) +void hotplug_cpu__broadcast_tick_pull(int deadcpu) { - struct clock_event_device *bc = tick_broadcast_device.evtdev; + struct clock_event_device *bc; + unsigned long flags; - if (!bc || !broadcast_needs_cpu(bc, deadcpu)) - return; - /* This moves the broadcast assignment to this cpu */ - clockevents_program_event(bc, bc->next_event, 1); + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); + bc = tick_broadcast_device.evtdev; + + if (bc && broadcast_needs_cpu(bc, deadcpu)) { + /* This moves the broadcast assignment to this CPU: */ + clockevents_program_event(bc, bc->next_event, 1); + } + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } /* @@ -924,8 +929,6 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); cpumask_clear_cpu(cpu, tick_broadcast_force_mask); - broadcast_move_bc(cpu); - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -- cgit v1.2.3 From b337a9380f7effd60d082569dd7e0b97a7549730 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 31 Mar 2015 20:49:00 +0530 Subject: timer: Allocate per-cpu tvec_base's statically Memory for the 'tvec_base' array is allocated separately for the boot CPU (statically) and non-boot CPUs (dynamically). 
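[ Editorial sketch, not part of the patch: the two allocation paths just mentioned amount, in essence, to the following; alloc_tvec_base() is a hypothetical wrapper name, and the rationale for the split is spelled out in the next sentences. ]

	/* Boot CPU: compile-time allocation, usable before any allocator is up */
	struct tvec_base boot_tvec_bases;
	static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;

	/* Non-boot CPUs: allocated dynamically at CPU_UP_PREPARE time */
	static int alloc_tvec_base(int cpu)
	{
		struct tvec_base *base = kzalloc_node(sizeof(*base), GFP_KERNEL,
						      cpu_to_node(cpu));
		if (!base)
			return -ENOMEM;
		per_cpu(tvec_bases, cpu) = base;
		return 0;
	}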
The reason is that __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've made NULL special, hint: lock_timer_base()) and we cannot get a compile-time pointer to per-cpu entries because we don't know where we'll map the section, even for the boot cpu. This can be simplified a bit by statically allocating per-cpu memory. The only disadvantage is that memory for one of the structures will stay unused, i.e. for the boot CPU, which uses boot_tvec_bases. This will also guarantee that tvec_base is cacheline aligned. Even though tvec_base has ____cacheline_aligned stuck on, kzalloc_node() does not actually respect that (but guarantees a minimum u64 alignment). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/17cdf560f2727f687ab159707d0aa591f8a2f82d.1427814611.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 48 +++++++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 29 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 2d3f5c504939..f3cc653f876c 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -90,8 +90,19 @@ struct tvec_base { struct tvec tv5; } ____cacheline_aligned; +/* + * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've + * made NULL special, hint: lock_timer_base()) and we cannot get a compile time + * pointer to per-cpu entries because we don't know where we'll map the section, + * even for the boot cpu. + * + * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the + * rest of them. + */ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); +static DEFINE_PER_CPU(struct tvec_base, __tvec_bases); + static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; /* Functions below help us manage 'deferrable' flag */ @@ -1534,46 +1545,25 @@ EXPORT_SYMBOL(schedule_timeout_uninterruptible); static int init_timers_cpu(int cpu) { - int j; - struct tvec_base *base; + struct tvec_base *base = per_cpu(tvec_bases, cpu); static char tvec_base_done[NR_CPUS]; + int j; if (!tvec_base_done[cpu]) { - static char boot_done; + static char boot_cpu_skipped; - if (boot_done) { - /* - * The APs use this path later in boot - */ - base = kzalloc_node(sizeof(*base), GFP_KERNEL, - cpu_to_node(cpu)); - if (!base) - return -ENOMEM; - - /* Make sure tvec_base has TIMER_FLAG_MASK bits free */ - if (WARN_ON(base != tbase_get_base(base))) { - kfree(base); - return -ENOMEM; - } - per_cpu(tvec_bases, cpu) = base; + if (!boot_cpu_skipped) { + boot_cpu_skipped = 1; /* skip the boot cpu */ } else { - /* - * This is for the boot CPU - we use compile-time - * static initialisation because per-cpu memory isn't - * ready yet and because the memory allocators are not - * initialised either. - */ - boot_done = 1; - base = &boot_tvec_bases; + base = per_cpu_ptr(&__tvec_bases, cpu); + per_cpu(tvec_bases, cpu) = base; } + spin_lock_init(&base->lock); tvec_base_done[cpu] = 1; base->cpu = cpu; - } else { - base = per_cpu(tvec_bases, cpu); } - for (j = 0; j < TVN_SIZE; j++) { INIT_LIST_HEAD(base->tv5.vec + j); INIT_LIST_HEAD(base->tv4.vec + j); -- cgit v1.2.3 From 8def906044c02edcedac79aa3d6310ab4d90c4d8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 31 Mar 2015 20:49:01 +0530 Subject: timer: Don't initialize 'tvec_base' on hotplug There is no need to call init_timers_cpu() on every CPU hotplug event; there is not much we need to reset.
- Timer-lists are already empty at the end of migrate_timers(). - timer_jiffies will be refreshed while adding a new timer, after the CPU is online again. - active_timers and all_timers can be reset from migrate_timers(). Signed-off-by: Viresh Kumar Signed-off-by: Peter Zijlstra (Intel) Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/54a1c30ea7b805af55beb220cadf5a07a21b0a4d.1427814611.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 98 +++++++++++++++++++++++------------------------------ 1 file changed, 43 insertions(+), 55 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index f3cc653f876c..1feb9c7035c0 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1543,43 +1543,6 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible); -static int init_timers_cpu(int cpu) -{ - struct tvec_base *base = per_cpu(tvec_bases, cpu); - static char tvec_base_done[NR_CPUS]; - int j; - - if (!tvec_base_done[cpu]) { - static char boot_cpu_skipped; - - if (!boot_cpu_skipped) { - boot_cpu_skipped = 1; /* skip the boot cpu */ - } else { - base = per_cpu_ptr(&__tvec_bases, cpu); - per_cpu(tvec_bases, cpu) = base; - } - - spin_lock_init(&base->lock); - tvec_base_done[cpu] = 1; - base->cpu = cpu; - } - - for (j = 0; j < TVN_SIZE; j++) { - INIT_LIST_HEAD(base->tv5.vec + j); - INIT_LIST_HEAD(base->tv4.vec + j); - INIT_LIST_HEAD(base->tv3.vec + j); - INIT_LIST_HEAD(base->tv2.vec + j); - } - for (j = 0; j < TVR_SIZE; j++) - INIT_LIST_HEAD(base->tv1.vec + j); - - base->timer_jiffies = jiffies; - base->next_timer = base->timer_jiffies; - base->active_timers = 0; - base->all_timers = 0; - return 0; -} - #ifdef CONFIG_HOTPLUG_CPU static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head) { @@ -1621,6 +1584,9 @@ static void migrate_timers(int cpu) migrate_timer_list(new_base, old_base->tv5.vec + i); } + old_base->active_timers = 0; + old_base->all_timers = 0; + spin_unlock(&old_base->lock); spin_unlock_irq(&new_base->lock); put_cpu_var(tvec_bases); @@ -1630,25 +1596,16 @@ static void migrate_timers(int cpu) static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - long cpu = (long)hcpu; - int err; - - switch(action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - err = init_timers_cpu(cpu); - if (err < 0) - return notifier_from_errno(err); - break; #ifdef CONFIG_HOTPLUG_CPU + switch (action) { case CPU_DEAD: case CPU_DEAD_FROZEN: - migrate_timers(cpu); + migrate_timers((long)hcpu); break; -#endif default: break; } +#endif return NOTIFY_OK; } @@ -1656,18 +1613,49 @@ static struct notifier_block timers_nb = { .notifier_call = timer_cpu_notify, }; +static void __init init_timer_cpu(struct tvec_base *base, int cpu) +{ + int j; -void __init init_timers(void) + base->cpu = cpu; + per_cpu(tvec_bases, cpu) = base; + spin_lock_init(&base->lock); + + for (j = 0; j < TVN_SIZE; j++) { + INIT_LIST_HEAD(base->tv5.vec + j); + INIT_LIST_HEAD(base->tv4.vec + j); + INIT_LIST_HEAD(base->tv3.vec + j); + INIT_LIST_HEAD(base->tv2.vec + j); + } + for (j = 0; j < TVR_SIZE; j++) + INIT_LIST_HEAD(base->tv1.vec + j); + + base->timer_jiffies = jiffies; + base->next_timer = base->timer_jiffies; +} + +static void __init init_timer_cpus(void) { - int err; + struct tvec_base *base; + int local_cpu = smp_processor_id(); + int cpu; + + for_each_possible_cpu(cpu) { + if (cpu == local_cpu) + base = &boot_tvec_bases; + else + base 
= per_cpu_ptr(&__tvec_bases, cpu); + + init_timer_cpu(base, cpu); + } +} +void __init init_timers(void) +{ /* ensure there are enough low bits for flags in timer->base pointer */ BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK); - err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - BUG_ON(err != NOTIFY_OK); - + init_timer_cpus(); init_timer_stats(); register_cpu_notifier(&timers_nb); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); -- cgit v1.2.3 From 3650b57fdf208bc0e36cbe7b5e0744bd0e0cf34d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 31 Mar 2015 20:49:02 +0530 Subject: timer: Further simplify the SMP and HOTPLUG logic Remove one CONFIG_HOTPLUG_CPU #ifdef in trade for introducing one CONFIG_SMP #ifdef. The CONFIG_SMP ifdef avoids declaring the per-CPU __tvec_bases storage on UP systems since they already have boot_tvec_bases. Also (re)add a runtime check on the base alignment -- for the paranoid amongst us :-) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/fdd2d35e169bdc554ffa3fe77f77716298c75ada.1427814611.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 1feb9c7035c0..2ece3aa5069c 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -101,7 +101,6 @@ struct tvec_base { */ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); -static DEFINE_PER_CPU(struct tvec_base, __tvec_bases); static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; @@ -1038,6 +1037,8 @@ int try_to_del_timer_sync(struct timer_list *timer) EXPORT_SYMBOL(try_to_del_timer_sync); #ifdef CONFIG_SMP +static DEFINE_PER_CPU(struct tvec_base, __tvec_bases); + /** * del_timer_sync - deactivate a timer and wait for the handler to finish. 
* @timer: the timer to be deactivated @@ -1591,12 +1592,10 @@ static void migrate_timers(int cpu) spin_unlock_irq(&new_base->lock); put_cpu_var(tvec_bases); } -#endif /* CONFIG_HOTPLUG_CPU */ static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { -#ifdef CONFIG_HOTPLUG_CPU switch (action) { case CPU_DEAD: case CPU_DEAD_FROZEN: @@ -1605,18 +1604,24 @@ static int timer_cpu_notify(struct notifier_block *self, default: break; } -#endif + return NOTIFY_OK; } -static struct notifier_block timers_nb = { - .notifier_call = timer_cpu_notify, -}; +static inline void timer_register_cpu_notifier(void) +{ + cpu_notifier(timer_cpu_notify, 0); +} +#else +static inline void timer_register_cpu_notifier(void) { } +#endif /* CONFIG_HOTPLUG_CPU */ static void __init init_timer_cpu(struct tvec_base *base, int cpu) { int j; + BUG_ON(base != tbase_get_base(base)); + base->cpu = cpu; per_cpu(tvec_bases, cpu) = base; spin_lock_init(&base->lock); @@ -1643,8 +1648,10 @@ static void __init init_timer_cpus(void) for_each_possible_cpu(cpu) { if (cpu == local_cpu) base = &boot_tvec_bases; +#ifdef CONFIG_SMP else base = per_cpu_ptr(&__tvec_bases, cpu); +#endif init_timer_cpu(base, cpu); } @@ -1657,7 +1664,7 @@ void __init init_timers(void) init_timer_cpus(); init_timer_stats(); - register_cpu_notifier(&timers_nb); + timer_register_cpu_notifier(); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } -- cgit v1.2.3 From 9a806ddbb9a18c510e4acdcc828b9a87f5fd3aef Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:21 -0700 Subject: time: Add y2038 safe read_boot_clock64() As part of addressing in-kernel y2038 issues, this patch adds read_boot_clock64() and replaces all the call sites of read_boot_clock() with this function. This is a __weak implementation, which simply calls the existing y2038 unsafe read_boot_clock(). This allows architecture specific implementations to be converted independently, and eventually the y2038 unsafe read_boot_clock() can be removed after all its architecture specific implementations have been converted to read_boot_clock64(). 
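[ Editorial sketch, not part of the patch: an architecture converts by supplying a non-weak 64-bit implementation, which overrides the __weak default at link time with no Kconfig plumbing needed; arch_read_boot_seconds() is a hypothetical stand-in for the platform's real boot-time source. ]

	void read_boot_clock64(struct timespec64 *ts)
	{
		/* Hypothetical helper returning 64-bit (y2038 safe) seconds */
		ts->tv_sec = arch_read_boot_seconds();
		ts->tv_nsec = 0;
	}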
Suggested-by: Arnd Bergmann Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 5b12292b343a..652e50a9c6ed 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1188,6 +1188,14 @@ void __weak read_boot_clock(struct timespec *ts) ts->tv_nsec = 0; } +void __weak read_boot_clock64(struct timespec64 *ts64) +{ + struct timespec ts; + + read_boot_clock(&ts); + *ts64 = timespec_to_timespec64(ts); +} + /* * timekeeping_init - Initializes the clocksource and common timekeeping values */ @@ -1209,8 +1217,7 @@ void __init timekeeping_init(void) } else if (now.tv_sec || now.tv_nsec) persistent_clock_exist = true; - read_boot_clock(&ts); - boot = timespec_to_timespec64(ts); + read_boot_clock64(&boot); if (!timespec64_valid_strict(&boot)) { pr_warn("WARNING: Boot clock returned invalid value!\n" " Check your CMOS/BIOS settings.\n"); -- cgit v1.2.3 From 2ee966320028ac846654eba5344540eeb4dc228d Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:22 -0700 Subject: time: Add y2038 safe read_persistent_clock64() As part of addressing in-kernel y2038 issues, this patch adds read_persistent_clock64() and replaces all the call sites of read_persistent_clock() with this function. This is a __weak implementation, which simply calls the existing y2038 unsafe read_persistent_clock(). This allows architecture specific implementations to be converted independently, and eventually the y2038 unsafe read_persistent_clock() can be removed after all its architecture specific implementations have been converted to read_persistent_clock64(). Suggested-by: Arnd Bergmann Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 652e50a9c6ed..b1dbfa573dce 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1173,6 +1173,14 @@ void __weak read_persistent_clock(struct timespec *ts) ts->tv_nsec = 0; } +void __weak read_persistent_clock64(struct timespec64 *ts64) +{ + struct timespec ts; + + read_persistent_clock(&ts); + *ts64 = timespec_to_timespec64(ts); +} + /** * read_boot_clock - Return time of the system start. 
* @@ -1205,10 +1213,8 @@ void __init timekeeping_init(void) struct clocksource *clock; unsigned long flags; struct timespec64 now, boot, tmp; - struct timespec ts; - read_persistent_clock(&ts); - now = timespec_to_timespec64(ts); + read_persistent_clock64(&now); if (!timespec64_valid_strict(&now)) { pr_warn("WARNING: Persistent clock returned invalid value!\n" " Check your CMOS/BIOS settings.\n"); @@ -1278,7 +1284,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values * @delta: pointer to a timespec64 delta value * - * This hook is for architectures that cannot support read_persistent_clock + * This hook is for architectures that cannot support read_persistent_clock64 * because their RTC/persistent clock is only accessible when irqs are enabled. * * This function should only be called by rtc_resume(), and allows @@ -1325,12 +1331,10 @@ void timekeeping_resume(void) struct clocksource *clock = tk->tkr_mono.clock; unsigned long flags; struct timespec64 ts_new, ts_delta; - struct timespec tmp; cycle_t cycle_now, cycle_delta; bool suspendtime_found = false; - read_persistent_clock(&tmp); - ts_new = timespec_to_timespec64(tmp); + read_persistent_clock64(&ts_new); clockevents_resume(); clocksource_resume(); @@ -1406,10 +1410,8 @@ int timekeeping_suspend(void) unsigned long flags; struct timespec64 delta, delta_delta; static struct timespec64 old_delta; - struct timespec tmp; - read_persistent_clock(&tmp); - timekeeping_suspend_time = timespec_to_timespec64(tmp); + read_persistent_clock64(&timekeeping_suspend_time); /* * On some systems the persistent_clock can not be detected at -- cgit v1.2.3 From 3c00a1fe8496ff29ab62764bb3f4ce4b48089004 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:23 -0700 Subject: time: Add y2038 safe update_persistent_clock64() As part of addressing in-kernel y2038 issues, this patch adds update_persistent_clock64() and replaces all the call sites of update_persistent_clock() with this function. This is a __weak implementation, which simply calls the existing y2038 unsafe update_persistent_clock(). This allows architecture specific implementations to be converted independently, and eventually y2038-unsafe update_persistent_clock() can be removed after all its architecture specific implementations have been converted to update_persistent_clock64(). 
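[ Editorial sketch, not part of the patch: the arch-side conversion mirrors the read side, keeping the error return of the 32-bit API; arch_has_writable_rtc() and arch_rtc_write_time64() are hypothetical helpers. ]

	int update_persistent_clock64(struct timespec64 now)
	{
		if (!arch_has_writable_rtc())
			return -ENODEV;
		/* Write 64-bit seconds to the hardware RTC */
		return arch_rtc_write_time64(now.tv_sec);
	}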
Suggested-by: Arnd Bergmann Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 9ad60d028508..7a681003001c 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -458,6 +458,16 @@ out: return leap; } +#ifdef CONFIG_GENERIC_CMOS_UPDATE +int __weak update_persistent_clock64(struct timespec64 now64) +{ + struct timespec now; + + now = timespec64_to_timespec(now64); + return update_persistent_clock(now); +} +#endif + #if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) static void sync_cmos_clock(struct work_struct *work); @@ -493,8 +503,9 @@ static void sync_cmos_clock(struct work_struct *work) if (persistent_clock_is_local) adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); #ifdef CONFIG_GENERIC_CMOS_UPDATE - fail = update_persistent_clock(timespec64_to_timespec(adjust)); + fail = update_persistent_clock64(adjust); #endif + #ifdef CONFIG_RTC_SYSTOHC if (fail == -ENODEV) fail = rtc_set_ntp_time(adjust); -- cgit v1.2.3 From 7f2981393af31a854879f2496cab4c978e886902 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:35 -0700 Subject: time: Don't build timekeeping_inject_sleeptime64() if no one uses it timekeeping_inject_sleeptime64() is only used by RTC suspend/resume, so add build dependencies on the necessary RTC related macros. Signed-off-by: Xunlei Pang [ Improve commit message clarity. ] Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-16-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b1dbfa573dce..3be559b6fd0a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1280,6 +1280,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, tk_debug_account_sleep_time(delta); } +#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) /** * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values * @delta: pointer to a timespec64 delta value @@ -1317,6 +1318,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) /* signal hrtimers about time change */ clock_was_set(); } +#endif /** * timekeeping_resume - Resumes the generic timekeeping subsystem. -- cgit v1.2.3 From 264bb3f79f2a465477cdcd2f0554e21aedc443a3 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:37 -0700 Subject: time: Fix a bug in timekeeping_suspend() with no persistent clock When there's no persistent clock, normally timekeeping_suspend_time should always be zero, but this can break in timekeeping_suspend(). At T1, there was a system suspend, so old_delta was assigned T1. 
After some time, one time adjustment happened, and xtime got the value of T1-dt(0s Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-18-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3be559b6fd0a..b7db4916415b 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1255,7 +1255,7 @@ void __init timekeeping_init(void) raw_spin_unlock_irqrestore(&timekeeper_lock, flags); } -/* time in seconds when suspend began */ +/* time in seconds when suspend began for persistent clock */ static struct timespec64 timekeeping_suspend_time; /** @@ -1428,24 +1428,26 @@ int timekeeping_suspend(void) timekeeping_forward_now(tk); timekeeping_suspended = 1; - /* - * To avoid drift caused by repeated suspend/resumes, - * which each can add ~1 second drift error, - * try to compensate so the difference in system time - * and persistent_clock time stays close to constant. - */ - delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time); - delta_delta = timespec64_sub(delta, old_delta); - if (abs(delta_delta.tv_sec) >= 2) { + if (has_persistent_clock()) { /* - * if delta_delta is too large, assume time correction - * has occured and set old_delta to the current delta. + * To avoid drift caused by repeated suspend/resumes, + * which each can add ~1 second drift error, + * try to compensate so the difference in system time + * and persistent_clock time stays close to constant. */ - old_delta = delta; - } else { - /* Otherwise try to adjust old_system to compensate */ - timekeeping_suspend_time = - timespec64_add(timekeeping_suspend_time, delta_delta); + delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time); + delta_delta = timespec64_sub(delta, old_delta); + if (abs(delta_delta.tv_sec) >= 2) { + /* + * if delta_delta is too large, assume time correction + * has occurred and set old_delta to the current delta. + */ + old_delta = delta; + } else { + /* Otherwise try to adjust old_system to compensate */ + timekeeping_suspend_time = + timespec64_add(timekeeping_suspend_time, delta_delta); + } } timekeeping_update(tk, TK_MIRROR); -- cgit v1.2.3 From 0fa88cb4b82b5cf7429bc1cef9db006ca035754e Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:38 -0700 Subject: time, drivers/rtc: Don't bother with rtc_resume() for the nonstop clocksource If a system does not provide a persistent_clock(), the time will be updated on resume by rtc_resume(). With the addition of the non-stop clocksources for suspend timing, those systems set the time on resume in timekeeping_resume(), but may not provide a valid persistent_clock(). This results in the rtc_resume() logic thinking that no one has set the time, and it will then overwrite the suspend time again, which is not necessary and only increases clock error. So, fix this for rtc_resume(). This patch also improves the name of persistent_clock_exist to make it more grammatical.
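[ Editorial sketch, not part of the patch: on the RTC side the coordination reduces to an early-out in the resume callback; the real rtc_resume() in drivers/rtc/class.c does more than this. ]

	static int rtc_resume(struct device *dev)
	{
		/* Timekeeping already injected sleep time via a better source */
		if (timekeeping_rtc_skipresume())
			return 0;

		/* ... otherwise read the RTC and inject the suspend offset ... */
		return 0;
	}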
Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-19-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 66 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 17 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b7db4916415b..79b9bc6e7876 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -64,9 +64,6 @@ static struct tk_fast tk_fast_raw ____cacheline_aligned; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; -/* Flag for if there is a persistent clock on this platform */ -bool __read_mostly persistent_clock_exist = false; - static inline void tk_normalize_xtime(struct timekeeper *tk) { while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) { @@ -1204,6 +1201,12 @@ void __weak read_boot_clock64(struct timespec64 *ts64) *ts64 = timespec_to_timespec64(ts); } +/* Flag for if timekeeping_resume() has injected sleeptime */ +static bool sleeptime_injected; + +/* Flag for if there is a persistent clock on this platform */ +static bool persistent_clock_exists; + /* * timekeeping_init - Initializes the clocksource and common timekeeping values */ @@ -1221,7 +1224,7 @@ void __init timekeeping_init(void) now.tv_sec = 0; now.tv_nsec = 0; } else if (now.tv_sec || now.tv_nsec) - persistent_clock_exist = true; + persistent_clock_exists = true; read_boot_clock64(&boot); if (!timespec64_valid_strict(&boot)) { @@ -1281,12 +1284,48 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, } #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) +/** + * We have three kinds of time sources to use for sleep time + * injection, the preference order is: + * 1) non-stop clocksource + * 2) persistent clock (ie: RTC accessible when irqs are off) + * 3) RTC + * + * 1) and 2) are used by timekeeping, 3) by RTC subsystem. + * If system has neither 1) nor 2), 3) will be used finally. + * + * + * If timekeeping has injected sleeptime via either 1) or 2), + * 3) becomes needless, so in this case we don't need to call + * rtc_resume(), and this is what timekeeping_rtc_skipresume() + * means. + */ +bool timekeeping_rtc_skipresume(void) +{ + return sleeptime_injected; +} + +/** + * 1) can be determined whether to use or not only when doing + * timekeeping_resume() which is invoked after rtc_suspend(), + * so we can't skip rtc_suspend() surely if system has 1). + * + * But if system has 2), 2) will definitely be used, so in this + * case we don't need to call rtc_suspend(), and this is what + * timekeeping_rtc_skipsuspend() means. + */ +bool timekeeping_rtc_skipsuspend(void) +{ + return persistent_clock_exists; +} + /** * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values * @delta: pointer to a timespec64 delta value * * This hook is for architectures that cannot support read_persistent_clock64 * because their RTC/persistent clock is only accessible when irqs are enabled. + * and also don't have an effective nonstop clocksource. * * This function should only be called by rtc_resume(), and allows * a suspend offset to be injected into the timekeeping values. 
@@ -1296,13 +1335,6 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; - /* - * Make sure we don't set the clock twice, as timekeeping_resume() - * already did it - */ - if (has_persistent_clock()) - return; - raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); @@ -1334,8 +1366,8 @@ void timekeeping_resume(void) unsigned long flags; struct timespec64 ts_new, ts_delta; cycle_t cycle_now, cycle_delta; - bool suspendtime_found = false; + sleeptime_injected = false; read_persistent_clock64(&ts_new); clockevents_resume(); @@ -1381,13 +1413,13 @@ void timekeeping_resume(void) nsec += ((u64) cycle_delta * mult) >> shift; ts_delta = ns_to_timespec64(nsec); - suspendtime_found = true; + sleeptime_injected = true; } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) { ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time); - suspendtime_found = true; + sleeptime_injected = true; } - if (suspendtime_found) + if (sleeptime_injected) __timekeeping_inject_sleeptime(tk, &ts_delta); /* Re-base the last cycle value */ @@ -1421,14 +1453,14 @@ int timekeeping_suspend(void) * value returned, update the persistent_clock_exists flag. */ if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec) - persistent_clock_exist = true; + persistent_clock_exists = true; raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); timekeeping_suspended = 1; - if (has_persistent_clock()) { + if (persistent_clock_exists) { /* * To avoid drift caused by repeated suspend/resumes, * which each can add ~1 second drift error, -- cgit v1.2.3 From 8e56f33f8439b2f8e7f4ae7f3d0bfe683ecc3b09 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 1 Apr 2015 20:34:39 -0700 Subject: clocksource: Improve comment explaining clocks_calc_max_nsecs()'s 50% safety margin Ingo noted that the description of clocks_calc_max_nsecs()'s 50% safety margin was somewhat circular. So this patch tries to improve the comment to better explain what we mean by the 50% safety margin and why we need it. Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-20-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c3be3c71bbad..15facb1b9c60 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -472,8 +472,11 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) * @max_cyc: maximum cycle value before potential overflow (does not include * any safety margin) * - * NOTE: This function includes a safety margin of 50%, so that bad clock values - * can be detected. + * NOTE: This function includes a safety margin of 50%, in other words, we + * return half the number of nanoseconds the hardware counter can technically + * cover. This is done so that we can potentially detect problems caused by + * delayed timers or bad hardware, which might result in time intervals that + * are larger then what the math used can handle without overflows. 
*/ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc) { -- cgit v1.2.3 From 592a438ff3fea61d303c5784c209b3f1fd3e16df Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:01:10 +0200 Subject: clockevents: Provide explicit broadcast control functions clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the broadcast control into a separate function and provide inline helpers. Switch clockevents_notify() over. This will go away once all callers are converted. This also gets rid of the nested locking of clockevents_lock and broadcast_lock. The broadcast control functions do not require clockevents_lock. Only the managing functions (setup/shutdown/suspend/resume) of the broadcast device require clockevents_lock. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Daniel Lezcano Cc: Len Brown Cc: Peter Zijlstra Cc: Tony Lindgren Link: http://lkml.kernel.org/r/8086559.ttsuS0n1Xr@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 6 ++++- kernel/time/tick-broadcast.c | 62 +++++++++++++++++++------------------- kernel/time/tick-internal.h | 2 -- 3 files changed, 32 insertions(+), 38 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 7af614829da1..599ff8d3fda5 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -656,9 +656,13 @@ int clockevents_notify(unsigned long reason, void *arg) switch (reason) { case CLOCK_EVT_NOTIFY_BROADCAST_ON: + tick_broadcast_enable(); + break; case CLOCK_EVT_NOTIFY_BROADCAST_OFF: + tick_broadcast_disable(); + break; case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: - tick_broadcast_on_off(reason, arg); + tick_broadcast_force(); break; case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f5e0fd5652dc..1a0bee04ef8c 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -33,7 +33,7 @@ static cpumask_var_t tick_broadcast_mask; static cpumask_var_t tick_broadcast_on; static cpumask_var_t tmpmask; static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); -static int tick_broadcast_force; +static int tick_broadcast_forced; #ifdef CONFIG_TICK_ONESHOT static void tick_broadcast_clear_oneshot(int cpu); @@ -326,49 +326,54 @@ unlock: raw_spin_unlock(&tick_broadcast_lock); } -/* - * Powerstate information: The system enters/leaves a state, where - * affected devices might stop +/** + * tick_broadcast_control - Enable/disable or force broadcast mode + * @mode: The selected broadcast mode + * + * Called when the system enters a state where affected tick devices + * might stop. Note: TICK_BROADCAST_FORCE cannot be undone. + * + * Called with interrupts disabled, so clockevents_lock is not + * required here because the local clock event device cannot go away + * under us. */ -static void tick_do_broadcast_on_off(unsigned long *reason) +void tick_broadcast_control(enum tick_broadcast_mode mode) { struct clock_event_device *bc, *dev; struct tick_device *td; - unsigned long flags; int cpu, bc_stopped; - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - - cpu = smp_processor_id(); - td = &per_cpu(tick_cpu_device, cpu); + td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; - bc = tick_broadcast_device.evtdev; /* * Is the device not affected by the powerstate ?
*/ if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) - goto out; + return; if (!tick_device_is_functional(dev)) - goto out; + return; + raw_spin_lock(&tick_broadcast_lock); + cpu = smp_processor_id(); + bc = tick_broadcast_device.evtdev; bc_stopped = cpumask_empty(tick_broadcast_mask); - switch (*reason) { - case CLOCK_EVT_NOTIFY_BROADCAST_ON: - case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: + switch (mode) { + case TICK_BROADCAST_FORCE: + tick_broadcast_forced = 1; + case TICK_BROADCAST_ON: cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } - if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE) - tick_broadcast_force = 1; break; - case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - if (tick_broadcast_force) + + case TICK_BROADCAST_OFF: + if (tick_broadcast_forced) break; cpumask_clear_cpu(cpu, tick_broadcast_on); if (!tick_device_is_functional(dev)) @@ -390,22 +395,9 @@ static void tick_do_broadcast_on_off(unsigned long *reason) else tick_broadcast_setup_oneshot(bc); } -out: - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); -} - -/* - * Powerstate information: The system enters/leaves a state, where - * affected devices might stop. - */ -void tick_broadcast_on_off(unsigned long reason, int *oncpu) -{ - if (!cpumask_test_cpu(*oncpu, cpu_online_mask)) - printk(KERN_ERR "tick-broadcast: ignoring broadcast for " - "offline CPU #%d\n", *oncpu); - else - tick_do_broadcast_on_off(&reason); + raw_spin_unlock(&tick_broadcast_lock); } +EXPORT_SYMBOL_GPL(tick_broadcast_control); /* * Set the periodic handler depending on broadcast on/off diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index b6ba0a44e740..62e331d1bc76 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -53,7 +53,6 @@ extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); extern void tick_install_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); -extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); extern void tick_shutdown_broadcast(unsigned int *cpup); extern void tick_suspend_broadcast(void); extern void tick_resume_broadcast(void); @@ -68,7 +67,6 @@ static inline void tick_install_broadcast_device(struct clock_event_device *dev) static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; } static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; } static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } -static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { } static inline void tick_shutdown_broadcast(unsigned int *cpup) { } static inline void tick_suspend_broadcast(void) { } static inline void tick_resume_broadcast(void) { } -- cgit v1.2.3 From 89feddbfe7023ccfb4a6d7f5e3f5161d91b28b18 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:03:42 +0200 Subject: clockevents: Remove the broadcast control leftovers All users converted. Remove the notify leftovers. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/2076318.76XJZ8QYP3@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 599ff8d3fda5..dba0b83708b3 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -655,16 +655,6 @@ int clockevents_notify(unsigned long reason, void *arg) raw_spin_lock_irqsave(&clockevents_lock, flags); switch (reason) { - case CLOCK_EVT_NOTIFY_BROADCAST_ON: - tick_broadcast_enable(); - break; - case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - tick_broadcast_disable(); - break; - case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: - tick_broadcast_force(); - break; - case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: ret = tick_broadcast_oneshot_control(reason); -- cgit v1.2.3 From 1fe5d5c3c9ba0c4ade18e3325cba0ffe35127941 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:05:15 +0200 Subject: clockevents: Provide explicit broadcast oneshot control functions clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the broadcast oneshot control into a separate function and provide inline helpers. Switch clockevents_notify() over. This will go away once all callers are converted. This also gets rid of the nested locking of clockevents_lock and broadcast_lock. The broadcast oneshot control functions do not require clockevents_lock. Only the managing functions (setup/shutdown/suspend/resume) of the broadcast device require clockevents_lock. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Alexandre Courbot Cc: Daniel Lezcano Cc: Len Brown Cc: Peter Zijlstra Cc: Stephen Warren Cc: Thierry Reding Cc: Tony Lindgren Link: http://lkml.kernel.org/r/13000649.8qZuEDV0OA@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 4 +++- kernel/time/tick-broadcast.c | 28 +++++++++++++++++----------- kernel/time/tick-internal.h | 2 -- 3 files changed, 20 insertions(+), 14 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index dba0b83708b3..7791b1c94ef2 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -656,8 +656,10 @@ int clockevents_notify(unsigned long reason, void *arg) switch (reason) { case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: + tick_broadcast_enter(); + break; case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: - ret = tick_broadcast_oneshot_control(reason); + tick_broadcast_exit(); break; case CLOCK_EVT_NOTIFY_CPU_DYING: diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 1a0bee04ef8c..55e43f20987a 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -687,18 +687,23 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -/* - * Powerstate information: The system enters/leaves a state, where - * affected devices might stop +/** + * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode + * @state: The target state (enter/exit) + * + * The system enters/leaves a state, where affected devices might stop * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
+ * + * Called with interrupts disabled, so clockevents_lock is not + * required here because the local clock event device cannot go away + * under us. */ -int tick_broadcast_oneshot_control(unsigned long reason) +int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc, *dev; struct tick_device *td; - unsigned long flags; - ktime_t now; int cpu, ret = 0; + ktime_t now; /* * Periodic mode does not care about the enter/exit of power @@ -711,17 +716,17 @@ int tick_broadcast_oneshot_control(unsigned long reason) * We are called with preemtion disabled from the depth of the * idle code, so we can't be moved away. */ - cpu = smp_processor_id(); - td = &per_cpu(tick_cpu_device, cpu); + td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) return 0; + raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; + cpu = smp_processor_id(); - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { + if (state == TICK_BROADCAST_ENTER) { if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); broadcast_shutdown_local(bc, dev); @@ -813,9 +818,10 @@ int tick_broadcast_oneshot_control(unsigned long reason) } } out: - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); + raw_spin_unlock(&tick_broadcast_lock); return ret; } +EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control); /* * Reset the one shot broadcast for a cpu diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 62e331d1bc76..0266f9dbd114 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -117,7 +117,6 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } /* Functions related to oneshot broadcasting */ #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); -extern int tick_broadcast_oneshot_control(unsigned long reason); extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); extern int tick_broadcast_oneshot_active(void); @@ -126,7 +125,6 @@ bool tick_broadcast_oneshot_available(void); extern struct cpumask *tick_get_broadcast_oneshot_mask(void); #else /* !(BROADCAST && ONESHOT): */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } -static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } -- cgit v1.2.3 From ffa48c0d76803057ee89bf220305466d74256d7b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Apr 2015 02:36:10 +0200 Subject: clockevents: Remove broadcast oneshot control leftovers Now that all users are converted over to explicit calls into the clockevents state machine, remove the notification chain leftovers. Original-from: Thomas Gleixner Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: John Stultz Link: http://lkml.kernel.org/r/14018863.NQUzkFuafr@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 7791b1c94ef2..be9abf32c0b9 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -655,13 +655,6 @@ int clockevents_notify(unsigned long reason, void *arg) raw_spin_lock_irqsave(&clockevents_lock, flags); switch (reason) { - case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: - tick_broadcast_enter(); - break; - case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: - tick_broadcast_exit(); - break; - case CLOCK_EVT_NOTIFY_CPU_DYING: tick_handover_do_timer(arg); break; -- cgit v1.2.3 From 52c063d1adbc16c76e70fffa20727fcd4e9343b3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:37:24 +0200 Subject: clockevents: Make tick handover explicit clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the tick_handover call and invoke it explicitly from the hotplug code. This temporary solution will be cleaned up in later patches. Signed-off-by: Thomas Gleixner [ Rebase ] Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Cc: John Stultz Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1658173.RkEEILFiQZ@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 4 ---- kernel/time/hrtimer.c | 4 ---- kernel/time/tick-common.c | 9 ++++++--- kernel/time/tick-internal.h | 1 - 4 files changed, 6 insertions(+), 12 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index be9abf32c0b9..88fb3b96c7cc 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -655,10 +655,6 @@ int clockevents_notify(unsigned long reason, void *arg) raw_spin_lock_irqsave(&clockevents_lock, flags); switch (reason) { - case CLOCK_EVT_NOTIFY_CPU_DYING: - tick_handover_do_timer(arg); - break; - case CLOCK_EVT_NOTIFY_CPU_DEAD: tick_shutdown_broadcast_oneshot(arg); tick_shutdown_broadcast(arg); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 721d29b99d10..6a7a64ec7d1b 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1707,10 +1707,6 @@ static int hrtimer_cpu_notify(struct notifier_block *self, break; #ifdef CONFIG_HOTPLUG_CPU - case CPU_DYING: - case CPU_DYING_FROZEN: - clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu); - break; case CPU_DEAD: case CPU_DEAD_FROZEN: { diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index e28ba5c044c5..055c868f3ec9 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -332,20 +332,23 @@ out_bc: tick_install_broadcast_device(newdev); } +#ifdef CONFIG_HOTPLUG_CPU /* * Transfer the do_timer job away from a dying cpu. * - * Called with interrupts disabled. + * Called with interrupts disabled. Not locking required. If + * tick_do_timer_cpu is owned by this cpu, nothing can change it. */ -void tick_handover_do_timer(int *cpup) +void tick_handover_do_timer(void) { - if (*cpup == tick_do_timer_cpu) { + if (tick_do_timer_cpu == smp_processor_id()) { int cpu = cpumask_first(cpu_online_mask); tick_do_timer_cpu = (cpu < nr_cpu_ids) ?
cpu : TICK_DO_TIMER_NONE; } } +#endif /* * Shutdown an event device on a given cpu: diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 0266f9dbd114..aabcb5d00cf2 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -20,7 +20,6 @@ extern int tick_do_timer_cpu __read_mostly; extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); extern void tick_handle_periodic(struct clock_event_device *dev); extern void tick_check_new_device(struct clock_event_device *dev); -extern void tick_handover_do_timer(int *cpup); extern void tick_shutdown(unsigned int *cpup); extern void tick_suspend(void); extern void tick_resume(void); -- cgit v1.2.3 From a49b116dcb1265f238f3169507424257b0519069 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:38:05 +0200 Subject: clockevents: Cleanup dead cpu explicitly clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the cleanup function for a dead cpu and invoke it directly from the cpu down code. Make it conditional on CONFIG_HOTPLUG_CPU as well. This is a temporary change that will be refined in the future. Signed-off-by: Thomas Gleixner [ Rebased, added clockevents_notify() removal ] Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1735025.raBZdQHM3m@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 51 ++++++++++++++++++-------------------- kernel/time/hrtimer.c | 3 --- kernel/time/tick-broadcast.c | 39 ++++++++++++++++----------------- kernel/time/tick-common.c | 6 +++--- kernel/time/tick-internal.h | 10 ++++----- 5 files changed, 49 insertions(+), 60 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 88fb3b96c7cc..25d942d1da27 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -642,49 +642,40 @@ void clockevents_resume(void) dev->resume(dev); } +#ifdef CONFIG_HOTPLUG_CPU /** - * clockevents_notify - notification about relevant events - * Returns 0 on success, any other value on error + * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu */ -int clockevents_notify(unsigned long reason, void *arg) +void tick_cleanup_dead_cpu(int cpu) { struct clock_event_device *dev, *tmp; unsigned long flags; - int cpu, ret = 0; raw_spin_lock_irqsave(&clockevents_lock, flags); - switch (reason) { - case CLOCK_EVT_NOTIFY_CPU_DEAD: - tick_shutdown_broadcast_oneshot(arg); - tick_shutdown_broadcast(arg); - tick_shutdown(arg); - /* - * Unregister the clock event devices which were - * released from the users in the notify chain. - */ - list_for_each_entry_safe(dev, tmp, &clockevents_released, list) + tick_shutdown_broadcast_oneshot(cpu); + tick_shutdown_broadcast(cpu); + tick_shutdown(cpu); + /* + * Unregister the clock event devices which were + * released from the users in the notify chain.
+ */ + list_for_each_entry_safe(dev, tmp, &clockevents_released, list) + list_del(&dev->list); + /* + * Now check whether the CPU has left unused per cpu devices + */ + list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { + if (cpumask_test_cpu(cpu, dev->cpumask) && + cpumask_weight(dev->cpumask) == 1 && + !tick_is_broadcast_device(dev)) { + BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED); list_del(&dev->list); - /* - * Now check whether the CPU has left unused per cpu devices - */ - cpu = *((int *)arg); - list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { - if (cpumask_test_cpu(cpu, dev->cpumask) && - cpumask_weight(dev->cpumask) == 1 && - !tick_is_broadcast_device(dev)) { - BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED); - list_del(&dev->list); - } } - break; - default: - break; } raw_spin_unlock_irqrestore(&clockevents_lock, flags); - return ret; } -EXPORT_SYMBOL_GPL(clockevents_notify); +#endif #ifdef CONFIG_SYSFS struct bus_type clockevents_subsys = { diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 6a7a64ec7d1b..76d4bd962b19 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1709,11 +1709,8 @@ static int hrtimer_cpu_notify(struct notifier_block *self, #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: case CPU_DEAD_FROZEN: - { - clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu); migrate_hrtimers(scpu); break; - } #endif default: diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 55e43f20987a..7e8ca4f448a8 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -410,14 +410,14 @@ void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) dev->event_handler = tick_handle_periodic_broadcast; } +#ifdef CONFIG_HOTPLUG_CPU /* * Remove a CPU from broadcasting */ -void tick_shutdown_broadcast(unsigned int *cpup) +void tick_shutdown_broadcast(unsigned int cpu) { struct clock_event_device *bc; unsigned long flags; - unsigned int cpu = *cpup; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -432,6 +432,7 @@ void tick_shutdown_broadcast(unsigned int *cpup) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } +#endif void tick_suspend_broadcast(void) { @@ -672,21 +673,6 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } -void hotplug_cpu__broadcast_tick_pull(int deadcpu) -{ - struct clock_event_device *bc; - unsigned long flags; - - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - bc = tick_broadcast_device.evtdev; - - if (bc && broadcast_needs_cpu(bc, deadcpu)) { - /* This moves the broadcast assignment to this CPU: */ - clockevents_program_event(bc, bc->next_event, 1); - } - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); -} - /** * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode * @state: The target state (enter/exit) @@ -908,14 +894,28 @@ void tick_broadcast_switch_to_oneshot(void) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } +#ifdef CONFIG_HOTPLUG_CPU +void hotplug_cpu__broadcast_tick_pull(int deadcpu) +{ + struct clock_event_device *bc; + unsigned long flags; + + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); + bc = tick_broadcast_device.evtdev; + + if (bc && broadcast_needs_cpu(bc, deadcpu)) { + /* This moves the broadcast assignment to this CPU: */ + clockevents_program_event(bc, bc->next_event, 1); + } + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); +} /* * Remove a dead CPU from broadcasting */ -void 
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 55e43f20987a..7e8ca4f448a8 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -410,14 +410,14 @@ void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
 		dev->event_handler = tick_handle_periodic_broadcast;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
  * Remove a CPU from broadcasting
  */
-void tick_shutdown_broadcast(unsigned int *cpup)
+void tick_shutdown_broadcast(unsigned int cpu)
 {
 	struct clock_event_device *bc;
 	unsigned long flags;
-	unsigned int cpu = *cpup;
 
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
@@ -432,6 +432,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
 
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
+#endif
 
 void tick_suspend_broadcast(void)
 {
@@ -672,21 +673,6 @@ static void broadcast_shutdown_local(struct clock_event_device *bc,
 	clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
 }
 
-void hotplug_cpu__broadcast_tick_pull(int deadcpu)
-{
-	struct clock_event_device *bc;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
-	bc = tick_broadcast_device.evtdev;
-
-	if (bc && broadcast_needs_cpu(bc, deadcpu)) {
-		/* This moves the broadcast assignment to this CPU: */
-		clockevents_program_event(bc, bc->next_event, 1);
-	}
-	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
-}
-
 /**
  * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
  * @state:	The target state (enter/exit)
@@ -908,14 +894,28 @@ void tick_broadcast_switch_to_oneshot(void)
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+void hotplug_cpu__broadcast_tick_pull(int deadcpu)
+{
+	struct clock_event_device *bc;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
+	bc = tick_broadcast_device.evtdev;
+
+	if (bc && broadcast_needs_cpu(bc, deadcpu)) {
+		/* This moves the broadcast assignment to this CPU: */
+		clockevents_program_event(bc, bc->next_event, 1);
+	}
+	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+}
+
 /*
  * Remove a dead CPU from broadcasting
  */
-void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
+void tick_shutdown_broadcast_oneshot(unsigned int cpu)
 {
 	unsigned long flags;
-	unsigned int cpu = *cpup;
 
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
@@ -929,6 +929,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
 
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
+#endif
 
 /*
  * Check, whether the broadcast device is in one shot mode
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 055c868f3ec9..fac3e98fec49 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -348,7 +348,6 @@ void tick_handover_do_timer(void)
 			TICK_DO_TIMER_NONE;
 	}
 }
-#endif
 
 /*
  * Shutdown an event device on a given cpu:
@@ -357,9 +356,9 @@ void tick_handover_do_timer(void)
  * access the hardware device itself.
  * We just set the mode and remove it from the lists.
  */
-void tick_shutdown(unsigned int *cpup)
+void tick_shutdown(unsigned int cpu)
 {
-	struct tick_device *td = &per_cpu(tick_cpu_device, *cpup);
+	struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
 	struct clock_event_device *dev = td->evtdev;
 
 	td->mode = TICKDEV_MODE_PERIODIC;
@@ -375,6 +374,7 @@ void tick_shutdown(unsigned int *cpup)
 		td->evtdev = NULL;
 	}
 }
+#endif
 
 /**
  * tick_suspend_local - Suspend the local tick device
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index aabcb5d00cf2..b64fdd8054c5 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -20,7 +20,7 @@ extern int tick_do_timer_cpu __read_mostly;
 extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
 extern void tick_handle_periodic(struct clock_event_device *dev);
 extern void tick_check_new_device(struct clock_event_device *dev);
-extern void tick_shutdown(unsigned int *cpup);
+extern void tick_shutdown(unsigned int cpu);
 extern void tick_suspend(void);
 extern void tick_resume(void);
 extern bool tick_check_replacement(struct clock_event_device *curdev,
@@ -52,7 +52,7 @@ extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
 extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
 extern void tick_install_broadcast_device(struct clock_event_device *dev);
 extern int tick_is_broadcast_device(struct clock_event_device *dev);
-extern void tick_shutdown_broadcast(unsigned int *cpup);
+extern void tick_shutdown_broadcast(unsigned int cpu);
 extern void tick_suspend_broadcast(void);
 extern void tick_resume_broadcast(void);
 extern bool tick_resume_check_broadcast(void);
@@ -66,7 +66,7 @@ static inline void tick_install_broadcast_device(struct clock_event_device *dev
 static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; }
 static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; }
 static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
-static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
+static inline void tick_shutdown_broadcast(unsigned int cpu) { }
 static inline void tick_suspend_broadcast(void) { }
 static inline void tick_resume_broadcast(void) { }
 static inline bool tick_resume_check_broadcast(void) { return false; }
@@ -117,7 +117,7 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
 extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 extern void tick_broadcast_switch_to_oneshot(void);
-extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
+extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
 extern int tick_broadcast_oneshot_active(void);
 extern void tick_check_oneshot_broadcast_this_cpu(void);
 bool tick_broadcast_oneshot_available(void);
@@ -125,7 +125,7 @@ extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
 #else /* !(BROADCAST && ONESHOT): */
 static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
-static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
+static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
 static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
 static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); }
--
cgit v1.2.3
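Worth noting in the tick-internal.h hunks above is the stub convention this commit leans on: a function compiled out by a config option gets an empty static inline replacement in the header, so call sites never need their own #ifdefs. The pattern in miniature, with a made-up function name rather than an actual kernel declaration:

```c
/* some_header.h -- illustrative stub pattern, hypothetical function name */
#ifdef CONFIG_HOTPLUG_CPU
extern void takedown_dead_cpu(unsigned int cpu);
#else
static inline void takedown_dead_cpu(unsigned int cpu) { }	/* compiles away */
#endif
```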
From 347c6f6dda1098318088feb8e60188f0161e743d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Fri, 3 Apr 2015 02:39:05 +0200
Subject: timekeeping: Get rid of stale comment

Arch-specific management of xtime/jiffies/wall_to_monotonic has been
gone for quite a while. Zap the stale comment.

Signed-off-by: Thomas Gleixner
Signed-off-by: Rafael J. Wysocki
Acked-by: John Stultz
Cc: Peter Zijlstra
Cc: John Stultz
Link: http://lkml.kernel.org/r/2422730.dmO29q661S@vostro.rjw.lan
Signed-off-by: Ingo Molnar
---
 kernel/time/timekeeping.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 79b9bc6e7876..946acb72179f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1354,10 +1354,6 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
 
 /**
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
- *
- * This is for the generic clocksource timekeeping.
- * xtime/wall_to_monotonic/jiffies/etc are
- * still managed by arch specific suspend/resume code.
  */
 void timekeeping_resume(void)
 {
--
cgit v1.2.3


From 422fe7502e3f16dc1c680f22d31f59f022edc10d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Fri, 3 Apr 2015 15:21:51 +0200
Subject: timers/PM: Fix up tick_unfreeze()

A recent conflict resolution left tick_resume() in tick_unfreeze(),
which leads to an unbalanced execution of tick_resume_broadcast()
every time that function runs.

Fix that by replacing the tick_resume() in tick_unfreeze() with
tick_resume_local(), as appropriate.

Signed-off-by: Rafael J. Wysocki
Cc: boris.ostrovsky@oracle.com
Cc: david.vrabel@citrix.com
Cc: konrad.wilk@oracle.com
Cc: peterz@infradead.org
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/8099075.V0LvN3pQAV@vostro.rjw.lan
Signed-off-by: Ingo Molnar
---
 kernel/time/tick-common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index fac3e98fec49..ad66a51ca4fa 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -482,7 +482,7 @@ void tick_unfreeze(void)
 	if (tick_freeze_depth == num_online_cpus())
 		timekeeping_resume();
 	else
-		tick_resume();
+		tick_resume_local();
 
 	tick_freeze_depth--;
--
cgit v1.2.3
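The bug and the fix both follow from the symmetry of the freeze path: tick_freeze() and tick_unfreeze() share a depth counter, the last CPU to freeze suspends timekeeping, the first CPU to unfreeze resumes it, and every other CPU suspends or resumes only its local tick device. tick_suspend_local() must therefore be paired with tick_resume_local(); the full tick_resume() also resumes the broadcast device, which is what made the tick_resume_broadcast() executions unbalanced. A user-space model of the counter logic (a sketch: locking omitted, all routines reduced to stubs):

```c
#include <assert.h>
#include <stdio.h>

#define NR_CPUS 4

static int freeze_depth;

/* Stand-ins for the real routines; they only log here. */
static void timekeeping_suspend(void) { puts("suspend timekeeping"); }
static void timekeeping_resume(void)  { puts("resume timekeeping"); }
static void tick_suspend_local(void)  { puts("suspend local tick"); }
static void tick_resume_local(void)   { puts("resume local tick"); }

static void tick_freeze(void)
{
	if (++freeze_depth == NR_CPUS)
		timekeeping_suspend();	/* last CPU in */
	else
		tick_suspend_local();
}

static void tick_unfreeze(void)
{
	if (freeze_depth == NR_CPUS)
		timekeeping_resume();	/* first CPU out */
	else
		tick_resume_local();	/* must mirror tick_suspend_local() */
	freeze_depth--;
}

int main(void)
{
	for (int i = 0; i < NR_CPUS; i++)
		tick_freeze();
	for (int i = 0; i < NR_CPUS; i++)
		tick_unfreeze();
	assert(freeze_depth == 0);	/* suspend/resume calls stay balanced */
	return 0;
}
```

Running this prints one timekeeping suspend/resume pair and NR_CPUS - 1 local pairs; with the full tick_resume() in the unfreeze path, the broadcast device would be resumed on every non-first CPU without a matching suspend.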
From def747087e83aa5f6a71582cfa71e18341988688 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Fri, 3 Apr 2015 15:31:32 +0200
Subject: timers/PM: Drop unnecessary braces from tick_freeze()

Some braces in tick_freeze() are not necessary, so drop them.

Signed-off-by: Rafael J. Wysocki
Cc: peterz@infradead.org
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1534128.H5hN3KBFB4@vostro.rjw.lan
Signed-off-by: Ingo Molnar
---
 kernel/time/tick-common.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index ad66a51ca4fa..3ae6afa1eb98 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -457,11 +457,10 @@ void tick_freeze(void)
 	raw_spin_lock(&tick_freeze_lock);
 
 	tick_freeze_depth++;
-	if (tick_freeze_depth == num_online_cpus()) {
+	if (tick_freeze_depth == num_online_cpus())
 		timekeeping_suspend();
-	} else {
+	else
 		tick_suspend_local();
-	}
 
 	raw_spin_unlock(&tick_freeze_lock);
 }
--
cgit v1.2.3