From cb08e0353c249a27aed10c6f60a13871ae449d33 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 23 Jul 2017 00:03:43 +0200 Subject: PM / timekeeping: Print debug messages when requested The messages printed by tk_debug_account_sleep_time() are basically useful for system sleep debugging, so print them only when the other debug messages from the core suspend/hibernate code are enabled. While at it, make it clear that the messages from tk_debug_account_sleep_time() are about timekeeping suspend duration, because in general timekeeping may be suspeded and resumed for multiple times during one system suspend-resume cycle. Signed-off-by: Rafael J. Wysocki --- kernel/time/timekeeping_debug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index 38bc4d2208e8..0754cadfa9e6 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "timekeeping_internal.h" @@ -75,7 +76,7 @@ void tk_debug_account_sleep_time(struct timespec64 *t) int bin = min(fls(t->tv_sec), NUM_BINS-1); sleep_time_bin[bin]++; - printk_deferred(KERN_INFO "Suspended for %lld.%03lu seconds\n", - (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC); + pm_deferred_pr_dbg("Timekeeping suspended for %lld.%03lu seconds\n", + (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC); } -- cgit v1.2.3 From 34f41c0316ed52b0b44542491d89278efdaa70e4 Mon Sep 17 00:00:00 2001 From: Matija Glavinic Pecotic Date: Tue, 1 Aug 2017 09:11:52 +0200 Subject: timers: Fix overflow in get_next_timer_interrupt For e.g. HZ=100, timer being 430 jiffies in the future, and 32 bit unsigned int, there is an overflow on unsigned int right-hand side of the expression which results with wrong values being returned. Type cast the multiplier to 64bit to avoid that issue. Fixes: 46c8f0b077a8 ("timers: Fix get_next_timer_interrupt() computation") Signed-off-by: Matija Glavinic Pecotic Signed-off-by: Thomas Gleixner Reviewed-by: Alexander Sverdlin Cc: khilman@baylibre.com Cc: akpm@linux-foundation.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/a7900f04-2a21-c9fd-67be-ab334d459ee5@nokia.com --- kernel/time/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 71ce3f4eead3..8f5d1bf18854 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1495,7 +1495,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) base->is_idle = false; } else { if (!is_max_delta) - expires = basem + (nextevt - basej) * TICK_NSEC; + expires = basem + (u64)(nextevt - basej) * TICK_NSEC; /* * If we expect to sleep more than a tick, mark the base idle: */ -- cgit v1.2.3 From a529bea8fa6b6dded6179c72d3385e0f7d0a4fde Mon Sep 17 00:00:00 2001 From: Stafford Horne Date: Wed, 28 Jun 2017 22:21:35 +0900 Subject: timekeeping: Use proper timekeeper for debug code When CONFIG_DEBUG_TIMEKEEPING is enabled the timekeeping_check_update() function will update status like last_warning and underflow_seen on the timekeeper. If there are issues found this state is used to rate limit the warnings that get printed. This rate limiting doesn't really really work if stored in real_tk as the shadow timekeeper is overwritten onto real_tk at the end of every update_wall_time() call, resetting last_warning and other statuses. Fix rate limiting by using the shadow_timekeeper for timekeeping_check_update(). Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Fixes: commit 57d05a93ada7 ("time: Rework debugging variables so they aren't global") Signed-off-by: Stafford Horne Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cedafa008de5..8f5866981883 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2066,7 +2066,7 @@ void update_wall_time(void) goto out; /* Do some additional sanity checking */ - timekeeping_check_update(real_tk, offset); + timekeeping_check_update(tk, offset); /* * With NO_HZ we may have to accumulate many cycle_intervals -- cgit v1.2.3 From 47b4a457e4cc816b3fdd2ee55c65fda8ea6de051 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 5 Jul 2017 14:08:35 +0200 Subject: alarmtimer: Fix unavailable wake-up source in sysfs Currently the alarmtimer registers a wake-up source unconditionally, regardless of the system having a (wake-up capable) RTC or not. Hence the alarmtimer will always show up in /sys/kernel/debug/wakeup_sources, even if it is not available, and thus cannot be a wake-up source. To fix this, postpone registration until a wake-up capable RTC device is added. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Signed-off-by: Geert Uytterhoeven Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 0b8ff7d257ea..73a2b476e59f 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -56,9 +56,9 @@ static ktime_t freezer_delta; static DEFINE_SPINLOCK(freezer_delta_lock); #endif +#ifdef CONFIG_RTC_CLASS static struct wakeup_source *ws; -#ifdef CONFIG_RTC_CLASS /* rtc timer and device for setting alarm wakeups at suspend */ static struct rtc_timer rtctimer; static struct rtc_device *rtcdev; @@ -89,6 +89,7 @@ static int alarmtimer_rtc_add_device(struct device *dev, { unsigned long flags; struct rtc_device *rtc = to_rtc_device(dev); + struct wakeup_source *__ws; if (rtcdev) return -EBUSY; @@ -98,13 +99,20 @@ static int alarmtimer_rtc_add_device(struct device *dev, if (!device_may_wakeup(rtc->dev.parent)) return -1; + __ws = wakeup_source_register("alarmtimer"); + spin_lock_irqsave(&rtcdev_lock, flags); if (!rtcdev) { rtcdev = rtc; /* hold a reference so it doesn't go away */ get_device(dev); + ws = __ws; + __ws = NULL; } spin_unlock_irqrestore(&rtcdev_lock, flags); + + wakeup_source_unregister(__ws); + return 0; } @@ -860,7 +868,6 @@ static int __init alarmtimer_init(void) error = PTR_ERR(pdev); goto out_drv; } - ws = wakeup_source_register("alarmtimer"); return 0; out_drv: -- cgit v1.2.3 From 3cf294962df8fcde710eb5e762e0929e2ba49947 Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Wed, 5 Jul 2017 19:25:48 +0200 Subject: posix-cpu-timers: Use dedicated helper to access rlimit values Use rlimit() and rlimit_max() helper instead of manually writing whole chain from task to rlimit value Signed-off-by: Krzysztof Opasiak Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20170705172548.7911-1-k.opasiak@samsung.com --- kernel/time/posix-cpu-timers.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index a3bd5dbe0dc4..8585ad6e472a 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -799,7 +799,6 @@ static void check_thread_timers(struct task_struct *tsk, struct list_head *firing) { struct list_head *timers = tsk->cpu_timers; - struct signal_struct *const sig = tsk->signal; struct task_cputime *tsk_expires = &tsk->cputime_expires; u64 expires; unsigned long soft; @@ -823,10 +822,9 @@ static void check_thread_timers(struct task_struct *tsk, /* * Check for the special case thread timers. */ - soft = READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur); + soft = task_rlimit(tsk, RLIMIT_RTTIME); if (soft != RLIM_INFINITY) { - unsigned long hard = - READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max); + unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); if (hard != RLIM_INFINITY && tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) { @@ -847,7 +845,8 @@ static void check_thread_timers(struct task_struct *tsk, */ if (soft < hard) { soft += USEC_PER_SEC; - sig->rlim[RLIMIT_RTTIME].rlim_cur = soft; + tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = + soft; } if (print_fatal_signals) { pr_info("RT Watchdog Timeout (soft): %s[%d]\n", @@ -938,11 +937,10 @@ static void check_process_timers(struct task_struct *tsk, SIGPROF); check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime, SIGVTALRM); - soft = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); + soft = task_rlimit(tsk, RLIMIT_CPU); if (soft != RLIM_INFINITY) { unsigned long psecs = div_u64(ptime, NSEC_PER_SEC); - unsigned long hard = - READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_max); + unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU); u64 x; if (psecs >= hard) { /* -- cgit v1.2.3 From 2fe59f507a65dbd734b990a11ebc7488f6f87a24 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 22 Aug 2017 18:43:48 +1000 Subject: timers: Fix excessive granularity of new timers after a nohz idle When a timer base is idle, it is forwarded when a new timer is added to ensure that granularity does not become excessive. When not idle, the timer tick is expected to increment the base. However there are several problems: - If an existing timer is modified, the base is forwarded only after the index is calculated. - The base is not forwarded by add_timer_on. - There is a window after a timer is restarted from a nohz idle, after it is marked not-idle and before the timer tick on this CPU, where a timer may be added but the ancient base does not get forwarded. These result in excessive granularity (a 1 jiffy timeout can blow out to 100s of jiffies), which cause the rcu lockup detector to trigger, among other things. Fix this by keeping track of whether the timer base has been idle since it was last run or forwarded, and if so then forward it before adding a new timer. There is still a case where mod_timer optimises the case of a pending timer mod with the same expiry time, where the timer can see excessive granularity relative to the new, shorter interval. A comment is added, but it's not changed because it is an important fastpath for networking. This has been tested and found to fix the RCU softlockup messages. Testing was also done with tracing to measure requested versus achieved wakeup latencies for all non-deferrable timers in an idle system (with no lockup watchdogs running). Wakeup latency relative to absolute latency is calculated (note this suffers from round-up skew at low absolute times) and analysed: max avg std upstream 506.0 1.20 4.68 patched 2.0 1.08 0.15 The bug was noticed due to the lockup detector Kconfig changes dropping it out of people's .configs and resulting in larger base clk skew When the lockup detectors are enabled, no CPU can go idle for longer than 4 seconds, which limits the granularity errors. Sub-optimal timer behaviour is observable on a smaller scale in that case: max avg std upstream 9.0 1.05 0.19 patched 2.0 1.04 0.11 Fixes: Fixes: a683f390b93f ("timers: Forward the wheel clock whenever possible") Signed-off-by: Nicholas Piggin Signed-off-by: Thomas Gleixner Tested-by: Jonathan Cameron Tested-by: David Miller Cc: dzickus@redhat.com Cc: sfr@canb.auug.org.au Cc: mpe@ellerman.id.au Cc: Stephen Boyd Cc: linuxarm@huawei.com Cc: abdhalee@linux.vnet.ibm.com Cc: John Stultz Cc: akpm@linux-foundation.org Cc: paulmck@linux.vnet.ibm.com Cc: torvalds@linux-foundation.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170822084348.21436-1-npiggin@gmail.com --- kernel/time/timer.c | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 8f5d1bf18854..f2674a056c26 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -203,6 +203,7 @@ struct timer_base { bool migration_enabled; bool nohz_active; bool is_idle; + bool must_forward_clk; DECLARE_BITMAP(pending_map, WHEEL_SIZE); struct hlist_head vectors[WHEEL_SIZE]; } ____cacheline_aligned; @@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags) static inline void forward_timer_base(struct timer_base *base) { - unsigned long jnow = READ_ONCE(jiffies); + unsigned long jnow; /* - * We only forward the base when it's idle and we have a delta between - * base clock and jiffies. + * We only forward the base when we are idle or have just come out of + * idle (must_forward_clk logic), and have a delta between base clock + * and jiffies. In the common case, run_timers will take care of it. */ - if (!base->is_idle || (long) (jnow - base->clk) < 2) + if (likely(!base->must_forward_clk)) + return; + + jnow = READ_ONCE(jiffies); + base->must_forward_clk = base->is_idle; + if ((long)(jnow - base->clk) < 2) return; /* @@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) * same array bucket then just return: */ if (timer_pending(timer)) { + /* + * The downside of this optimization is that it can result in + * larger granularity than you would get from adding a new + * timer with this expiry. + */ if (timer->expires == expires) return 1; @@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) * dequeue/enqueue dance. */ base = lock_timer_base(timer, &flags); + forward_timer_base(base); clk = base->clk; idx = calc_wheel_index(expires, clk); @@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) } } else { base = lock_timer_base(timer, &flags); + forward_timer_base(base); } ret = detach_if_pending(timer, base, false); @@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) raw_spin_lock(&base->lock); WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | base->cpu); + forward_timer_base(base); } } - /* Try to forward a stale timer base clock */ - forward_timer_base(base); - timer->expires = expires; /* * If 'idx' was calculated above and the base time did not advance @@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu) WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | cpu); } + forward_timer_base(base); debug_activate(timer, timer->expires); internal_add_timer(base, timer); @@ -1497,10 +1510,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) if (!is_max_delta) expires = basem + (u64)(nextevt - basej) * TICK_NSEC; /* - * If we expect to sleep more than a tick, mark the base idle: + * If we expect to sleep more than a tick, mark the base idle. + * Also the tick is stopped so any added timer must forward + * the base clk itself to keep granularity small. This idle + * logic is only maintained for the BASE_STD base, deferrable + * timers may still see large granularity skew (by design). */ - if ((expires - basem) > TICK_NSEC) + if ((expires - basem) > TICK_NSEC) { + base->must_forward_clk = true; base->is_idle = true; + } } raw_spin_unlock(&base->lock); @@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); + /* + * must_forward_clk must be cleared before running timers so that any + * timer functions that call mod_timer will not try to forward the + * base. idle trcking / clock forwarding logic is only used with + * BASE_STD timers. + * + * The deferrable base does not do idle tracking at all, so we do + * not forward it. This can result in very large variations in + * granularity for deferrable timers, but they can be deferred for + * long periods due to idle. + */ + base->must_forward_clk = false; + __run_timers(base); if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); -- cgit v1.2.3 From 0bcdc0987cce9880436b70836c6a92bb8e744fd1 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 25 Aug 2017 15:57:04 -0700 Subject: time: Fix ktime_get_raw() incorrect base accumulation In comqit fc6eead7c1e2 ("time: Clean up CLOCK_MONOTONIC_RAW time handling"), the following code got mistakenly added to the update of the raw timekeeper: /* Update the monotonic raw base */ seconds = tk->raw_sec; nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift); tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); Which adds the raw_sec value and the shifted down raw xtime_nsec to the base value. But the read function adds the shifted down tk->tkr_raw.xtime_nsec value another time, The result of this is that ktime_get_raw() users (which are all internal users) see the raw time move faster then it should (the rate at which can vary with the current size of tkr_raw.xtime_nsec), which has resulted in at least problems with graphics rendering performance. The change tried to match the monotonic base update logic: seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); nsec = (u32) tk->wall_to_monotonic.tv_nsec; tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); Which adds the wall_to_monotonic.tv_nsec value, but not the tk->tkr_mono.xtime_nsec value to the base. To fix this, simplify the tkr_raw.base accumulation to only accumulate the raw_sec portion, and do not include the tkr_raw.xtime_nsec portion, which will be added at read time. Fixes: fc6eead7c1e2 ("time: Clean up CLOCK_MONOTONIC_RAW time handling") Reported-and-tested-by: Chris Wilson Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Cc: Prarit Bhargava Cc: Kevin Brodsky Cc: Richard Cochran Cc: Stephen Boyd Cc: Will Deacon Cc: Miroslav Lichvar Cc: Daniel Mentz Link: http://lkml.kernel.org/r/1503701824-1645-1-git-send-email-john.stultz@linaro.org --- kernel/time/timekeeping.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cedafa008de5..7e7e61c00d61 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -637,9 +637,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) tk->ktime_sec = seconds; /* Update the monotonic raw base */ - seconds = tk->raw_sec; - nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift); - tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); + tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC); } /* must hold timekeeper_lock */ -- cgit v1.2.3 From 51218298a25e6942957c5595f2abf130d47d5df9 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 21 Aug 2017 00:01:46 +0200 Subject: alarmtimer: Ensure RTC module is not unloaded When registering the rtc device to be used to handle alarm timers, get_device is used to ensure the device doesn't go away but the module can still be unloaded. Call try_module_get to ensure the rtc driver will not go away. Reported-and-tested-by: Michal Simek Signed-off-by: Alexandre Belloni Signed-off-by: Thomas Gleixner Acked-by: John Stultz Cc: Stephen Boyd Link: http://lkml.kernel.org/r/20170820220146.30969-1-alexandre.belloni@free-electrons.com --- kernel/time/alarmtimer.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 73a2b476e59f..ec09ce9a6012 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "posix-timers.h" @@ -103,6 +104,11 @@ static int alarmtimer_rtc_add_device(struct device *dev, spin_lock_irqsave(&rtcdev_lock, flags); if (!rtcdev) { + if (!try_module_get(rtc->owner)) { + spin_unlock_irqrestore(&rtcdev_lock, flags); + return -1; + } + rtcdev = rtc; /* hold a reference so it doesn't go away */ get_device(dev); -- cgit v1.2.3 From a2d818030135c293f878fbb772cf40e7a14c5acc Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Fri, 8 Sep 2017 16:17:19 -0700 Subject: drivers/pps: aesthetic tweaks to PPS-related content Collection of aesthetic adjustments to various PPS-related files, directories and Documentation, some quite minor just for the sake of consistency, including: * Updated example of pps device tree node (courtesy Rodolfo G.) * "PPS-API" -> "PPS API" * "pps_source_info_s" -> "pps_source_info" * "ktimer driver" -> "pps-ktimer driver" * "ppstest /dev/pps0" -> "ppstest /dev/pps1" to match example * Add missing PPS-related entries to MAINTAINERS file * Other trivialities Link: http://lkml.kernel.org/r/alpine.LFD.2.20.1708261048220.8106@localhost.localdomain Signed-off-by: Robert P. J. Day Acked-by: Rodolfo Giometti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/timekeeping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8ea4fb315719..2cafb49aa65e 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2316,7 +2316,7 @@ void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts) raw_spin_unlock_irqrestore(&timekeeper_lock, flags); } EXPORT_SYMBOL(hardpps); -#endif +#endif /* CONFIG_NTP_PPS */ /** * xtime_update() - advances the timekeeping infrastructure -- cgit v1.2.3 From 5df32107f609c1f621bcdac0a685c23677ef671e Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Mon, 28 Aug 2017 08:21:53 -0400 Subject: timekeeping: Make fast accessors return 0 before timekeeping is initialized printk timestamps will be extended to include mono and boot time by using the fast timekeeping accessors ktime_get_mono|boot_fast_ns(). The functions can return garbage before timekeeping is initialized resulting in garbage timestamps. Initialize the fast timekeepers with dummy clocks which guarantee a 0 readout up to timekeeping_init(). Suggested-by: Peter Zijlstra Signed-off-by: Prarit Bhargava Signed-off-by: Thomas Gleixner Cc: Stephen Boyd Cc: John Stultz Link: http://lkml.kernel.org/r/1503922914-10660-2-git-send-email-prarit@redhat.com --- kernel/time/timekeeping.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2cafb49aa65e..6a92794427c9 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -60,8 +60,27 @@ struct tk_fast { struct tk_read_base base[2]; }; -static struct tk_fast tk_fast_mono ____cacheline_aligned; -static struct tk_fast tk_fast_raw ____cacheline_aligned; +/* Suspend-time cycles value for halted fast timekeeper. */ +static u64 cycles_at_suspend; + +static u64 dummy_clock_read(struct clocksource *cs) +{ + return cycles_at_suspend; +} + +static struct clocksource dummy_clock = { + .read = dummy_clock_read, +}; + +static struct tk_fast tk_fast_mono ____cacheline_aligned = { + .base[0] = { .clock = &dummy_clock, }, + .base[1] = { .clock = &dummy_clock, }, +}; + +static struct tk_fast tk_fast_raw ____cacheline_aligned = { + .base[0] = { .clock = &dummy_clock, }, + .base[1] = { .clock = &dummy_clock, }, +}; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -477,18 +496,6 @@ u64 notrace ktime_get_boot_fast_ns(void) } EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); -/* Suspend-time cycles value for halted fast timekeeper. */ -static u64 cycles_at_suspend; - -static u64 dummy_clock_read(struct clocksource *cs) -{ - return cycles_at_suspend; -} - -static struct clocksource dummy_clock = { - .read = dummy_clock_read, -}; - /** * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. * @tk: Timekeeper to snapshot. -- cgit v1.2.3 From 4c3711d7fb4763c63b2654f2d07cbe21ca5aadd4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Aug 2017 17:12:48 +0200 Subject: timekeeping: Provide NMI safe access to clock realtime The configurable printk timestamping wants access to clock realtime. Right now there is no ktime_get_real_fast_ns() accessor because reading the monotonic base and the realtime offset cannot be done atomically. Contrary to boot time this offset can change during runtime and cause half updated readouts. struct tk_read_base was fully packed when the fast timekeeper access was implemented. commit ceea5e3771ed ("time: Fix clock->read(clock) race around clocksource changes") removed the 'read' function pointer from the structure, but of course left the comment stale. So now the structure can fit a new 64bit member w/o violating the cache line constraints. Add real_base to tk_read_base and update it in the fast timekeeper update sequence. Implement an accessor which follows the same scheme as the accessor to clock monotonic, but uses the new real_base to access clock real time. The runtime overhead for updating real_base is minimal as it just adds two cache hot values and stores them into an already dirtied cache line along with the other fast timekeeper updates. Signed-off-by: Thomas Gleixner Cc: Prarit Bhargava Cc: John Stultz Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/1505757060-2004-3-git-send-email-prarit@redhat.com --- kernel/time/timekeeping.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 6a92794427c9..8af77006e937 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -496,6 +496,39 @@ u64 notrace ktime_get_boot_fast_ns(void) } EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); + +/* + * See comment for __ktime_get_fast_ns() vs. timestamp ordering + */ +static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf) +{ + struct tk_read_base *tkr; + unsigned int seq; + u64 now; + + do { + seq = raw_read_seqcount_latch(&tkf->seq); + tkr = tkf->base + (seq & 0x01); + now = ktime_to_ns(tkr->base_real); + + now += timekeeping_delta_to_ns(tkr, + clocksource_delta( + tk_clock_read(tkr), + tkr->cycle_last, + tkr->mask)); + } while (read_seqcount_retry(&tkf->seq, seq)); + + return now; +} + +/** + * ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime. + */ +u64 ktime_get_real_fast_ns(void) +{ + return __ktime_get_real_fast_ns(&tk_fast_mono); +} + /** * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. * @tk: Timekeeper to snapshot. @@ -514,6 +547,7 @@ static void halt_fast_timekeeper(struct timekeeper *tk) memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); cycles_at_suspend = tk_clock_read(tkr); tkr_dummy.clock = &dummy_clock; + tkr_dummy.base_real = tkr->base + tk->offs_real; update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); tkr = &tk->tkr_raw; @@ -661,6 +695,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) update_vsyscall(tk); update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET); + tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real; update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); -- cgit v1.2.3 From 58e1177b4cd10b0d358faf7d7ebb3779f98bc3ea Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 16:26:55 -0700 Subject: timer: Convert schedule_timeout() to use from_timer() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new from_timer() helper and passing the timer pointer explicitly. Since this special timer is on the stack, it needs to have a wrapper structure to carry state once .data is eliminated. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: Petr Mladek Cc: Benjamin Herrenschmidt Cc: Lai Jiangshan Cc: Sebastian Reichel Cc: Kalle Valo Cc: Paul Mackerras Cc: Pavel Machek Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: "James E.J. Bottomley" Cc: Wim Van Sebroeck Cc: Michael Ellerman Cc: Ursula Braun Cc: Geert Uytterhoeven Cc: Viresh Kumar Cc: Harish Patil Cc: Guenter Roeck Cc: Manish Chopra Cc: Len Brown Cc: Arnd Bergmann Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens Cc: Tejun Heo Cc: Julian Wiedmann Cc: John Stultz Cc: Mark Gross Cc: "Rafael J. Wysocki" Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" Cc: Greg Kroah-Hartman Cc: Stephen Boyd Cc: Oleg Nesterov Cc: Ralf Baechle Cc: Stefan Richter Cc: Michael Reed Cc: netdev@vger.kernel.org Cc: Martin Schwidefsky Cc: Andrew Morton Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee Link: https://lkml.kernel.org/r/1507159627-127660-2-git-send-email-keescook@chromium.org --- kernel/time/timer.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index f2674a056c26..38613ced2324 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1668,9 +1668,20 @@ void run_local_timers(void) raise_softirq(TIMER_SOFTIRQ); } -static void process_timeout(unsigned long __data) +/* + * Since schedule_timeout()'s timer is defined on the stack, it must store + * the target task on the stack as well. + */ +struct process_timer { + struct timer_list timer; + struct task_struct *task; +}; + +static void process_timeout(struct timer_list *t) { - wake_up_process((struct task_struct *)__data); + struct process_timer *timeout = from_timer(timeout, t, timer); + + wake_up_process(timeout->task); } /** @@ -1704,7 +1715,7 @@ static void process_timeout(unsigned long __data) */ signed long __sched schedule_timeout(signed long timeout) { - struct timer_list timer; + struct process_timer timer; unsigned long expire; switch (timeout) @@ -1738,13 +1749,14 @@ signed long __sched schedule_timeout(signed long timeout) expire = timeout + jiffies; - setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); - __mod_timer(&timer, expire, false); + timer.task = current; + timer_setup_on_stack(&timer.timer, process_timeout, 0); + __mod_timer(&timer.timer, expire, false); schedule(); - del_singleshot_timer_sync(&timer); + del_singleshot_timer_sync(&timer.timer); /* Remove the timer from the object tracker */ - destroy_timer_on_stack(&timer); + destroy_timer_on_stack(&timer.timer); timeout = expire - jiffies; -- cgit v1.2.3 From 62cb1188ed86a9cf082fd2f757d4dd9b54741f24 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 29 Aug 2017 15:07:54 +0200 Subject: sched/idle: Move quiet_vmstate() into the NOHZ code quiet_vmstat() is an expensive function that only makes sense when we go into NOHZ. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: aubrey.li@linux.intel.com Cc: cl@linux.com Cc: fweisbec@gmail.com Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index c7a899c5ce64..7b258c59d78a 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -787,6 +788,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, if (!ts->tick_stopped) { calc_load_nohz_start(); cpu_load_update_nohz_start(); + quiet_vmstat(); ts->last_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; -- cgit v1.2.3 From fe460423438b62eb7440d994ab19a9f444e6280d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Oct 2017 20:29:38 +0200 Subject: posix-stubs: Use get_timespec64() and put_timespec64() This is a follow-up to commit 5c4994102fb5 ("posix-timers: Use get_timespec64() and put_timespec64()"), which left two system call using copy_from_user()/copy_to_user(). Change them as well for consistency. Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Acked-by: Nicolas Pitre Cc: y2038@lists.linaro.org Cc: John Stultz Cc: Al Viro Cc: Deepa Dinamani Link: https://lkml.kernel.org/r/20171013183009.3442318-1-arnd@arndb.de --- kernel/time/posix-stubs.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index 06f34feb635e..b258bee13b02 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -117,8 +117,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, const struct timespec __user *, rqtp, struct timespec __user *, rmtp) { - struct timespec64 t64; - struct timespec t; + struct timespec64 t; switch (which_clock) { case CLOCK_REALTIME: @@ -129,16 +128,15 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, return -EINVAL; } - if (copy_from_user(&t, rqtp, sizeof (struct timespec))) + if (get_timespec64(&t, rqtp)) return -EFAULT; - t64 = timespec_to_timespec64(t); - if (!timespec64_valid(&t64)) + if (!timespec64_valid(&t)) return -EINVAL; if (flags & TIMER_ABSTIME) rmtp = NULL; current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; current->restart_block.nanosleep.rmtp = rmtp; - return hrtimer_nanosleep(&t64, flags & TIMER_ABSTIME ? + return hrtimer_nanosleep(&t, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } @@ -203,8 +201,7 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, struct compat_timespec __user *, rqtp, struct compat_timespec __user *, rmtp) { - struct timespec64 t64; - struct timespec t; + struct timespec64 t; switch (which_clock) { case CLOCK_REALTIME: @@ -215,16 +212,15 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, return -EINVAL; } - if (compat_get_timespec(&t, rqtp)) + if (compat_get_timespec64(&t, rqtp)) return -EFAULT; - t64 = timespec_to_timespec64(t); - if (!timespec64_valid(&t64)) + if (!timespec64_valid(&t)) return -EINVAL; if (flags & TIMER_ABSTIME) rmtp = NULL; current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; current->restart_block.nanosleep.compat_rmtp = rmtp; - return hrtimer_nanosleep(&t64, flags & TIMER_ABSTIME ? + return hrtimer_nanosleep(&t, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } -- cgit v1.2.3 From 4eb1bca1793385b8caff4b2e1f19b31a013dd1ec Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Oct 2017 20:34:35 +0200 Subject: time: Use do_settimeofday64() internally do_settimeofday() is a wrapper around do_settimeofday64(), so that function can be called directly. The wrapper can be removed once the last user is gone. Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Cc: y2038@lists.linaro.org Cc: Frederic Weisbecker Cc: Stephen Boyd Cc: John Stultz Cc: Al Viro Cc: Deepa Dinamani Link: https://lkml.kernel.org/r/20171013183452.3635956-1-arnd@arndb.de --- kernel/time/time.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/time.c b/kernel/time/time.c index 44a8c1402133..cfe3d3e4679f 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -82,7 +82,7 @@ SYSCALL_DEFINE1(time, time_t __user *, tloc) SYSCALL_DEFINE1(stime, time_t __user *, tptr) { - struct timespec tv; + struct timespec64 tv; int err; if (get_user(tv.tv_sec, tptr)) @@ -90,11 +90,11 @@ SYSCALL_DEFINE1(stime, time_t __user *, tptr) tv.tv_nsec = 0; - err = security_settime(&tv, NULL); + err = security_settime64(&tv, NULL); if (err) return err; - do_settimeofday(&tv); + do_settimeofday64(&tv); return 0; } @@ -122,7 +122,7 @@ COMPAT_SYSCALL_DEFINE1(time, compat_time_t __user *, tloc) COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr) { - struct timespec tv; + struct timespec64 tv; int err; if (get_user(tv.tv_sec, tptr)) @@ -130,11 +130,11 @@ COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr) tv.tv_nsec = 0; - err = security_settime(&tv, NULL); + err = security_settime64(&tv, NULL); if (err) return err; - do_settimeofday(&tv); + do_settimeofday64(&tv); return 0; } -- cgit v1.2.3 From c310ce4dcb9df9b2f1be82caff7dae609fe53f72 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Sun, 8 Oct 2017 20:55:59 -0700 Subject: timers: Avoid an unnecessary iteration in __run_timers() If the base clock is behind jiffies in the soft irq expiry code then the next timer is retrieved by get_next_timer_interrupt() to avoid incrementing base clock one by one. If the next timer interrupt is past current jiffies then the base clock is set to jiffies - 1. At the call site this is incremented and another iteration through the expiry loop is executed which checks empty hash buckets. That's a pointless excercise because it's already known that the next timer is past jiffies. Set the base clock in that case to jiffies directly so it gets incremented to jiffies + 1 at the call site resulting in immediate termination of the expiry loop. [ tglx: Massaged changelog and added comment to the code ] Signed-off-by: Zhenzhong Duan Signed-off-by: Thomas Gleixner Acked-by: Anna-Maria Gleixner Cc: Joe Jin Cc: sboyd@codeaurora.org Cc: Srinivas Reddy Eeda Cc: john.stultz@linaro.org Link: https://lkml.kernel.org/r/7086a857-f90c-4616-bbe8-f7696f21626c@default --- kernel/time/timer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 38613ced2324..ee1a88d8afb2 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1560,8 +1560,11 @@ static int collect_expired_timers(struct timer_base *base, * jiffies, otherwise forward to the next expiry time: */ if (time_after(next, jiffies)) { - /* The call site will increment clock! */ - base->clk = jiffies - 1; + /* + * The call site will increment base->clk and then + * terminate the expiry loop immediately. + */ + base->clk = jiffies; return 0; } base->clk = next; -- cgit v1.2.3 From ba16490eac146ebb178017e5de3d61c645552fab Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Oct 2017 16:10:19 +0200 Subject: timer: Convert stub timer to timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Signed-off-by: Thomas Gleixner Cc: Kees Cook --- kernel/time/timer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ee1a88d8afb2..fbb1f85327bf 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -610,7 +610,7 @@ static bool timer_fixup_init(void *addr, enum debug_obj_state state) } /* Stub timer callback for improperly used timers. */ -static void stub_timer(unsigned long data) +static void stub_timer(struct timer_list *unused) { WARN_ON(1); } @@ -626,7 +626,7 @@ static bool timer_fixup_activate(void *addr, enum debug_obj_state state) switch (state) { case ODEBUG_STATE_NOTAVAILABLE: - setup_timer(timer, stub_timer, 0); + timer_setup(timer, stub_timer, 0); return true; case ODEBUG_STATE_ACTIVE: @@ -665,7 +665,7 @@ static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state) switch (state) { case ODEBUG_STATE_NOTAVAILABLE: - setup_timer(timer, stub_timer, 0); + timer_setup(timer, stub_timer, 0); return true; default: return false; -- cgit v1.2.3 From 3a29ddb1c5986a6d3f941bfb1f434105203ce7f6 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 19 Oct 2017 15:17:23 +0100 Subject: clockevents: Retry programming min delta up to 10 times When CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=n, the call path hrtimer_reprogram -> clockevents_program_event -> clockevents_program_min_delta will not retry if the clock event driver returns -ETIME. If the driver could not satisfy the program_min_delta for any reason, the lack of a retry means the CPU may not receive a tick interrupt, potentially until the counter does a full period. This leads to rcu_sched timeout messages as the stalled CPU is detected by other CPUs, and other issues if the CPU is holding locks or other resources at the point at which it stalls. There have been a couple of observed mechanisms through which a clock event driver could not satisfy the requested min_delta and return -ETIME. With the MIPS GIC driver, the shared execution resource within MT cores means inconventient latency due to execution of instructions from other hardware threads in the core, within gic_next_event, can result in an event being set in the past. Additionally under virtualisation it is possible to get unexpected latency during a clockevent device's set_next_event() callback which can make it return -ETIME even for a delta based on min_delta_ns. It isn't appropriate to use MIN_ADJUST in the virtualisation case as occasional hypervisor induced high latency will cause min_delta_ns to quickly increase to the maximum. Instead, borrow the retry pattern from the MIN_ADJUST case, but without making adjustments. Retry up to 10 times, each time increasing the attempted delta by min_delta, before giving up. [ Matt: Reworked the loop and made retry increase the delta. ] Signed-off-by: James Hogan Signed-off-by: Matt Redfearn Signed-off-by: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: Daniel Lezcano Cc: "Martin Schwidefsky" Cc: James Hogan Link: https://lkml.kernel.org/r/1508422643-6075-1-git-send-email-matt.redfearn@mips.com --- kernel/time/clockevents.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 4237e0744e26..16c027e9cc73 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -280,17 +280,22 @@ static int clockevents_program_min_delta(struct clock_event_device *dev) static int clockevents_program_min_delta(struct clock_event_device *dev) { unsigned long long clc; - int64_t delta; + int64_t delta = 0; + int i; - delta = dev->min_delta_ns; - dev->next_event = ktime_add_ns(ktime_get(), delta); + for (i = 0; i < 10; i++) { + delta += dev->min_delta_ns; + dev->next_event = ktime_add_ns(ktime_get(), delta); - if (clockevent_state_shutdown(dev)) - return 0; + if (clockevent_state_shutdown(dev)) + return 0; - dev->retries++; - clc = ((unsigned long long) delta * dev->mult) >> dev->shift; - return dev->set_next_event((unsigned long) clc, dev); + dev->retries++; + clc = ((unsigned long long) delta * dev->mult) >> dev->shift; + if (dev->set_next_event((unsigned long) clc, dev) == 0) + return 0; + } + return -ETIME; } #endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */ -- cgit v1.2.3 From 7863406143d8bbbbda07a61285c5f4c217908dfd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:28 +0200 Subject: sched/isolation: Move housekeeping related code to its own file The housekeeping code is currently tied to the NOHZ code. As we are planning to make housekeeping independent from it, start with moving the relevant code to its own file. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Acked-by: Paul E. McKenney Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-2-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7b258c59d78a..27d7d522ac4e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -166,7 +166,6 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) #ifdef CONFIG_NO_HZ_FULL cpumask_var_t tick_nohz_full_mask; -cpumask_var_t housekeeping_mask; bool tick_nohz_full_running; static atomic_t tick_dep_mask; @@ -438,13 +437,6 @@ void __init tick_nohz_init(void) return; } - if (!alloc_cpumask_var(&housekeeping_mask, GFP_KERNEL)) { - WARN(1, "NO_HZ: Can't allocate not-full dynticks cpumask\n"); - cpumask_clear(tick_nohz_full_mask); - tick_nohz_full_running = false; - return; - } - /* * Full dynticks uses irq work to drive the tick rescheduling on safe * locking contexts. But then we need irq work to raise its own @@ -453,7 +445,6 @@ void __init tick_nohz_init(void) if (!arch_irq_work_has_interrupt()) { pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n"); cpumask_clear(tick_nohz_full_mask); - cpumask_copy(housekeeping_mask, cpu_possible_mask); tick_nohz_full_running = false; return; } @@ -466,9 +457,6 @@ void __init tick_nohz_init(void) cpumask_clear_cpu(cpu, tick_nohz_full_mask); } - cpumask_andnot(housekeeping_mask, - cpu_possible_mask, tick_nohz_full_mask); - for_each_cpu(cpu, tick_nohz_full_mask) context_tracking_cpu_set(cpu); @@ -478,12 +466,6 @@ void __init tick_nohz_init(void) WARN_ON(ret < 0); pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", cpumask_pr_args(tick_nohz_full_mask)); - - /* - * We need at least one CPU to handle housekeeping work such - * as timekeeping, unbound timers, workqueues, ... - */ - WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); } #endif -- cgit v1.2.3 From 6f1982fedd59856bcc42a9b521be4c3ffd2f60a7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:36 +0200 Subject: sched/isolation: Handle the nohz_full= parameter We want to centralize the isolation management, done by the housekeeping subsystem. Therefore we need to handle the nohz_full= parameter from there. Since nohz_full= so far has involved unbound timers, watchdog, RCU and tilegx NAPI isolation, we keep that default behaviour. nohz_full= will be deprecated in the future. We want to control the isolation features from the isolcpus= parameter. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-10-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 27d7d522ac4e..69f3dbe38984 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -385,20 +385,13 @@ out: local_irq_restore(flags); } -/* Parse the boot-time nohz CPU list from the kernel parameters. */ -static int __init tick_nohz_full_setup(char *str) +/* Get the boot-time nohz CPU list from the kernel parameters. */ +void __init tick_nohz_full_setup(cpumask_var_t cpumask) { alloc_bootmem_cpumask_var(&tick_nohz_full_mask); - if (cpulist_parse(str, tick_nohz_full_mask) < 0) { - pr_warn("NO_HZ: Incorrect nohz_full cpumask\n"); - free_bootmem_cpumask_var(tick_nohz_full_mask); - return 1; - } + cpumask_copy(tick_nohz_full_mask, cpumask); tick_nohz_full_running = true; - - return 1; } -__setup("nohz_full=", tick_nohz_full_setup); static int tick_nohz_cpu_down(unsigned int cpu) { -- cgit v1.2.3 From 0f295b0650c90362b4111f46d7f9149a0a4191be Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 13 Oct 2017 11:54:33 -0600 Subject: rtc: Allow rtc drivers to specify the tv_nsec value for ntp ntp is currently hardwired to try and call the rtc set when wall clock tv_nsec is 0.5 seconds. This historical behaviour works well with certain PC RTCs, but is not universal to all rtc hardware. Change how this works by introducing the driver specific concept of set_offset_nsec, the delay between current wall clock time and the target time to set (with a 0 tv_nsecs). For x86-style CMOS set_offset_nsec should be -0.5 s which causes the last second to be written 0.5 s after it has started. For compat with the old rtc_set_ntp_time, the value is defaulted to + 0.5 s, which causes the next second to be written 0.5s before it starts, as things were before this patch. Testing shows many non-x86 RTCs would like set_offset_nsec ~= 0, so ultimately each RTC driver should set the set_offset_nsec according to its needs, and non x86 architectures should stop using update_persistent_clock64 in order to access this feature. Future patches will revise the drivers as needed. Since CMOS and RTC now have very different handling they are split into two dedicated code paths, sharing the support code, and ifdefs are replaced with IS_ENABLED. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Signed-off-by: Jason Gunthorpe Signed-off-by: John Stultz --- kernel/time/ntp.c | 166 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 113 insertions(+), 53 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index edf19cc53140..bc19de1a0683 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -492,6 +492,67 @@ out: return leap; } +static void sync_hw_clock(struct work_struct *work); +static DECLARE_DELAYED_WORK(sync_work, sync_hw_clock); + +static void sched_sync_hw_clock(struct timespec64 now, + unsigned long target_nsec, bool fail) + +{ + struct timespec64 next; + + getnstimeofday64(&next); + if (!fail) + next.tv_sec = 659; + else { + /* + * Try again as soon as possible. Delaying long periods + * decreases the accuracy of the work queue timer. Due to this + * the algorithm is very likely to require a short-sleep retry + * after the above long sleep to synchronize ts_nsec. + */ + next.tv_sec = 0; + } + + /* Compute the needed delay that will get to tv_nsec == target_nsec */ + next.tv_nsec = target_nsec - next.tv_nsec; + if (next.tv_nsec <= 0) + next.tv_nsec += NSEC_PER_SEC; + if (next.tv_nsec >= NSEC_PER_SEC) { + next.tv_sec++; + next.tv_nsec -= NSEC_PER_SEC; + } + + queue_delayed_work(system_power_efficient_wq, &sync_work, + timespec64_to_jiffies(&next)); +} + +static void sync_rtc_clock(void) +{ + unsigned long target_nsec; + struct timespec64 adjust, now; + int rc; + + if (!IS_ENABLED(CONFIG_RTC_SYSTOHC)) + return; + + getnstimeofday64(&now); + + adjust = now; + if (persistent_clock_is_local) + adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); + + /* + * The current RTC in use will provide the target_nsec it wants to be + * called at, and does rtc_tv_nsec_ok internally. + */ + rc = rtc_set_ntp_time(adjust, &target_nsec); + if (rc == -ENODEV) + return; + + sched_sync_hw_clock(now, target_nsec, rc); +} + #ifdef CONFIG_GENERIC_CMOS_UPDATE int __weak update_persistent_clock(struct timespec now) { @@ -507,76 +568,75 @@ int __weak update_persistent_clock64(struct timespec64 now64) } #endif -#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) -static void sync_cmos_clock(struct work_struct *work); - -static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); - -static void sync_cmos_clock(struct work_struct *work) +static bool sync_cmos_clock(void) { + static bool no_cmos; struct timespec64 now; - struct timespec64 next; - int fail = 1; + struct timespec64 adjust; + int rc = -EPROTO; + long target_nsec = NSEC_PER_SEC / 2; + + if (!IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE)) + return false; + + if (no_cmos) + return false; /* - * If we have an externally synchronized Linux clock, then update - * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be - * called as close as possible to 500 ms before the new second starts. - * This code is run on a timer. If the clock is set, that timer - * may not expire at the correct time. Thus, we adjust... - * We want the clock to be within a couple of ticks from the target. + * Historically update_persistent_clock64() has followed x86 + * semantics, which match the MC146818A/etc RTC. This RTC will store + * 'adjust' and then in .5s it will advance once second. + * + * Architectures are strongly encouraged to use rtclib and not + * implement this legacy API. */ - if (!ntp_synced()) { - /* - * Not synced, exit, do not restart a timer (if one is - * running, let it run out). - */ - return; - } - getnstimeofday64(&now); - if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec * 5) { - struct timespec64 adjust = now; - - fail = -ENODEV; + if (rtc_tv_nsec_ok(-1 * target_nsec, &adjust, &now)) { if (persistent_clock_is_local) adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); -#ifdef CONFIG_GENERIC_CMOS_UPDATE - fail = update_persistent_clock64(adjust); -#endif - -#ifdef CONFIG_RTC_SYSTOHC - if (fail == -ENODEV) - fail = rtc_set_ntp_time(adjust); -#endif + rc = update_persistent_clock64(adjust); + /* + * The machine does not support update_persistent_clock64 even + * though it defines CONFIG_GENERIC_CMOS_UPDATE. + */ + if (rc == -ENODEV) { + no_cmos = true; + return false; + } } - next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2); - if (next.tv_nsec <= 0) - next.tv_nsec += NSEC_PER_SEC; + sched_sync_hw_clock(now, target_nsec, rc); + return true; +} - if (!fail || fail == -ENODEV) - next.tv_sec = 659; - else - next.tv_sec = 0; +/* + * If we have an externally synchronized Linux clock, then update RTC clock + * accordingly every ~11 minutes. Generally RTCs can only store second + * precision, but many RTCs will adjust the phase of their second tick to + * match the moment of update. This infrastructure arranges to call to the RTC + * set at the correct moment to phase synchronize the RTC second tick over + * with the kernel clock. + */ +static void sync_hw_clock(struct work_struct *work) +{ + if (!ntp_synced()) + return; - if (next.tv_nsec >= NSEC_PER_SEC) { - next.tv_sec++; - next.tv_nsec -= NSEC_PER_SEC; - } - queue_delayed_work(system_power_efficient_wq, - &sync_cmos_work, timespec64_to_jiffies(&next)); + if (sync_cmos_clock()) + return; + + sync_rtc_clock(); } void ntp_notify_cmos_timer(void) { - queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0); -} - -#else -void ntp_notify_cmos_timer(void) { } -#endif + if (!ntp_synced()) + return; + if (IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE) || + IS_ENABLED(CONFIG_RTC_SYSTOHC)) + queue_delayed_work(system_power_efficient_wq, &sync_work, 0); +} /* * Propagate a new txc->status value into the NTP state: -- cgit v1.2.3 From e0956dcc4ba74ec4b17e32fc9a156fcba1ef6610 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 19 Oct 2017 13:14:44 +0200 Subject: timekeeping: Consolidate timekeeping_inject_offset code The code to check the adjtimex() or clock_adjtime() arguments is spread out across multiple files for presumably only historic reasons. As a preparatation for a rework to get rid of the use of 'struct timeval' and 'struct timespec' in there, this moves all the portions into kernel/time/timekeeping.c and marks them as 'static'. The warp_clock() function here is not as closely related as the others, but I feel it still makes sense to move it here in order to consolidate all callers of timekeeping_inject_offset(). Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Signed-off-by: Arnd Bergmann [jstultz: Whitespace fixup] Signed-off-by: John Stultz --- kernel/time/ntp.c | 61 ---------------------- kernel/time/ntp_internal.h | 1 - kernel/time/time.c | 36 +------------ kernel/time/timekeeping.c | 123 ++++++++++++++++++++++++++++++++++++++++++++- kernel/time/timekeeping.h | 2 +- 5 files changed, 123 insertions(+), 100 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index bc19de1a0683..90f84582a076 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -713,67 +713,6 @@ static inline void process_adjtimex_modes(struct timex *txc, } - -/** - * ntp_validate_timex - Ensures the timex is ok for use in do_adjtimex - */ -int ntp_validate_timex(struct timex *txc) -{ - if (txc->modes & ADJ_ADJTIME) { - /* singleshot must not be used with any other mode bits */ - if (!(txc->modes & ADJ_OFFSET_SINGLESHOT)) - return -EINVAL; - if (!(txc->modes & ADJ_OFFSET_READONLY) && - !capable(CAP_SYS_TIME)) - return -EPERM; - } else { - /* In order to modify anything, you gotta be super-user! */ - if (txc->modes && !capable(CAP_SYS_TIME)) - return -EPERM; - /* - * if the quartz is off by more than 10% then - * something is VERY wrong! - */ - if (txc->modes & ADJ_TICK && - (txc->tick < 900000/USER_HZ || - txc->tick > 1100000/USER_HZ)) - return -EINVAL; - } - - if (txc->modes & ADJ_SETOFFSET) { - /* In order to inject time, you gotta be super-user! */ - if (!capable(CAP_SYS_TIME)) - return -EPERM; - - if (txc->modes & ADJ_NANO) { - struct timespec ts; - - ts.tv_sec = txc->time.tv_sec; - ts.tv_nsec = txc->time.tv_usec; - if (!timespec_inject_offset_valid(&ts)) - return -EINVAL; - - } else { - if (!timeval_inject_offset_valid(&txc->time)) - return -EINVAL; - } - } - - /* - * Check for potential multiplication overflows that can - * only happen on 64-bit systems: - */ - if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) { - if (LLONG_MIN / PPM_SCALE > txc->freq) - return -EINVAL; - if (LLONG_MAX / PPM_SCALE < txc->freq) - return -EINVAL; - } - - return 0; -} - - /* * adjtimex mainly allows reading (and writing, if superuser) of * kernel time-keeping variables. used by xntpd. diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index d8a7c11fa71a..74b52cd48209 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -7,7 +7,6 @@ extern void ntp_clear(void); extern u64 ntp_tick_length(void); extern ktime_t ntp_get_next_leap(void); extern int second_overflow(time64_t secs); -extern int ntp_validate_timex(struct timex *); extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *); extern void __hardpps(const struct timespec64 *, const struct timespec64 *); #endif /* _LINUX_NTP_INTERNAL_H */ diff --git a/kernel/time/time.c b/kernel/time/time.c index 44a8c1402133..04684e294f00 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -157,40 +157,6 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv, return 0; } -/* - * Indicates if there is an offset between the system clock and the hardware - * clock/persistent clock/rtc. - */ -int persistent_clock_is_local; - -/* - * Adjust the time obtained from the CMOS to be UTC time instead of - * local time. - * - * This is ugly, but preferable to the alternatives. Otherwise we - * would either need to write a program to do it in /etc/rc (and risk - * confusion if the program gets run more than once; it would also be - * hard to make the program warp the clock precisely n hours) or - * compile in the timezone information into the kernel. Bad, bad.... - * - * - TYT, 1992-01-01 - * - * The best thing to do is to keep the CMOS clock in universal time (UTC) - * as real UNIX machines always do it. This avoids all headaches about - * daylight saving times and warping kernel clocks. - */ -static inline void warp_clock(void) -{ - if (sys_tz.tz_minuteswest != 0) { - struct timespec adjust; - - persistent_clock_is_local = 1; - adjust.tv_sec = sys_tz.tz_minuteswest * 60; - adjust.tv_nsec = 0; - timekeeping_inject_offset(&adjust); - } -} - /* * In case for some reason the CMOS clock has not already been running * in UTC, but in some local time: The first time we set the timezone, @@ -224,7 +190,7 @@ int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz if (firsttime) { firsttime = 0; if (!tv) - warp_clock(); + timekeeping_warp_clock(); } } if (tv) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2cafb49aa65e..7d8e0e842484 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1258,13 +1258,39 @@ out: } EXPORT_SYMBOL(do_settimeofday64); +/* + * Validates if a timespec/timeval used to inject a time offset is valid. + * Offsets can be postive or negative. The value of the timeval/timespec + * is the sum of its fields, but *NOTE*: the field tv_usec/tv_nsec must + * always be non-negative. + */ +static inline bool timeval_inject_offset_valid(const struct timeval *tv) +{ + /* We don't check the tv_sec as it can be positive or negative */ + + /* Can't have more microseconds then a second */ + if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC) + return false; + return true; +} + +static inline bool timespec_inject_offset_valid(const struct timespec *ts) +{ + /* We don't check the tv_sec as it can be positive or negative */ + + /* Can't have more nanoseconds then a second */ + if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) + return false; + return true; +} + /** * timekeeping_inject_offset - Adds or subtracts from the current time. * @tv: pointer to the timespec variable containing the offset * * Adds or subtracts an offset value from the current time. */ -int timekeeping_inject_offset(struct timespec *ts) +static int timekeeping_inject_offset(struct timespec *ts) { struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; @@ -1303,7 +1329,40 @@ error: /* even if we error out, we forwarded the time, so call update */ return ret; } -EXPORT_SYMBOL(timekeeping_inject_offset); + +/* + * Indicates if there is an offset between the system clock and the hardware + * clock/persistent clock/rtc. + */ +int persistent_clock_is_local; + +/* + * Adjust the time obtained from the CMOS to be UTC time instead of + * local time. + * + * This is ugly, but preferable to the alternatives. Otherwise we + * would either need to write a program to do it in /etc/rc (and risk + * confusion if the program gets run more than once; it would also be + * hard to make the program warp the clock precisely n hours) or + * compile in the timezone information into the kernel. Bad, bad.... + * + * - TYT, 1992-01-01 + * + * The best thing to do is to keep the CMOS clock in universal time (UTC) + * as real UNIX machines always do it. This avoids all headaches about + * daylight saving times and warping kernel clocks. + */ +void timekeeping_warp_clock(void) +{ + if (sys_tz.tz_minuteswest != 0) { + struct timespec adjust; + + persistent_clock_is_local = 1; + adjust.tv_sec = sys_tz.tz_minuteswest * 60; + adjust.tv_nsec = 0; + timekeeping_inject_offset(&adjust); + } +} /** * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic @@ -2247,6 +2306,66 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, return base; } +/** + * ntp_validate_timex - Ensures the timex is ok for use in do_adjtimex + */ +static int ntp_validate_timex(struct timex *txc) +{ + if (txc->modes & ADJ_ADJTIME) { + /* singleshot must not be used with any other mode bits */ + if (!(txc->modes & ADJ_OFFSET_SINGLESHOT)) + return -EINVAL; + if (!(txc->modes & ADJ_OFFSET_READONLY) && + !capable(CAP_SYS_TIME)) + return -EPERM; + } else { + /* In order to modify anything, you gotta be super-user! */ + if (txc->modes && !capable(CAP_SYS_TIME)) + return -EPERM; + /* + * if the quartz is off by more than 10% then + * something is VERY wrong! + */ + if (txc->modes & ADJ_TICK && + (txc->tick < 900000/USER_HZ || + txc->tick > 1100000/USER_HZ)) + return -EINVAL; + } + + if (txc->modes & ADJ_SETOFFSET) { + /* In order to inject time, you gotta be super-user! */ + if (!capable(CAP_SYS_TIME)) + return -EPERM; + + if (txc->modes & ADJ_NANO) { + struct timespec ts; + + ts.tv_sec = txc->time.tv_sec; + ts.tv_nsec = txc->time.tv_usec; + if (!timespec_inject_offset_valid(&ts)) + return -EINVAL; + + } else { + if (!timeval_inject_offset_valid(&txc->time)) + return -EINVAL; + } + } + + /* + * Check for potential multiplication overflows that can + * only happen on 64-bit systems: + */ + if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) { + if (LLONG_MIN / PPM_SCALE > txc->freq) + return -EINVAL; + if (LLONG_MAX / PPM_SCALE < txc->freq) + return -EINVAL; + } + + return 0; +} + + /** * do_adjtimex() - Accessor function to NTP __do_adjtimex function */ diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index d0914676d4c5..44aec7893cdd 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -10,7 +10,7 @@ extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, extern int timekeeping_valid_for_hres(void); extern u64 timekeeping_max_deferment(void); -extern int timekeeping_inject_offset(struct timespec *ts); +extern void timekeeping_warp_clock(void); extern int timekeeping_suspend(void); extern void timekeeping_resume(void); -- cgit v1.2.3 From 1572fa03784831b81ec26ec379374cf6bdec04fb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 19 Oct 2017 13:14:45 +0200 Subject: timekeeping: Use timespec64 in timekeeping_inject_offset As part of changing all the timekeeping code to use 64-bit time_t consistently, this removes the uses of timeval and timespec as much as possible from do_adjtimex() and timekeeping_inject_offset(). The timeval_inject_offset_valid() and timespec_inject_offset_valid() just complicate this, so I'm folding them into the respective callers. This leaves the actual 'struct timex' definition, which is part of the user-space ABI and should be dealt with separately when we have agreed on the ABI change. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- kernel/time/timekeeping.c | 72 ++++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 47 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 7d8e0e842484..c6a35fb3cf76 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1258,65 +1258,37 @@ out: } EXPORT_SYMBOL(do_settimeofday64); -/* - * Validates if a timespec/timeval used to inject a time offset is valid. - * Offsets can be postive or negative. The value of the timeval/timespec - * is the sum of its fields, but *NOTE*: the field tv_usec/tv_nsec must - * always be non-negative. - */ -static inline bool timeval_inject_offset_valid(const struct timeval *tv) -{ - /* We don't check the tv_sec as it can be positive or negative */ - - /* Can't have more microseconds then a second */ - if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC) - return false; - return true; -} - -static inline bool timespec_inject_offset_valid(const struct timespec *ts) -{ - /* We don't check the tv_sec as it can be positive or negative */ - - /* Can't have more nanoseconds then a second */ - if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) - return false; - return true; -} - /** * timekeeping_inject_offset - Adds or subtracts from the current time. * @tv: pointer to the timespec variable containing the offset * * Adds or subtracts an offset value from the current time. */ -static int timekeeping_inject_offset(struct timespec *ts) +static int timekeeping_inject_offset(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; - struct timespec64 ts64, tmp; + struct timespec64 tmp; int ret = 0; - if (!timespec_inject_offset_valid(ts)) + if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) return -EINVAL; - ts64 = timespec_to_timespec64(*ts); - raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); /* Make sure the proposed value is valid */ - tmp = timespec64_add(tk_xtime(tk), ts64); - if (timespec64_compare(&tk->wall_to_monotonic, &ts64) > 0 || + tmp = timespec64_add(tk_xtime(tk), *ts); + if (timespec64_compare(&tk->wall_to_monotonic, ts) > 0 || !timespec64_valid_strict(&tmp)) { ret = -EINVAL; goto error; } - tk_xtime_add(tk, &ts64); - tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts64)); + tk_xtime_add(tk, ts); + tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *ts)); error: /* even if we error out, we forwarded the time, so call update */ timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); @@ -1355,7 +1327,7 @@ int persistent_clock_is_local; void timekeeping_warp_clock(void) { if (sys_tz.tz_minuteswest != 0) { - struct timespec adjust; + struct timespec64 adjust; persistent_clock_is_local = 1; adjust.tv_sec = sys_tz.tz_minuteswest * 60; @@ -2307,9 +2279,9 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, } /** - * ntp_validate_timex - Ensures the timex is ok for use in do_adjtimex + * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex */ -static int ntp_validate_timex(struct timex *txc) +static int timekeeping_validate_timex(struct timex *txc) { if (txc->modes & ADJ_ADJTIME) { /* singleshot must not be used with any other mode bits */ @@ -2337,16 +2309,22 @@ static int ntp_validate_timex(struct timex *txc) if (!capable(CAP_SYS_TIME)) return -EPERM; - if (txc->modes & ADJ_NANO) { - struct timespec ts; + /* + * Validate if a timespec/timeval used to inject a time + * offset is valid. Offsets can be postive or negative, so + * we don't check tv_sec. The value of the timeval/timespec + * is the sum of its fields,but *NOTE*: + * The field tv_usec/tv_nsec must always be non-negative and + * we can't have more nanoseconds/microseconds than a second. + */ + if (txc->time.tv_usec < 0) + return -EINVAL; - ts.tv_sec = txc->time.tv_sec; - ts.tv_nsec = txc->time.tv_usec; - if (!timespec_inject_offset_valid(&ts)) + if (txc->modes & ADJ_NANO) { + if (txc->time.tv_usec >= NSEC_PER_SEC) return -EINVAL; - } else { - if (!timeval_inject_offset_valid(&txc->time)) + if (txc->time.tv_usec >= USEC_PER_SEC) return -EINVAL; } } @@ -2378,12 +2356,12 @@ int do_adjtimex(struct timex *txc) int ret; /* Validate the data before disabling interrupts */ - ret = ntp_validate_timex(txc); + ret = timekeeping_validate_timex(txc); if (ret) return ret; if (txc->modes & ADJ_SETOFFSET) { - struct timespec delta; + struct timespec64 delta; delta.tv_sec = txc->time.tv_sec; delta.tv_nsec = txc->time.tv_usec; if (!(txc->modes & ADJ_NANO)) -- cgit v1.2.3 From 85bf19e7df2479140eff2348a4e6a9c19b5c3960 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 19 Oct 2017 13:14:46 +0200 Subject: time: Remove unused functions The (slow but) ongoing work on conversion from timespec to timespec64 has led some timespec based helper functions to become unused. No new code should use them, so we can remove the functions entirely. I'm planning to obsolete additional interfaces next and remove more of these. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- kernel/time/time.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/time.c b/kernel/time/time.c index 04684e294f00..947fb614c78f 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -818,24 +818,6 @@ unsigned long nsecs_to_jiffies(u64 n) } EXPORT_SYMBOL_GPL(nsecs_to_jiffies); -/* - * Add two timespec values and do a safety check for overflow. - * It's assumed that both values are valid (>= 0) - */ -struct timespec timespec_add_safe(const struct timespec lhs, - const struct timespec rhs) -{ - struct timespec res; - - set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec, - lhs.tv_nsec + rhs.tv_nsec); - - if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec) - res.tv_sec = TIME_T_MAX; - - return res; -} - /* * Add two timespec64 values and do a safety check for overflow. * It's assumed that both values are valid (>= 0). -- cgit v1.2.3 From abc8f96e3eb846fcf6333395ee1f6ed4a734576c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 19 Oct 2017 13:14:48 +0200 Subject: time: Move time_t conversion helpers to time32.h On 64-bit architectures, the timespec64 based helpers in linux/time.h are defined as macros pointing to their timespec based counterparts. This made sense when they were first introduced, but as we are migrating away from timespec in general, it's much less intuitive now. This changes the macros to work in the exact opposite way: we always provide the timespec64 based helpers and define the old interfaces as macros for them. Now we can move those macros into linux/time32.h, which already contains the respective helpers for 32-bit architectures. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- kernel/time/time.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/time.c b/kernel/time/time.c index 947fb614c78f..fe60ebd301cf 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -407,6 +407,7 @@ time64_t mktime64(const unsigned int year0, const unsigned int mon0, } EXPORT_SYMBOL(mktime64); +#if __BITS_PER_LONG == 32 /** * set_normalized_timespec - set timespec sec and nsec parts and normalize * @@ -467,6 +468,7 @@ struct timespec ns_to_timespec(const s64 nsec) return ts; } EXPORT_SYMBOL(ns_to_timespec); +#endif /** * ns_to_timeval - Convert nanoseconds to timeval @@ -486,7 +488,6 @@ struct timeval ns_to_timeval(const s64 nsec) } EXPORT_SYMBOL(ns_to_timeval); -#if BITS_PER_LONG == 32 /** * set_normalized_timespec - set timespec sec and nsec parts and normalize * @@ -547,7 +548,7 @@ struct timespec64 ns_to_timespec64(const s64 nsec) return ts; } EXPORT_SYMBOL(ns_to_timespec64); -#endif + /** * msecs_to_jiffies: - convert milliseconds to jiffies * @m: time in milliseconds -- cgit v1.2.3 From 39c82caff8610d57ffe32157cb3130dfabe12fbe Mon Sep 17 00:00:00 2001 From: Prasad Sodagudi Date: Thu, 26 Oct 2017 11:37:22 -0700 Subject: clockevents: Update clockevents device next_event on stop clockevent_device::next_event holds the next timer event of a clock event device. The value is updated in clockevents_program_event(), i.e. when the hardware timer is armed for the next expiry. When there are no software timers armed on a CPU, the corresponding per CPU clockevent device is brought into ONESHOT_STOPPED state, but clockevent_device::next_event is not updated, because clockevents_program_event() is not called. So the content of clockevent_device::next_event is stale, which is not an issue when real hardware is used. But the hrtimer broadcast device relies on that information and the stale value causes spurious wakeups. Update clockevent_device::next_event to KTIME_MAX when it has been brought into ONESHOT_STOPPED state to avoid spurious wakeups. This reflects the proper expiry time of the stopped timer: infinity. [ tglx: Massaged changelog ] Signed-off-by: Prasad Sodagudi Signed-off-by: Thomas Gleixner Cc: viresh.kumar@linaro.org Link: https://lkml.kernel.org/r/1509043042-32486-1-git-send-email-psodagud@codeaurora.org --- kernel/time/tick-oneshot.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/time') diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 6b009c207671..c1f518e7aa80 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -33,6 +33,7 @@ int tick_program_event(ktime_t expires, int force) * We don't need the clock event device any more, stop it. */ clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED); + dev->next_event = KTIME_MAX; return 0; } -- cgit v1.2.3 From b24413180f5600bcb3bb70fbed5cf186b60864bd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Nov 2017 15:07:57 +0100 Subject: License cleanup: add SPDX GPL-2.0 license identifier to files with no license Many source files in the tree are missing licensing information, which makes it harder for compliance tools to determine the correct license. By default all files without license information are under the default license of the kernel, which is GPL version 2. Update the files which contain no license information with the 'GPL-2.0' SPDX license identifier. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This patch is based on work done by Thomas Gleixner and Kate Stewart and Philippe Ombredanne. How this work was done: Patches were generated and checked against linux-4.14-rc6 for a subset of the use cases: - file had no licensing information it it. - file was a */uapi/* one with no licensing information in it, - file was a */uapi/* one with existing licensing information, Further patches will be generated in subsequent months to fix up cases where non-standard license headers were used, and references to license had to be inferred by heuristics based on keywords. The analysis to determine which SPDX License Identifier to be applied to a file was done in a spreadsheet of side by side results from of the output of two independent scanners (ScanCode & Windriver) producing SPDX tag:value files created by Philippe Ombredanne. Philippe prepared the base worksheet, and did an initial spot review of a few 1000 files. The 4.13 kernel was the starting point of the analysis with 60,537 files assessed. Kate Stewart did a file by file comparison of the scanner results in the spreadsheet to determine which SPDX license identifier(s) to be applied to the file. She confirmed any determination that was not immediately clear with lawyers working with the Linux Foundation. Criteria used to select files for SPDX license identifier tagging was: - Files considered eligible had to be source code files. - Make and config files were included as candidates if they contained >5 lines of source - File already had some variant of a license header in it (even if <5 lines). All documentation files were explicitly excluded. The following heuristics were used to determine which SPDX license identifiers to apply. - when both scanners couldn't find any license traces, file was considered to have no license information in it, and the top level COPYING file license applied. For non */uapi/* files that summary was: SPDX license identifier # files ---------------------------------------------------|------- GPL-2.0 11139 and resulted in the first patch in this series. If that file was a */uapi/* path one, it was "GPL-2.0 WITH Linux-syscall-note" otherwise it was "GPL-2.0". Results of that was: SPDX license identifier # files ---------------------------------------------------|------- GPL-2.0 WITH Linux-syscall-note 930 and resulted in the second patch in this series. - if a file had some form of licensing information in it, and was one of the */uapi/* ones, it was denoted with the Linux-syscall-note if any GPL family license was found in the file or had no licensing in it (per prior point). Results summary: SPDX license identifier # files ---------------------------------------------------|------ GPL-2.0 WITH Linux-syscall-note 270 GPL-2.0+ WITH Linux-syscall-note 169 ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) 21 ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) 17 LGPL-2.1+ WITH Linux-syscall-note 15 GPL-1.0+ WITH Linux-syscall-note 14 ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) 5 LGPL-2.0+ WITH Linux-syscall-note 4 LGPL-2.1 WITH Linux-syscall-note 3 ((GPL-2.0 WITH Linux-syscall-note) OR MIT) 3 ((GPL-2.0 WITH Linux-syscall-note) AND MIT) 1 and that resulted in the third patch in this series. - when the two scanners agreed on the detected license(s), that became the concluded license(s). - when there was disagreement between the two scanners (one detected a license but the other didn't, or they both detected different licenses) a manual inspection of the file occurred. - In most cases a manual inspection of the information in the file resulted in a clear resolution of the license that should apply (and which scanner probably needed to revisit its heuristics). - When it was not immediately clear, the license identifier was confirmed with lawyers working with the Linux Foundation. - If there was any question as to the appropriate license identifier, the file was flagged for further research and to be revisited later in time. In total, over 70 hours of logged manual review was done on the spreadsheet to determine the SPDX license identifiers to apply to the source files by Kate, Philippe, Thomas and, in some cases, confirmation by lawyers working with the Linux Foundation. Kate also obtained a third independent scan of the 4.13 code base from FOSSology, and compared selected files where the other two scanners disagreed against that SPDX file, to see if there was new insights. The Windriver scanner is based on an older version of FOSSology in part, so they are related. Thomas did random spot checks in about 500 files from the spreadsheets for the uapi headers and agreed with SPDX license identifier in the files he inspected. For the non-uapi files Thomas did random spot checks in about 15000 files. In initial set of patches against 4.14-rc6, 3 files were found to have copy/paste license identifier errors, and have been fixed to reflect the correct identifier. Additionally Philippe spent 10 hours this week doing a detailed manual inspection and review of the 12,461 patched files from the initial patch version early this week with: - a full scancode scan run, collecting the matched texts, detected license ids and scores - reviewing anything where there was a license detected (about 500+ files) to ensure that the applied SPDX license was correct - reviewing anything where there was no detection but the patch license was not GPL-2.0 WITH Linux-syscall-note to ensure that the applied SPDX license was correct This produced a worksheet with 20 files needing minor correction. This worksheet was then exported into 3 different .csv files for the different types of files to be modified. These .csv files were then reviewed by Greg. Thomas wrote a script to parse the csv files and add the proper SPDX tag to the file, in the format that the file expected. This script was further refined by Greg based on the output to detect more types of files automatically and to distinguish between header and source .c files (which need different comment types.) Finally Greg ran the script using the .csv files to generate the patches. Reviewed-by: Kate Stewart Reviewed-by: Philippe Ombredanne Reviewed-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/time/Makefile | 1 + kernel/time/itimer.c | 1 + kernel/time/ntp.c | 1 + kernel/time/ntp_internal.h | 1 + kernel/time/posix-cpu-timers.c | 1 + kernel/time/posix-timers.h | 1 + kernel/time/tick-broadcast-hrtimer.c | 1 + kernel/time/tick-internal.h | 1 + kernel/time/tick-sched.h | 1 + kernel/time/timekeeping.h | 1 + kernel/time/timekeeping_internal.h | 1 + 11 files changed, 11 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 938dbf33ef49..f1e46f338a9c 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-y += time.o timer.o hrtimer.o obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o obj-y += timeconv.o timecounter.o alarmtimer.o diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index 2ef98a02376a..f26acef5d7b4 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/kernel/itimer.c * diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index edf19cc53140..99e03bec68e4 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * NTP state machine interfaces and logic. * diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h index d8a7c11fa71a..0a53e6ea47b1 100644 --- a/kernel/time/ntp_internal.h +++ b/kernel/time/ntp_internal.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NTP_INTERNAL_H #define _LINUX_NTP_INTERNAL_H diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 8585ad6e472a..5b117110b55b 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Implement CPU time clocks for the POSIX clock interface. */ diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h index fb303c3be4d3..151e28f5bf30 100644 --- a/kernel/time/posix-timers.h +++ b/kernel/time/posix-timers.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #define TIMER_RETRY 1 struct k_clock { diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c index a7bb8f33ae07..58045eb976c3 100644 --- a/kernel/time/tick-broadcast-hrtimer.c +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/kernel/time/tick-broadcast-hrtimer.c * This file emulates a local clock event device diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index be0ac01f2e12..f8e1845aa464 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * tick internal variable and functions used by low/high res code */ diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 075444e3d48e..954b43dbf21c 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _TICK_SCHED_H #define _TICK_SCHED_H diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index d0914676d4c5..c9f9af339914 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _KERNEL_TIME_TIMEKEEPING_H #define _KERNEL_TIME_TIMEKEEPING_H /* diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h index 9a18f121f399..fdbeeb02dde9 100644 --- a/kernel/time/timekeeping_internal.h +++ b/kernel/time/timekeeping_internal.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _TIMEKEEPING_INTERNAL_H #define _TIMEKEEPING_INTERNAL_H /* -- cgit v1.2.3 From 6082a6e44434a17f194048b7d48df56f148ec6d4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 1 Nov 2017 11:04:51 -0700 Subject: kernel/time/Kconfig: Fix typo in comment Fix typo in Kconfig comment text. Signed-off-by: Randy Dunlap Signed-off-by: Thomas Gleixner Cc: John Stultz Cc: Stephen Boyd Cc: Jiri Kosina Link: https://lkml.kernel.org/r/0e586dd4-2b27-864e-c252-bc72df52fd01@infradead.org --- kernel/time/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index ac09bc29eb08..d689a9557e17 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -56,7 +56,7 @@ menu "Timers subsystem" # Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is # only related to the tick functionality. Oneshot clockevent devices -# are supported independ of this. +# are supported independent of this. config TICK_ONESHOT bool -- cgit v1.2.3 From ebf3adbad012b89c4a51a3beae718a587d988a3a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 6 Nov 2017 16:01:20 +0100 Subject: timers/nohz: Use lockdep to assert IRQs are disabled/enabled Use lockdep to check that IRQs are enabled or disabled as expected. This way the sanity check only shows overhead when concurrency correctness debug code is enabled. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: David S . Miller Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Tejun Heo Link: http://lkml.kernel.org/r/1509980490-4285-5-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index c7a899c5ce64..dd4b7b492c9b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -198,7 +198,7 @@ static bool check_tick_dependency(atomic_t *dep) static bool can_stop_full_tick(int cpu, struct tick_sched *ts) { - WARN_ON_ONCE(!irqs_disabled()); + lockdep_assert_irqs_disabled(); if (unlikely(!cpu_online(cpu))) return false; @@ -960,8 +960,7 @@ void tick_nohz_idle_enter(void) { struct tick_sched *ts; - WARN_ON_ONCE(irqs_disabled()); - + lockdep_assert_irqs_enabled(); /* * Update the idle state in the scheduler domain hierarchy * when tick_nohz_stop_sched_tick() is called from the idle loop. -- cgit v1.2.3 From 53bef3fd47f69e40b52c9f9acd3551dfff9f8702 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 6 Nov 2017 16:01:21 +0100 Subject: timers/hrtimer: Use lockdep to assert IRQs are disabled/enabled Use lockdep to check that IRQs are enabled or disabled as expected. This way the sanity check only shows overhead when concurrency correctness debug code is enabled. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: David S . Miller Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Tejun Heo Link: http://lkml.kernel.org/r/1509980490-4285-6-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- kernel/time/hrtimer.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 88f75f92ef36..d32520840fde 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -758,9 +758,7 @@ void clock_was_set(void) */ void hrtimers_resume(void) { - WARN_ONCE(!irqs_disabled(), - KERN_INFO "hrtimers_resume() called with IRQs enabled!"); - + lockdep_assert_irqs_disabled(); /* Retrigger on the local CPU */ retrigger_next_event(NULL); /* And schedule a retrigger for all others */ -- cgit v1.2.3 From a69682200db9c2c26594188f81dd2df560af4683 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 6 Nov 2017 16:01:28 +0100 Subject: timers/posix-cpu-timers: Use lockdep to assert IRQs are disabled/enabled Use lockdep to check that IRQs are enabled or disabled as expected. This way the sanity check only shows overhead when concurrency correctness debug code is enabled. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: David S . Miller Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Tejun Heo Link: http://lkml.kernel.org/r/1509980490-4285-13-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- kernel/time/posix-cpu-timers.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 5b117110b55b..1f27887aa194 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -603,7 +603,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, /* * Disarm any old timer after extracting its expiry time. */ - WARN_ON_ONCE(!irqs_disabled()); + lockdep_assert_irqs_disabled(); ret = 0; old_incr = timer->it.cpu.incr; @@ -1034,7 +1034,7 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer) /* * Now re-arm for the new expiry time. */ - WARN_ON_ONCE(!irqs_disabled()); + lockdep_assert_irqs_disabled(); arm_timer(timer); unlock: unlock_task_sighand(p, &flags); @@ -1125,7 +1125,7 @@ void run_posix_cpu_timers(struct task_struct *tsk) struct k_itimer *timer, *next; unsigned long flags; - WARN_ON_ONCE(!irqs_disabled()); + lockdep_assert_irqs_disabled(); /* * The fast path checks that there are no expired thread or thread -- cgit v1.2.3 From df27067e6040b51188184876253d93da002433aa Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 10 Nov 2017 16:25:04 +0100 Subject: pstore: Use ktime_get_real_fast_ns() instead of __getnstimeofday() __getnstimeofday() is a rather odd interface, with a number of quirks: - The caller may come from NMI context, but the implementation is not NMI safe, one way to get there from NMI is NMI handler: something bad panic() kmsg_dump() pstore_dump() pstore_record_init() __getnstimeofday() - The calling conventions are different from any other timekeeping functions, to deal with returning an error code during suspended timekeeping. Address the above issues by using a completely different method to get the time: ktime_get_real_fast_ns() is NMI safe and has a reasonable behavior when timekeeping is suspended: it returns the time at which it got suspended. As Thomas Gleixner explained, this is safe, as ktime_get_real_fast_ns() does not call into the clocksource driver that might be suspended. The result can easily be transformed into a timespec structure. Since ktime_get_real_fast_ns() was not exported to modules, add the export. The pstore behavior for the suspended case changes slightly, as it now stores the timestamp at which timekeeping was suspended instead of storing a zero timestamp. This change is not addressing y2038-safety, that's subject to a more complex follow up patch. Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Acked-by: Kees Cook Cc: Tony Luck Cc: Anton Vorontsov Cc: Stephen Boyd Cc: John Stultz Cc: Colin Cross Link: https://lkml.kernel.org/r/20171110152530.1926955-1-arnd@arndb.de --- kernel/time/timekeeping.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 353f7bd1eeb0..198afa78bf69 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -528,6 +528,7 @@ u64 ktime_get_real_fast_ns(void) { return __ktime_get_real_fast_ns(&tk_fast_mono); } +EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns); /** * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. -- cgit v1.2.3 From b24591e2fcf852ad7ad2ccf745c8220bf378d312 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 Nov 2017 12:35:07 +0000 Subject: timers: Add a function to start/reduce a timer Add a function, similar to mod_timer(), that will start a timer if it isn't running and will modify it if it is running and has an expiry time longer than the new time. If the timer is running with an expiry time that's the same or sooner, no change is made. The function looks like: int timer_reduce(struct timer_list *timer, unsigned long expires); This can be used by code such as networking code to make it easier to share a timer for multiple timeouts. For instance, in upcoming AF_RXRPC code, the rxrpc_call struct will maintain a number of timeouts: unsigned long ack_at; unsigned long resend_at; unsigned long ping_at; unsigned long expect_rx_by; unsigned long expect_req_by; unsigned long expect_term_by; each of which is set independently of the others. With timer reduction available, when the code needs to set one of the timeouts, it only needs to look at that timeout and then call timer_reduce() to modify the timer, starting it or bringing it forward if necessary. There is no need to refer to the other timeouts to see which is earliest and no need to take any lock other than, potentially, the timer lock inside timer_reduce(). Note, that this does not protect against concurrent invocations of any of the timer functions. As an example, the expect_rx_by timeout above, which terminates a call if we don't get a packet from the server within a certain time window, would be set something like this: unsigned long now = jiffies; unsigned long expect_rx_by = now + packet_receive_timeout; WRITE_ONCE(call->expect_rx_by, expect_rx_by); timer_reduce(&call->timer, expect_rx_by); The timer service code (which might, say, be in a work function) would then check all the timeouts to see which, if any, had triggered, deal with those: t = READ_ONCE(call->ack_at); if (time_after_eq(now, t)) { cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET); set_bit(RXRPC_CALL_EV_ACK, &call->events); } and then restart the timer if necessary by finding the soonest timeout that hasn't yet passed and then calling timer_reduce(). The disadvantage of doing things this way rather than comparing the timers each time and calling mod_timer() is that you *will* take timer events unless you can finish what you're doing and delete the timer in time. The advantage of doing things this way is that you don't need to use a lock to work out when the next timer should be set, other than the timer's own lock - which you might not have to take. [ tglx: Fixed weird formatting and adopted it to pending changes ] Signed-off-by: David Howells Signed-off-by: Thomas Gleixner Cc: keyrings@vger.kernel.org Cc: linux-afs@lists.infradead.org Link: https://lkml.kernel.org/r/151023090769.23050.1801643667223880753.stgit@warthog.procyon.org.uk --- kernel/time/timer.c | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index fbb1f85327bf..af0b8bae4502 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -929,8 +929,11 @@ static struct timer_base *lock_timer_base(struct timer_list *timer, } } +#define MOD_TIMER_PENDING_ONLY 0x01 +#define MOD_TIMER_REDUCE 0x02 + static inline int -__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) +__mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options) { struct timer_base *base, *new_base; unsigned int idx = UINT_MAX; @@ -950,7 +953,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) * larger granularity than you would get from adding a new * timer with this expiry. */ - if (timer->expires == expires) + long diff = timer->expires - expires; + + if (!diff) + return 1; + if (options & MOD_TIMER_REDUCE && diff <= 0) return 1; /* @@ -962,6 +969,12 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) base = lock_timer_base(timer, &flags); forward_timer_base(base); + if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) && + time_before_eq(timer->expires, expires)) { + ret = 1; + goto out_unlock; + } + clk = base->clk; idx = calc_wheel_index(expires, clk); @@ -971,7 +984,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) * subsequent call will exit in the expires check above. */ if (idx == timer_get_idx(timer)) { - timer->expires = expires; + if (!(options & MOD_TIMER_REDUCE)) + timer->expires = expires; + else if (time_after(timer->expires, expires)) + timer->expires = expires; ret = 1; goto out_unlock; } @@ -981,7 +997,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) } ret = detach_if_pending(timer, base, false); - if (!ret && pending_only) + if (!ret && (options & MOD_TIMER_PENDING_ONLY)) goto out_unlock; debug_activate(timer, expires); @@ -1042,7 +1058,7 @@ out_unlock: */ int mod_timer_pending(struct timer_list *timer, unsigned long expires) { - return __mod_timer(timer, expires, true); + return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY); } EXPORT_SYMBOL(mod_timer_pending); @@ -1068,10 +1084,25 @@ EXPORT_SYMBOL(mod_timer_pending); */ int mod_timer(struct timer_list *timer, unsigned long expires) { - return __mod_timer(timer, expires, false); + return __mod_timer(timer, expires, 0); } EXPORT_SYMBOL(mod_timer); +/** + * timer_reduce - Modify a timer's timeout if it would reduce the timeout + * @timer: The timer to be modified + * @expires: New timeout in jiffies + * + * timer_reduce() is very similar to mod_timer(), except that it will only + * modify a running timer if that would reduce the expiration time (it will + * start a timer that isn't running). + */ +int timer_reduce(struct timer_list *timer, unsigned long expires) +{ + return __mod_timer(timer, expires, MOD_TIMER_REDUCE); +} +EXPORT_SYMBOL(timer_reduce); + /** * add_timer - start a timer * @timer: the timer to be added @@ -1754,7 +1785,7 @@ signed long __sched schedule_timeout(signed long timeout) timer.task = current; timer_setup_on_stack(&timer.timer, process_timeout, 0); - __mod_timer(&timer.timer, expire, false); + __mod_timer(&timer.timer, expire, 0); schedule(); del_singleshot_timer_sync(&timer.timer); -- cgit v1.2.3 From 8e7df2b5b7f245c9bd11064712db5cb69044a362 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 13 Nov 2017 07:15:41 +0100 Subject: timer/debug: Change /proc/timer_list from 0444 to 0400 While it uses %pK, there's still few reasons to read this file as non-root. Suggested-by: Linus Torvalds Acked-by: Thomas Gleixner Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/time/timer_list.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 0e7f5428a148..0ed768b56c60 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -389,7 +389,7 @@ static int __init init_timer_list_procfs(void) { struct proc_dir_entry *pe; - pe = proc_create("timer_list", 0444, NULL, &timer_list_fops); + pe = proc_create("timer_list", 0400, NULL, &timer_list_fops); if (!pe) return -ENOMEM; return 0; -- cgit v1.2.3 From aea3706cfc4d952ed6d32b6d5845b5ecd99ed7f5 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Mon, 13 Nov 2017 14:51:31 -0800 Subject: timekeeping: Remove CONFIG_GENERIC_TIME_VSYSCALL_OLD As of d4d1fc61eb38f (ia64: Update fsyscall gettime to use modern vsyscall_update)the last user of CONFIG_GENERIC_TIME_VSYSCALL_OLD have been updated, the legacy support for old-style vsyscall implementations can be removed from the timekeeping code. (Thanks again to Tony Luck for helping remove the last user!) [jstultz: Commit message rework] Signed-off-by: Miroslav Lichvar Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Cc: Prarit Bhargava Cc: Tony Luck Cc: Richard Cochran Cc: Stephen Boyd Link: https://lkml.kernel.org/r/1510613491-16695-1-git-send-email-john.stultz@linaro.org --- kernel/time/Kconfig | 4 ---- kernel/time/timekeeping.c | 45 --------------------------------------------- 2 files changed, 49 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index d689a9557e17..e776fc8cc1df 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -21,10 +21,6 @@ config CLOCKSOURCE_VALIDATE_LAST_CYCLE config GENERIC_TIME_VSYSCALL bool -# Timekeeping vsyscall support -config GENERIC_TIME_VSYSCALL_OLD - bool - # Old style timekeeping config ARCH_USES_GETTIMEOFFSET bool diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 198afa78bf69..cd03317e7b57 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -557,45 +557,6 @@ static void halt_fast_timekeeper(struct timekeeper *tk) update_fast_timekeeper(&tkr_dummy, &tk_fast_raw); } -#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD -#warning Please contact your maintainers, as GENERIC_TIME_VSYSCALL_OLD compatibity will disappear soon. - -static inline void update_vsyscall(struct timekeeper *tk) -{ - struct timespec xt, wm; - - xt = timespec64_to_timespec(tk_xtime(tk)); - wm = timespec64_to_timespec(tk->wall_to_monotonic); - update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult, - tk->tkr_mono.cycle_last); -} - -static inline void old_vsyscall_fixup(struct timekeeper *tk) -{ - s64 remainder; - - /* - * Store only full nanoseconds into xtime_nsec after rounding - * it up and add the remainder to the error difference. - * XXX - This is necessary to avoid small 1ns inconsistnecies caused - * by truncating the remainder in vsyscalls. However, it causes - * additional work to be done in timekeeping_adjust(). Once - * the vsyscall implementations are converted to use xtime_nsec - * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD - * users are removed, this can be killed. - */ - remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1); - if (remainder != 0) { - tk->tkr_mono.xtime_nsec -= remainder; - tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift; - tk->ntp_error += remainder << tk->ntp_error_shift; - tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift; - } -} -#else -#define old_vsyscall_fixup(tk) -#endif - static RAW_NOTIFIER_HEAD(pvclock_gtod_chain); static void update_pvclock_gtod(struct timekeeper *tk, bool was_set) @@ -2163,12 +2124,6 @@ void update_wall_time(void) /* correct the clock when NTP error is too big */ timekeeping_adjust(tk, offset); - /* - * XXX This can be killed once everyone converts - * to the new update_vsyscall. - */ - old_vsyscall_fixup(tk); - /* * Finally, make sure that after the rounding * xtime_nsec isn't larger than NSEC_PER_SEC -- cgit v1.2.3 From b9eaf18722221ef8b2bd6a67240ebe668622152a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 13:15:39 -0700 Subject: treewide: init_timer() -> setup_timer() This mechanically converts all remaining cases of ancient open-coded timer setup with the old setup_timer() API, which is the first step in timer conversions. This has no behavioral changes, since it ultimately just changes the order of assignment to fields of struct timer_list when finding variations of: init_timer(&t); f.function = timer_callback; t.data = timer_callback_arg; to be converted into: setup_timer(&t, timer_callback, timer_callback_arg); The conversion is done with the following Coccinelle script, which is an improved version of scripts/cocci/api/setup_timer.cocci, in the following ways: - assignments-before-init_timer() cases - limit the .data case removal to the specific struct timer_list instance - handling calls by dereference (timer->field vs timer.field) spatch --very-quiet --all-includes --include-headers \ -I ./arch/x86/include -I ./arch/x86/include/generated \ -I ./include -I ./arch/x86/include/uapi \ -I ./arch/x86/include/generated/uapi -I ./include/uapi \ -I ./include/generated/uapi --include ./include/linux/kconfig.h \ --dir . \ --cocci-file ~/src/data/setup_timer.cocci @fix_address_of@ expression e; @@ init_timer( -&(e) +&e , ...) // Match the common cases first to avoid Coccinelle parsing loops with // "... when" clauses. @match_immediate_function_data_after_init_timer@ expression e, func, da; @@ -init_timer +setup_timer ( \(&e\|e\) +, func, da ); ( -\(e.function\|e->function\) = func; -\(e.data\|e->data\) = da; | -\(e.data\|e->data\) = da; -\(e.function\|e->function\) = func; ) @match_immediate_function_data_before_init_timer@ expression e, func, da; @@ ( -\(e.function\|e->function\) = func; -\(e.data\|e->data\) = da; | -\(e.data\|e->data\) = da; -\(e.function\|e->function\) = func; ) -init_timer +setup_timer ( \(&e\|e\) +, func, da ); @match_function_and_data_after_init_timer@ expression e, e2, e3, e4, e5, func, da; @@ -init_timer +setup_timer ( \(&e\|e\) +, func, da ); ... when != func = e2 when != da = e3 ( -e.function = func; ... when != da = e4 -e.data = da; | -e->function = func; ... when != da = e4 -e->data = da; | -e.data = da; ... when != func = e5 -e.function = func; | -e->data = da; ... when != func = e5 -e->function = func; ) @match_function_and_data_before_init_timer@ expression e, e2, e3, e4, e5, func, da; @@ ( -e.function = func; ... when != da = e4 -e.data = da; | -e->function = func; ... when != da = e4 -e->data = da; | -e.data = da; ... when != func = e5 -e.function = func; | -e->data = da; ... when != func = e5 -e->function = func; ) ... when != func = e2 when != da = e3 -init_timer +setup_timer ( \(&e\|e\) +, func, da ); @r1 exists@ expression t; identifier f; position p; @@ f(...) { ... when any init_timer@p(\(&t\|t\)) ... when any } @r2 exists@ expression r1.t; identifier g != r1.f; expression e8; @@ g(...) { ... when any \(t.data\|t->data\) = e8 ... when any } // It is dangerous to use setup_timer if data field is initialized // in another function. @script:python depends on r2@ p << r1.p; @@ cocci.include_match(False) @r3@ expression r1.t, func, e7; position r1.p; @@ ( -init_timer@p(&t); +setup_timer(&t, func, 0UL); ... when != func = e7 -t.function = func; | -t.function = func; ... when != func = e7 -init_timer@p(&t); +setup_timer(&t, func, 0UL); | -init_timer@p(t); +setup_timer(t, func, 0UL); ... when != func = e7 -t->function = func; | -t->function = func; ... when != func = e7 -init_timer@p(t); +setup_timer(t, func, 0UL); ) Signed-off-by: Kees Cook --- kernel/time/clocksource.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 03918a19cf2d..5b51d5ba2a85 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -290,8 +290,7 @@ static inline void clocksource_start_watchdog(void) { if (watchdog_running || !watchdog || list_empty(&watchdog_list)) return; - init_timer(&watchdog_timer); - watchdog_timer.function = clocksource_watchdog; + setup_timer(&watchdog_timer, clocksource_watchdog, 0UL); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); watchdog_running = 1; -- cgit v1.2.3 From e99e88a9d2b067465adaa9c111ada99a041bef9a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 16 Oct 2017 14:43:17 -0700 Subject: treewide: setup_timer() -> timer_setup() This converts all remaining cases of the old setup_timer() API into using timer_setup(), where the callback argument is the structure already holding the struct timer_list. These should have no behavioral changes, since they just change which pointer is passed into the callback with the same available pointers after conversion. It handles the following examples, in addition to some other variations. Casting from unsigned long: void my_callback(unsigned long data) { struct something *ptr = (struct something *)data; ... } ... setup_timer(&ptr->my_timer, my_callback, ptr); and forced object casts: void my_callback(struct something *ptr) { ... } ... setup_timer(&ptr->my_timer, my_callback, (unsigned long)ptr); become: void my_callback(struct timer_list *t) { struct something *ptr = from_timer(ptr, t, my_timer); ... } ... timer_setup(&ptr->my_timer, my_callback, 0); Direct function assignments: void my_callback(unsigned long data) { struct something *ptr = (struct something *)data; ... } ... ptr->my_timer.function = my_callback; have a temporary cast added, along with converting the args: void my_callback(struct timer_list *t) { struct something *ptr = from_timer(ptr, t, my_timer); ... } ... ptr->my_timer.function = (TIMER_FUNC_TYPE)my_callback; And finally, callbacks without a data assignment: void my_callback(unsigned long data) { ... } ... setup_timer(&ptr->my_timer, my_callback, 0); have their argument renamed to verify they're unused during conversion: void my_callback(struct timer_list *unused) { ... } ... timer_setup(&ptr->my_timer, my_callback, 0); The conversion is done with the following Coccinelle script: spatch --very-quiet --all-includes --include-headers \ -I ./arch/x86/include -I ./arch/x86/include/generated \ -I ./include -I ./arch/x86/include/uapi \ -I ./arch/x86/include/generated/uapi -I ./include/uapi \ -I ./include/generated/uapi --include ./include/linux/kconfig.h \ --dir . \ --cocci-file ~/src/data/timer_setup.cocci @fix_address_of@ expression e; @@ setup_timer( -&(e) +&e , ...) // Update any raw setup_timer() usages that have a NULL callback, but // would otherwise match change_timer_function_usage, since the latter // will update all function assignments done in the face of a NULL // function initialization in setup_timer(). @change_timer_function_usage_NULL@ expression _E; identifier _timer; type _cast_data; @@ ( -setup_timer(&_E->_timer, NULL, _E); +timer_setup(&_E->_timer, NULL, 0); | -setup_timer(&_E->_timer, NULL, (_cast_data)_E); +timer_setup(&_E->_timer, NULL, 0); | -setup_timer(&_E._timer, NULL, &_E); +timer_setup(&_E._timer, NULL, 0); | -setup_timer(&_E._timer, NULL, (_cast_data)&_E); +timer_setup(&_E._timer, NULL, 0); ) @change_timer_function_usage@ expression _E; identifier _timer; struct timer_list _stl; identifier _callback; type _cast_func, _cast_data; @@ ( -setup_timer(&_E->_timer, _callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, &_callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, _callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, &_callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)_callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)&_callback, _E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)_callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, (_cast_func)&_callback, (_cast_data)_E); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E._timer, _callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, _callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, &_callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, &_callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)_callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)_callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)&_callback, (_cast_data)_E); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, (_cast_func)&_callback, (_cast_data)&_E); +timer_setup(&_E._timer, _callback, 0); | _E->_timer@_stl.function = _callback; | _E->_timer@_stl.function = &_callback; | _E->_timer@_stl.function = (_cast_func)_callback; | _E->_timer@_stl.function = (_cast_func)&_callback; | _E._timer@_stl.function = _callback; | _E._timer@_stl.function = &_callback; | _E._timer@_stl.function = (_cast_func)_callback; | _E._timer@_stl.function = (_cast_func)&_callback; ) // callback(unsigned long arg) @change_callback_handle_cast depends on change_timer_function_usage@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _origtype; identifier _origarg; type _handletype; identifier _handle; @@ void _callback( -_origtype _origarg +struct timer_list *t ) { ( ... when != _origarg _handletype *_handle = -(_handletype *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg | ... when != _origarg _handletype *_handle = -(void *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg | ... when != _origarg _handletype *_handle; ... when != _handle _handle = -(_handletype *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg | ... when != _origarg _handletype *_handle; ... when != _handle _handle = -(void *)_origarg; +from_timer(_handle, t, _timer); ... when != _origarg ) } // callback(unsigned long arg) without existing variable @change_callback_handle_cast_no_arg depends on change_timer_function_usage && !change_callback_handle_cast@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _origtype; identifier _origarg; type _handletype; @@ void _callback( -_origtype _origarg +struct timer_list *t ) { + _handletype *_origarg = from_timer(_origarg, t, _timer); + ... when != _origarg - (_handletype *)_origarg + _origarg ... when != _origarg } // Avoid already converted callbacks. @match_callback_converted depends on change_timer_function_usage && !change_callback_handle_cast && !change_callback_handle_cast_no_arg@ identifier change_timer_function_usage._callback; identifier t; @@ void _callback(struct timer_list *t) { ... } // callback(struct something *handle) @change_callback_handle_arg depends on change_timer_function_usage && !match_callback_converted && !change_callback_handle_cast && !change_callback_handle_cast_no_arg@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _handletype; identifier _handle; @@ void _callback( -_handletype *_handle +struct timer_list *t ) { + _handletype *_handle = from_timer(_handle, t, _timer); ... } // If change_callback_handle_arg ran on an empty function, remove // the added handler. @unchange_callback_handle_arg depends on change_timer_function_usage && change_callback_handle_arg@ identifier change_timer_function_usage._callback; identifier change_timer_function_usage._timer; type _handletype; identifier _handle; identifier t; @@ void _callback(struct timer_list *t) { - _handletype *_handle = from_timer(_handle, t, _timer); } // We only want to refactor the setup_timer() data argument if we've found // the matching callback. This undoes changes in change_timer_function_usage. @unchange_timer_function_usage depends on change_timer_function_usage && !change_callback_handle_cast && !change_callback_handle_cast_no_arg && !change_callback_handle_arg@ expression change_timer_function_usage._E; identifier change_timer_function_usage._timer; identifier change_timer_function_usage._callback; type change_timer_function_usage._cast_data; @@ ( -timer_setup(&_E->_timer, _callback, 0); +setup_timer(&_E->_timer, _callback, (_cast_data)_E); | -timer_setup(&_E._timer, _callback, 0); +setup_timer(&_E._timer, _callback, (_cast_data)&_E); ) // If we fixed a callback from a .function assignment, fix the // assignment cast now. @change_timer_function_assignment depends on change_timer_function_usage && (change_callback_handle_cast || change_callback_handle_cast_no_arg || change_callback_handle_arg)@ expression change_timer_function_usage._E; identifier change_timer_function_usage._timer; identifier change_timer_function_usage._callback; type _cast_func; typedef TIMER_FUNC_TYPE; @@ ( _E->_timer.function = -_callback +(TIMER_FUNC_TYPE)_callback ; | _E->_timer.function = -&_callback +(TIMER_FUNC_TYPE)_callback ; | _E->_timer.function = -(_cast_func)_callback; +(TIMER_FUNC_TYPE)_callback ; | _E->_timer.function = -(_cast_func)&_callback +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -_callback +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -&_callback; +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -(_cast_func)_callback +(TIMER_FUNC_TYPE)_callback ; | _E._timer.function = -(_cast_func)&_callback +(TIMER_FUNC_TYPE)_callback ; ) // Sometimes timer functions are called directly. Replace matched args. @change_timer_function_calls depends on change_timer_function_usage && (change_callback_handle_cast || change_callback_handle_cast_no_arg || change_callback_handle_arg)@ expression _E; identifier change_timer_function_usage._timer; identifier change_timer_function_usage._callback; type _cast_data; @@ _callback( ( -(_cast_data)_E +&_E->_timer | -(_cast_data)&_E +&_E._timer | -_E +&_E->_timer ) ) // If a timer has been configured without a data argument, it can be // converted without regard to the callback argument, since it is unused. @match_timer_function_unused_data@ expression _E; identifier _timer; identifier _callback; @@ ( -setup_timer(&_E->_timer, _callback, 0); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, _callback, 0L); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E->_timer, _callback, 0UL); +timer_setup(&_E->_timer, _callback, 0); | -setup_timer(&_E._timer, _callback, 0); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, _callback, 0L); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_E._timer, _callback, 0UL); +timer_setup(&_E._timer, _callback, 0); | -setup_timer(&_timer, _callback, 0); +timer_setup(&_timer, _callback, 0); | -setup_timer(&_timer, _callback, 0L); +timer_setup(&_timer, _callback, 0); | -setup_timer(&_timer, _callback, 0UL); +timer_setup(&_timer, _callback, 0); | -setup_timer(_timer, _callback, 0); +timer_setup(_timer, _callback, 0); | -setup_timer(_timer, _callback, 0L); +timer_setup(_timer, _callback, 0); | -setup_timer(_timer, _callback, 0UL); +timer_setup(_timer, _callback, 0); ) @change_callback_unused_data depends on match_timer_function_unused_data@ identifier match_timer_function_unused_data._callback; type _origtype; identifier _origarg; @@ void _callback( -_origtype _origarg +struct timer_list *unused ) { ... when != _origarg } Signed-off-by: Kees Cook --- kernel/time/clocksource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 5b51d5ba2a85..65f9e3f24dde 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -171,7 +171,7 @@ void clocksource_mark_unstable(struct clocksource *cs) spin_unlock_irqrestore(&watchdog_lock, flags); } -static void clocksource_watchdog(unsigned long data) +static void clocksource_watchdog(struct timer_list *unused) { struct clocksource *cs; u64 csnow, wdnow, cslast, wdlast, delta; @@ -290,7 +290,7 @@ static inline void clocksource_start_watchdog(void) { if (watchdog_running || !watchdog || list_empty(&watchdog_list)) return; - setup_timer(&watchdog_timer, clocksource_watchdog, 0UL); + timer_setup(&watchdog_timer, clocksource_watchdog, 0); watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask)); watchdog_running = 1; -- cgit v1.2.3 From c1eba5bcb6430868427e0b9d1cd1205a07302f06 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 22 Oct 2017 18:18:19 -0700 Subject: timer: Pass timer_list pointer to callbacks unconditionally Now that all timer callbacks are already taking their struct timer_list pointer as the callback argument, just do this unconditionally and remove the .data field. Cc: Thomas Gleixner Cc: John Stultz Cc: Stephen Boyd Signed-off-by: Kees Cook --- kernel/time/timer.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index af0b8bae4502..a07eb124332f 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1107,12 +1107,12 @@ EXPORT_SYMBOL(timer_reduce); * add_timer - start a timer * @timer: the timer to be added * - * The kernel will do a ->function(->data) callback from the + * The kernel will do a ->function(@timer) callback from the * timer interrupt at the ->expires point in the future. The * current time is 'jiffies'. * - * The timer's ->expires, ->function (and if the handler uses it, ->data) - * fields must be set prior calling this function. + * The timer's ->expires, ->function fields must be set prior calling this + * function. * * Timers with an ->expires field in the past will be executed in the next * timer tick. @@ -1284,8 +1284,7 @@ int del_timer_sync(struct timer_list *timer) EXPORT_SYMBOL(del_timer_sync); #endif -static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), - unsigned long data) +static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long)) { int count = preempt_count(); @@ -1309,7 +1308,7 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), lock_map_acquire(&lockdep_map); trace_timer_expire_entry(timer); - fn(data); + fn((TIMER_DATA_TYPE)timer); trace_timer_expire_exit(timer); lock_map_release(&lockdep_map); @@ -1332,7 +1331,6 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) while (!hlist_empty(head)) { struct timer_list *timer; void (*fn)(unsigned long); - unsigned long data; timer = hlist_entry(head->first, struct timer_list, entry); @@ -1340,15 +1338,14 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) detach_timer(timer, true); fn = timer->function; - data = timer->data; if (timer->flags & TIMER_IRQSAFE) { raw_spin_unlock(&base->lock); - call_timer_fn(timer, fn, data); + call_timer_fn(timer, fn); raw_spin_lock(&base->lock); } else { raw_spin_unlock_irq(&base->lock); - call_timer_fn(timer, fn, data); + call_timer_fn(timer, fn); raw_spin_lock_irq(&base->lock); } } -- cgit v1.2.3 From 354b46b1a0adda1dd5b7f0bc2a5604cca091be5f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 22 Oct 2017 19:15:40 -0700 Subject: timer: Switch callback prototype to take struct timer_list * argument Since all callbacks have been converted, we can switch the core prototype to "struct timer_list *" now too. Cc: Thomas Gleixner Cc: John Stultz Cc: Stephen Boyd Signed-off-by: Kees Cook --- kernel/time/timer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index a07eb124332f..0f0d49a02d04 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1284,7 +1284,7 @@ int del_timer_sync(struct timer_list *timer) EXPORT_SYMBOL(del_timer_sync); #endif -static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long)) +static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *)) { int count = preempt_count(); @@ -1308,7 +1308,7 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long)) lock_map_acquire(&lockdep_map); trace_timer_expire_entry(timer); - fn((TIMER_DATA_TYPE)timer); + fn(timer); trace_timer_expire_exit(timer); lock_map_release(&lockdep_map); @@ -1330,7 +1330,7 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) { while (!hlist_empty(head)) { struct timer_list *timer; - void (*fn)(unsigned long); + void (*fn)(struct timer_list *); timer = hlist_entry(head->first, struct timer_list, entry); -- cgit v1.2.3 From 188665b2d67db8953899551d1a9d4481b2a0ac60 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 22 Oct 2017 18:14:46 -0700 Subject: timer: Pass function down to initialization routines In preparation for removing more macros, pass the function down to the initialization routines instead of doing it in macros. Cc: Thomas Gleixner Cc: John Stultz Cc: Stephen Boyd Signed-off-by: Kees Cook --- kernel/time/timer.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 0f0d49a02d04..ffebcf878fba 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -707,14 +707,18 @@ static inline void debug_timer_assert_init(struct timer_list *timer) debug_object_assert_init(timer, &timer_debug_descr); } -static void do_init_timer(struct timer_list *timer, unsigned int flags, +static void do_init_timer(struct timer_list *timer, + void (*func)(struct timer_list *), + unsigned int flags, const char *name, struct lock_class_key *key); -void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags, +void init_timer_on_stack_key(struct timer_list *timer, + void (*func)(struct timer_list *), + unsigned int flags, const char *name, struct lock_class_key *key) { debug_object_init_on_stack(timer, &timer_debug_descr); - do_init_timer(timer, flags, name, key); + do_init_timer(timer, func, flags, name, key); } EXPORT_SYMBOL_GPL(init_timer_on_stack_key); @@ -755,10 +759,13 @@ static inline void debug_assert_init(struct timer_list *timer) debug_timer_assert_init(timer); } -static void do_init_timer(struct timer_list *timer, unsigned int flags, +static void do_init_timer(struct timer_list *timer, + void (*func)(struct timer_list *), + unsigned int flags, const char *name, struct lock_class_key *key) { timer->entry.pprev = NULL; + timer->function = func; timer->flags = flags | raw_smp_processor_id(); lockdep_init_map(&timer->lockdep_map, name, key, 0); } @@ -766,6 +773,7 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags, /** * init_timer_key - initialize a timer * @timer: the timer to be initialized + * @func: timer callback function * @flags: timer flags * @name: name of the timer * @key: lockdep class key of the fake lock used for tracking timer @@ -774,11 +782,12 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags, * init_timer_key() must be done to a timer prior calling *any* of the * other timer functions. */ -void init_timer_key(struct timer_list *timer, unsigned int flags, +void init_timer_key(struct timer_list *timer, + void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key) { debug_init(timer); - do_init_timer(timer, flags, name, key); + do_init_timer(timer, func, flags, name, key); } EXPORT_SYMBOL(init_timer_key); -- cgit v1.2.3