From 571af55a31d3652ac1f758f116835a76d0335661 Mon Sep 17 00:00:00 2001
From: Zhen Lei
Date: Tue, 25 Aug 2015 14:42:53 +0800
Subject: time: Fix spelling in comments

Signed-off-by: Zhen Lei
Cc: Hanjun Guo
Cc: John Stultz
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Rafael J. Wysocki
Cc: Thomas Gleixner
Cc: Tianhong Ding
Cc: Viresh Kumar
Cc: Xinwei Hu
Cc: Xunlei Pang
Cc: Zefan Li
Link: http://lkml.kernel.org/r/1440484973-13892-1-git-send-email-thunder.leizhen@huawei.com
[ Fixed yet another typo in one of the sentences fixed. ]
Signed-off-by: Ingo Molnar
---
 kernel/time/clocksource.c | 2 +-
 kernel/time/hrtimer.c     | 2 +-
 kernel/time/timekeeping.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 841b72f720e8..174c594ff8ef 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -479,7 +479,7 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
  * return half the number of nanoseconds the hardware counter can technically
  * cover. This is done so that we can potentially detect problems caused by
  * delayed timers or bad hardware, which might result in time intervals that
- * are larger then what the math used can handle without overflows.
+ * are larger than what the math used can handle without overflows.
  */
 u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
 {
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 457a373e2181..435b8850dd80 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -59,7 +59,7 @@
 /*
  * The timer bases:
  *
- * There are more clockids then hrtimer bases. Thus, we index
+ * There are more clockids than hrtimer bases. Thus, we index
  * into the timer bases by the hrtimer_base_type enum. When trying
  * to reach a base using a clockid, hrtimer_clockid_to_base()
  * is used to convert from clockid to the proper hrtimer_base_type.
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3739ac6aa473..125f16a29cf6 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1674,7 +1674,7 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
 /**
  * accumulate_nsecs_to_secs - Accumulates nsecs into secs
  *
- * Helper function that accumulates a the nsecs greater then a second
+ * Helper function that accumulates the nsecs greater than a second
  * from the xtime_nsec field to the xtime_secs field.
  * It also calls into the NTP code to handle leapsecond processing.
 *
@@ -1726,7 +1726,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
 	cycle_t interval = tk->cycle_interval << shift;
 	u64 raw_nsecs;
 
-	/* If the offset is smaller then a shifted interval, do nothing */
+	/* If the offset is smaller than a shifted interval, do nothing */
 	if (offset < interval)
 		return offset;
--
cgit v1.2.3

From 3ed769bdb2a2484fd7f9f7f3047413053aacbe21 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov
Date: Fri, 18 Sep 2015 15:54:23 +0200
Subject: timers: Fix data race in timer_stats_account_timer()

timer_stats_account_timer() reads timer->start_site, checks it for
NULL, and then reads it again, while timer_stats_timer_clear_start_info()
can concurrently reset timer->start_site to NULL. This should not lead
to crashes, but it can double the number of entries in timer stats,
since start_site is used during comparison; the doubled entries will
have a useless NULL start_site.

Read timer->start_site only once in timer_stats_account_timer().
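For illustration only (userspace C, not the kernel code; demo_timer and
record() are hypothetical stand-ins), the buggy shape reads the pointer
twice, while the fixed shape reads it exactly once — which is the core
of what the kernel's READ_ONCE() expands to, a read through a volatile
lvalue:

    #include <stdio.h>

    /* Hypothetical stand-in for the timer_stats state involved. */
    struct demo_timer {
        void *start_site;   /* may be set to NULL by another thread */
    };

    static void record(void *site) { printf("site=%p\n", site); }

    /* Buggy shape: two reads; a concurrent writer can NULL the
     * field between the check and the use. */
    static void account_buggy(struct demo_timer *t)
    {
        if (!t->start_site)         /* read #1 */
            return;
        record(t->start_site);      /* read #2: may now be NULL */
    }

    /* Fixed shape: one read, then test and use the same value. */
    static void account_fixed(struct demo_timer *t)
    {
        void *site = *(void * volatile *)&t->start_site;

        if (!site)
            return;
        record(site);               /* guaranteed to match the check */
    }

    int main(void)
    {
        struct demo_timer t = { .start_site = &t };

        account_buggy(&t);
        account_fixed(&t);
        return 0;
    }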
The data race was found with KernelThreadSanitizer (KTSAN).

Signed-off-by: Dmitry Vyukov
Cc: andreyknvl@google.com
Cc: glider@google.com
Cc: kcc@google.com
Cc: ktsan@googlegroups.com
Cc: john.stultz@linaro.org
Link: http://lkml.kernel.org/r/1442584463-69553-1-git-send-email-dvyukov@google.com
Signed-off-by: Thomas Gleixner
---
 kernel/time/timer.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 84190f02b521..d3f5e92f722a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -461,10 +461,17 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
 
 static void timer_stats_account_timer(struct timer_list *timer)
 {
-	if (likely(!timer->start_site))
+	void *site;
+
+	/*
+	 * start_site can be concurrently reset by
+	 * timer_stats_timer_clear_start_info()
+	 */
+	site = READ_ONCE(timer->start_site);
+	if (likely(!site))
 		return;
 
-	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
+	timer_stats_update_stats(timer, timer->start_pid, site,
 				 timer->function, timer->start_comm,
 				 timer->flags);
 }
--
cgit v1.2.3

From 7ec88e4be461590b5a3817460c34603f76d9b3ae Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Mon, 28 Sep 2015 22:21:28 +0200
Subject: ntp/pps: use timespec64 for hardpps()

There is only one user of the hardpps function in the kernel, so it
makes sense to atomically change it over to using 64-bit timestamps
for y2038 safety. In the hardpps implementation, we also need to
change the pps_normtime structure, which is similar to struct timespec
and also requires a 64-bit seconds portion.

This introduces two temporary variables in pps_kc_event() to do the
conversion, they will be removed again in the next step, which seemed
preferable to having a larger patch changing it all at the same time.

Acked-by: Richard Cochran
Acked-by: David S. Miller
Reviewed-by: Thomas Gleixner
Signed-off-by: Arnd Bergmann
Signed-off-by: John Stultz
---
 kernel/time/ntp.c          | 12 ++++++------
 kernel/time/ntp_internal.h |  2 +-
 kernel/time/timekeeping.c  |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index df68cb875248..bd4fa6271262 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -99,7 +99,7 @@ static time64_t ntp_next_leap_sec = TIME64_MAX;
 static int pps_valid;		/* signal watchdog counter */
 static long pps_tf[3];		/* phase median filter */
 static long pps_jitter;		/* current jitter (ns) */
-static struct timespec pps_fbase; /* beginning of the last freq interval */
+static struct timespec64 pps_fbase; /* beginning of the last freq interval */
 static int pps_shift;		/* current interval duration (s) (shift) */
 static int pps_intcnt;		/* interval counter */
 static s64 pps_freq;		/* frequency offset (scaled ns/s) */
@@ -773,13 +773,13 @@ int __do_adjtimex(struct timex *txc, struct timespec64 *ts, s32 *time_tai)
  * pps_normtime.nsec has a range of ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ]
  * while timespec.tv_nsec has a range of [0, NSEC_PER_SEC) */
 struct pps_normtime {
-	__kernel_time_t	sec;	/* seconds */
+	s64		sec;	/* seconds */
 	long		nsec;	/* nanoseconds */
 };
 
 /* normalize the timestamp so that nsec is in the
    ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval */
-static inline struct pps_normtime pps_normalize_ts(struct timespec ts)
+static inline struct pps_normtime pps_normalize_ts(struct timespec64 ts)
 {
 	struct pps_normtime norm = {
 		.sec = ts.tv_sec,
@@ -861,7 +861,7 @@ static long hardpps_update_freq(struct pps_normtime freq_norm)
 		pps_errcnt++;
 		pps_dec_freq_interval();
 		printk_deferred(KERN_ERR
-			"hardpps: PPSERROR: interval too long - %ld s\n",
+			"hardpps: PPSERROR: interval too long - %lld s\n",
 			freq_norm.sec);
 		return 0;
 	}
@@ -948,7 +948,7 @@ static void hardpps_update_phase(long error)
  * This code is based on David Mills's reference nanokernel
  * implementation. It was mostly rewritten but keeps the same idea.
  */
-void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
+void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
 {
 	struct pps_normtime pts_norm, freq_norm;
 
@@ -969,7 +969,7 @@ void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 	}
 
 	/* ok, now we have a base for frequency calculation */
-	freq_norm = pps_normalize_ts(timespec_sub(*raw_ts, pps_fbase));
+	freq_norm = pps_normalize_ts(timespec64_sub(*raw_ts, pps_fbase));
 
 	/* check that the signal is in the range
 	 * [1s - MAXFREQ us, 1s + MAXFREQ us], otherwise reject it */
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 65430504ca26..af924470eac0 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -9,5 +9,5 @@ extern ktime_t ntp_get_next_leap(void);
 extern int second_overflow(unsigned long secs);
 extern int ntp_validate_timex(struct timex *);
 extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
-extern void __hardpps(const struct timespec *, const struct timespec *);
+extern void __hardpps(const struct timespec64 *, const struct timespec64 *);
 #endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3739ac6aa473..177188b11a2e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2025,7 +2025,7 @@ int do_adjtimex(struct timex *txc)
 /**
  * hardpps() - Accessor function to NTP __hardpps function
  */
-void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
+void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
 {
 	unsigned long flags;
--
cgit v1.2.3

From 071eee45b1650d53d21c636d344bdcebd4577ed2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Mon, 28 Sep 2015 22:21:29 +0200
Subject: ntp/pps: replace getnstime_raw_and_real with 64-bit version

There is exactly one caller of getnstime_raw_and_real in the kernel,
which is the pps_get_ts function. This changes the caller and the
implementation to work on timespec64 types rather than timespec, to
avoid the time_t overflow on 32-bit architectures.

For consistency with the other new functions (ktime_get_seconds,
ktime_get_real_*, ...), I'm renaming the function to
ktime_get_raw_and_real_ts64.

We still need to convert from the internal 64-bit type to 32-bit types
in the caller, but this conversion is now pushed out from
getnstime_raw_and_real to pps_get_ts. A follow-up patch changes the
remaining pps code to completely avoid the conversion.

Acked-by: Richard Cochran
Acked-by: David S. Miller
Reviewed-by: Thomas Gleixner
Signed-off-by: Arnd Bergmann
Signed-off-by: John Stultz
---
 kernel/time/timekeeping.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 177188b11a2e..274ed5e88456 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -849,7 +849,7 @@ EXPORT_SYMBOL_GPL(ktime_get_real_seconds);
 #ifdef CONFIG_NTP_PPS
 
 /**
- * getnstime_raw_and_real - get day and raw monotonic time in timespec format
+ * ktime_get_raw_and_real_ts64 - get day and raw monotonic time in timespec format
 * @ts_raw:	pointer to the timespec to be set to raw monotonic time
 * @ts_real:	pointer to the timespec to be set to the time of day
 *
 * This function reads both the time of day and raw monotonic time at the
 * same time atomically and stores the resulting timestamps in timespec
 * format.
 */
-void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
+void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, struct timespec64 *ts_real)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long seq;
@@ -868,7 +868,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 
-		*ts_raw = timespec64_to_timespec(tk->raw_time);
+		*ts_raw = tk->raw_time;
 		ts_real->tv_sec = tk->xtime_sec;
 		ts_real->tv_nsec = 0;
 
@@ -877,10 +877,10 @@
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
-	timespec_add_ns(ts_raw, nsecs_raw);
-	timespec_add_ns(ts_real, nsecs_real);
+	timespec64_add_ns(ts_raw, nsecs_raw);
+	timespec64_add_ns(ts_real, nsecs_real);
 }
-EXPORT_SYMBOL(getnstime_raw_and_real);
+EXPORT_SYMBOL(ktime_get_raw_and_real_ts64);
 
 #endif /* CONFIG_NTP_PPS */
--
cgit v1.2.3

From 5fd96c421ff2c76ec441aa4139c3b87dfea93e3a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Mon, 28 Sep 2015 22:21:30 +0200
Subject: ntp: use timespec64 in sync_cmos_clock

The sync_cmos_clock has one use of struct timespec, which we want to
eventually replace with timespec64 or similar in the kernel. There is
no way this one can overflow, but the conversion to timespec64 is
trivial and has no other dependencies.

Acked-by: Richard Cochran
Acked-by: David S. Miller
Reviewed-by: Thomas Gleixner
Signed-off-by: Arnd Bergmann
Signed-off-by: John Stultz
---
 kernel/time/ntp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index bd4fa6271262..149cc8086aea 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -509,7 +509,7 @@ static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
 static void sync_cmos_clock(struct work_struct *work)
 {
 	struct timespec64 now;
-	struct timespec next;
+	struct timespec64 next;
 	int fail = 1;
 
 	/*
@@ -559,7 +559,7 @@ static void sync_cmos_clock(struct work_struct *work)
 		next.tv_nsec -= NSEC_PER_SEC;
 	}
 	queue_delayed_work(system_power_efficient_wq,
-			   &sync_cmos_work, timespec_to_jiffies(&next));
+			   &sync_cmos_work, timespec64_to_jiffies(&next));
 }
 
 void ntp_notify_cmos_timer(void)
--
cgit v1.2.3

From 67dfae0cd72fec5cd158b6e5fb1647b7dbe0834c Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Mon, 14 Sep 2015 18:05:20 -0700
Subject: clocksource: Fix abs() usage w/ 64bit values

This patch fixes one case where abs() was being used with 64-bit
nanosecond values, where the result may be capped at 32 bits. This
potentially could cause watchdog false negatives on 32-bit systems, so
this patch addresses the issue by using abs64().

Signed-off-by: John Stultz
Cc: Prarit Bhargava
Cc: Richard Cochran
Cc: Ingo Molnar
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1442279124-7309-2-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner
---
 kernel/time/clocksource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 841b72f720e8..3a38775b50c2 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -217,7 +217,7 @@ static void clocksource_watchdog(unsigned long data)
 			continue;
 
 		/* Check the deviation from the watchdog clocksource. */
-		if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
+		if (abs64(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
 			pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n",
 				cs->name);
 			pr_warn("                      '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
--
cgit v1.2.3

From cfed432d7f4114e16e0163bcfe65e96f0c304493 Mon Sep 17 00:00:00 2001
From: Guillaume Gomez
Date: Wed, 23 Sep 2015 13:19:19 +0200
Subject: clocksource: Remove return statement from void functions

Signed-off-by: Guillaume Gomez
Cc: John Stultz
Link: http://lkml.kernel.org/r/CAAOQCfSDgmqSWDBsetau%2ByF8x0%2BDagCF_pfFw0p5xH_BKkKEog@mail.gmail.com
Signed-off-by: Thomas Gleixner
---
 kernel/time/clocksource.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 174c594ff8ef..de0ad66f2b21 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -595,16 +595,15 @@ static void __clocksource_select(bool skipcur)
  */
 static void clocksource_select(void)
 {
-	return __clocksource_select(false);
+	__clocksource_select(false);
 }
 
 static void clocksource_select_fallback(void)
 {
-	return __clocksource_select(true);
+	__clocksource_select(true);
 }
 
 #else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
-
 static inline void clocksource_select(void) { }
 static inline void clocksource_select_fallback(void) { }
--
cgit v1.2.3

From 9fc4468d546b6eb55b0aa5b04b0c36238ebf57e7 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes
Date: Fri, 2 Oct 2015 09:45:30 +0200
Subject: timers: Use __fls in apply_slack()

In apply_slack(), find_last_bit() is applied to a bitmask consisting
of precisely BITS_PER_LONG bits. Since mask is non-zero, we might as
well eliminate the function call and use __fls() directly. On x86_64,
this shaves 23 bytes off the only caller, mod_timer().

This also gets rid of Coverity CID 1192106, but that is a false
positive: Coverity is not aware that mask != 0 implies that
find_last_bit will not return BITS_PER_LONG.

Signed-off-by: Rasmus Villemoes
Cc: John Stultz
Link: http://lkml.kernel.org/r/1443771931-6284-1-git-send-email-linux@rasmusvillemoes.dk
Signed-off-by: Thomas Gleixner
---
 kernel/time/timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index d3f5e92f722a..74591ba9474f 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -874,7 +874,7 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
 	if (mask == 0)
 		return expires;
 
-	bit = find_last_bit(&mask, BITS_PER_LONG);
+	bit = __fls(mask);
 
 	mask = (1UL << bit) - 1;
--
cgit v1.2.3

From 7c177d994eb9637302b79e80d331f48dfbe26368 Mon Sep 17 00:00:00 2001
From: Jason Low
Date: Wed, 14 Oct 2015 12:07:53 -0700
Subject: posix_cpu_timer: Optimize fastpath_timer_check()

In fastpath_timer_check(), the task_cputime() function is always
called to compute the utime and stime values. However, this is not
necessary if there are no per-thread timers to check for. This patch
modifies the code such that we compute the task_cputime values only
when there are per-thread timers set.

Signed-off-by: Jason Low
Reviewed-by: Oleg Nesterov
Reviewed-by: Frederic Weisbecker
Reviewed-by: Davidlohr Bueso
Reviewed-by: George Spelvin
Cc: Paul E. McKenney
Cc: Steven Rostedt
Cc: hideaki.kimura@hpe.com
Cc: terry.rudd@hpe.com
Cc: scott.norton@hpe.com
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1444849677-29330-2-git-send-email-jason.low2@hp.com
Signed-off-by: Thomas Gleixner
---
 kernel/time/posix-cpu-timers.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 892e3dae0aac..aa4b6f4f3b5e 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1117,17 +1117,12 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
 static inline int fastpath_timer_check(struct task_struct *tsk)
 {
 	struct signal_struct *sig;
-	cputime_t utime, stime;
-
-	task_cputime(tsk, &utime, &stime);
 
 	if (!task_cputime_zero(&tsk->cputime_expires)) {
-		struct task_cputime task_sample = {
-			.utime = utime,
-			.stime = stime,
-			.sum_exec_runtime = tsk->se.sum_exec_runtime
-		};
+		struct task_cputime task_sample;
 
+		task_cputime(tsk, &task_sample.utime, &task_sample.stime);
+		task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime;
 		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
 			return 1;
 	}
--
cgit v1.2.3

From 934715a191e4be0c602d39455a7a74316f274d60 Mon Sep 17 00:00:00 2001
From: Jason Low
Date: Wed, 14 Oct 2015 12:07:54 -0700
Subject: posix_cpu_timer: Check thread timers only when there are active thread timers

The fastpath_timer_check() contains logic to check whether any timers
are set by checking !task_cputime_zero(). Similarly, we can do this
before calling check_thread_timers(). In the case where there are only
process-wide timers, this will skip all of the per-thread timer
computations when there are no per-thread timers.

As suggested by George, we can put the task_cputime_zero() check in
check_thread_timers(), since that is more of an optimization to the
function. Similarly, we move the existing check of cputimer->running
to check_process_timers().

Signed-off-by: Jason Low
Reviewed-by: Oleg Nesterov
Reviewed-by: George Spelvin
Cc: Paul E. McKenney
Cc: Frederic Weisbecker
Cc: Davidlohr Bueso
Cc: Steven Rostedt
Cc: hideaki.kimura@hpe.com
Cc: terry.rudd@hpe.com
Cc: scott.norton@hpe.com
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1444849677-29330-3-git-send-email-jason.low2@hp.com
Signed-off-by: Thomas Gleixner
---
 kernel/time/posix-cpu-timers.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index aa4b6f4f3b5e..6f6e252ec761 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -864,6 +864,13 @@ static void check_thread_timers(struct task_struct *tsk,
 	unsigned long long expires;
 	unsigned long soft;
 
+	/*
+	 * If cputime_expires is zero, then there are no active
+	 * per thread CPU timers.
+	 */
+	if (task_cputime_zero(&tsk->cputime_expires))
+		return;
+
 	expires = check_timers_list(timers, firing, prof_ticks(tsk));
 	tsk_expires->prof_exp = expires_to_cputime(expires);
 
@@ -961,6 +968,13 @@ static void check_process_timers(struct task_struct *tsk,
 	struct task_cputime cputime;
 	unsigned long soft;
 
+	/*
+	 * If cputimer is not running, then there are no active
+	 * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU).
+	 */
+	if (!READ_ONCE(tsk->signal->cputimer.running))
+		return;
+
 	/*
 	 * Collect the current process totals.
	 */
@@ -1169,12 +1183,8 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 	 * put them on the firing list.
 	 */
 	check_thread_timers(tsk, &firing);
-	/*
-	 * If there are any active process wide timers (POSIX 1.b, itimers,
-	 * RLIMIT_CPU) cputimer must be running.
-	 */
-	if (READ_ONCE(tsk->signal->cputimer.running))
-		check_process_timers(tsk, &firing);
+
+	check_process_timers(tsk, &firing);
 
 	/*
 	 * We must release these locks before taking any timer's lock.
--
cgit v1.2.3

From d5c373eb5610686162ff50429f63f4c00c554799 Mon Sep 17 00:00:00 2001
From: Jason Low
Date: Wed, 14 Oct 2015 12:07:55 -0700
Subject: posix_cpu_timer: Convert cputimer->running to bool

In the next patch in this series, a new field 'checking_timer' will
be added to 'struct thread_group_cputimer'. Both this and the existing
'running' integer field are just used as boolean values. To save space
in the structure, we can make both of these fields booleans.

This is a preparatory patch to convert the existing running integer
field to a boolean.

Suggested-by: George Spelvin
Signed-off-by: Jason Low
Reviewed-by: George Spelvin
Cc: Oleg Nesterov
Cc: Paul E. McKenney
Cc: Frederic Weisbecker
Cc: Davidlohr Bueso
Cc: Steven Rostedt
Cc: hideaki.kimura@hpe.com
Cc: terry.rudd@hpe.com
Cc: scott.norton@hpe.com
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1444849677-29330-4-git-send-email-jason.low2@hp.com
Signed-off-by: Thomas Gleixner
---
 kernel/time/posix-cpu-timers.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 6f6e252ec761..2d58153074d9 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -249,7 +249,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 		 * but barriers are not required because update_gt_cputime()
 		 * can handle concurrent updates.
 		 */
-		WRITE_ONCE(cputimer->running, 1);
+		WRITE_ONCE(cputimer->running, true);
 	}
 	sample_cputime_atomic(times, &cputimer->cputime_atomic);
 }
@@ -918,7 +918,7 @@ static inline void stop_process_timers(struct signal_struct *sig)
 	struct thread_group_cputimer *cputimer = &sig->cputimer;
 
 	/* Turn off cputimer->running. This is done without locking. */
-	WRITE_ONCE(cputimer->running, 0);
+	WRITE_ONCE(cputimer->running, false);
 }
 
 static u32 onecputick;
--
cgit v1.2.3

From c8d75aa47dd585c9538a8205e9bb9847e12cfb84 Mon Sep 17 00:00:00 2001
From: Jason Low
Date: Wed, 14 Oct 2015 12:07:56 -0700
Subject: posix_cpu_timer: Reduce unnecessary sighand lock contention

It was found while running a database workload on large systems that
significant time was spent trying to acquire the sighand lock.

The issue was that whenever an itimer expired, many threads ended up
simultaneously trying to send the signal. Most of the time, nothing
happened after acquiring the sighand lock because another thread had
already sent the signal and updated the "next expire" time. The
fastpath_timer_check() didn't help much since the "next expire" time
was updated after the threads exited fastpath_timer_check().

This patch addresses this by having the thread_group_cputimer structure
maintain a boolean to signify when a thread in the group is already
checking for process wide timers, and adds extra logic in the fastpath
to check the boolean.

Signed-off-by: Jason Low
Reviewed-by: Oleg Nesterov
Reviewed-by: George Spelvin
Cc: Paul E. McKenney
Cc: Frederic Weisbecker
Cc: Davidlohr Bueso
Cc: Steven Rostedt
Cc: hideaki.kimura@hpe.com
Cc: terry.rudd@hpe.com
Cc: scott.norton@hpe.com
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1444849677-29330-5-git-send-email-jason.low2@hp.com
Signed-off-by: Thomas Gleixner
---
 kernel/time/posix-cpu-timers.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 2d58153074d9..f5e86d282d52 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -975,6 +975,12 @@ static void check_process_timers(struct task_struct *tsk,
 	if (!READ_ONCE(tsk->signal->cputimer.running))
 		return;
 
+	/*
+	 * Signify that a thread is checking for process timers.
+	 * Write access to this field is protected by the sighand lock.
+	 */
+	sig->cputimer.checking_timer = true;
+
 	/*
 	 * Collect the current process totals.
 	 */
@@ -1029,6 +1035,8 @@ static void check_process_timers(struct task_struct *tsk,
 	sig->cputime_expires.sched_exp = sched_expires;
 	if (task_cputime_zero(&sig->cputime_expires))
 		stop_process_timers(sig);
+
+	sig->cputimer.checking_timer = false;
 }
 
 /*
@@ -1142,8 +1150,22 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 	}
 
 	sig = tsk->signal;
-	/* Check if cputimer is running. This is accessed without locking. */
-	if (READ_ONCE(sig->cputimer.running)) {
+	/*
+	 * Check if thread group timers expired when the cputimer is
+	 * running and no other thread in the group is already checking
+	 * for thread group cputimers. These fields are read without the
+	 * sighand lock. However, this is fine because this is meant to
+	 * be a fastpath heuristic to determine whether we should try to
+	 * acquire the sighand lock to check/handle timers.
+	 *
+	 * In the worst case scenario, if 'running' or 'checking_timer' gets
+	 * set but the current thread doesn't see the change yet, we'll wait
+	 * until the next thread in the group gets a scheduler interrupt to
+	 * handle the timer. This isn't an issue in practice because these
+	 * types of delays with signals actually getting sent are expected.
+	 */
+	if (READ_ONCE(sig->cputimer.running) &&
+	    !READ_ONCE(sig->cputimer.checking_timer)) {
 		struct task_cputime group_sample;
 
 		sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic);
--
cgit v1.2.3

From 56fd16cabac9cd8f15e2902898a9d0cc96e2fa70 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Fri, 16 Oct 2015 15:50:22 +0200
Subject: timekeeping: Increment clock_was_set_seq in timekeeping_init()

timekeeping_init() can set the wall time offset, so we need to
increment the clock_was_set_seq counter. That way hrtimers will pick
up the early offset immediately. Otherwise, on a machine which does
not set wall time later in the boot process, the hrtimer offset is
stale at 0 and wall time timers are going to expire with a delay of
45 years.
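A back-of-the-envelope illustration (plain userspace C, not kernel
code) of where the 45 years comes from: with the realtime offset stale
at 0, a CLOCK_REALTIME expiry is effectively interpreted on the
monotonic clock, so it fires late by roughly the wall-clock time
elapsed since the 1970 epoch — about 45 years on a 2015 system:

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        time_t now = time(NULL);    /* seconds since 1970-01-01 */

        /* 2015: ~1.44e9 s / ~3.156e7 s-per-year ~= 45.6 years */
        printf("stale-offset delay: ~%.1f years\n",
               (double)now / (365.25 * 24 * 3600));
        return 0;
    }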
Fixes: 868a3e915f7f "hrtimer: Make offset update smarter"
Reported-and-tested-by: Heiko Carstens
Signed-off-by: Thomas Gleixner
Cc: Stefan Liebler
Cc: Peter Zijlstra
Cc: John Stultz
---
 kernel/time/timekeeping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3739ac6aa473..44d2cc0436f4 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1251,7 +1251,7 @@ void __init timekeeping_init(void)
 	set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec);
 	tk_set_wall_to_mono(tk, tmp);
 
-	timekeeping_update(tk, TK_MIRROR);
+	timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
 
 	write_seqcount_end(&tk_core.seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
--
cgit v1.2.3

From 03f136a2074b2b8890da4a24df7104558ad0da48 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld"
Date: Tue, 14 Jul 2015 19:24:45 +0200
Subject: timeconst: Update path in comment

Signed-off-by: Jason A. Donenfeld
Cc: hofrat@osadl.org
Link: http://lkml.kernel.org/r/1436894685-5868-1-git-send-email-Jason@zx2c4.com
Signed-off-by: Thomas Gleixner
---
 kernel/time/timeconst.bc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timeconst.bc b/kernel/time/timeconst.bc
index c7388dee8635..c48688904f9f 100644
--- a/kernel/time/timeconst.bc
+++ b/kernel/time/timeconst.bc
@@ -39,7 +39,7 @@ define fmuls(b,n,d) {
 }
 
 define timeconst(hz) {
-	print "/* Automatically generated by kernel/timeconst.bc */\n"
+	print "/* Automatically generated by kernel/time/timeconst.bc */\n"
 	print "/* Time conversion constants for HZ == ", hz, " */\n"
 	print "\n"
--
cgit v1.2.3

From 22b886dd1018093920c4250dee2a9a3cb7cff7b8 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Wed, 4 Nov 2015 12:15:33 -0500
Subject: timers: Use proper base migration in add_timer_on()

Regardless of the previous CPU a timer was on, add_timer_on()
currently simply sets timer->flags to the new CPU. As the caller must
be seeing the timer as idle, this is locally fine, but the timer
leaving the old base while unlocked can lead to race conditions as
follows.

Let's say timer was on cpu 0.

  cpu 0                                 cpu 1
  -----------------------------------------------------------------------------
  del_timer(timer) succeeds
                                        del_timer(timer)
                                          lock_timer_base(timer) locks cpu_0_base
  add_timer_on(timer, 1)
    spin_lock(&cpu_1_base->lock)
    timer->flags set to cpu_1_base
    operates on @timer                    operates on @timer

This triggered with mod_delayed_work_on() which contains
"if (del_timer()) add_timer_on()" sequence eventually leading to the
following oops.

  BUG: unable to handle kernel NULL pointer dereference at           (null)
  IP: [] detach_if_pending+0x69/0x1a0
  ...
  Workqueue: wqthrash wqthrash_workfunc [wqthrash]
  task: ffff8800172ca680 ti: ffff8800172d0000 task.ti: ffff8800172d0000
  RIP: 0010:[] [] detach_if_pending+0x69/0x1a0
  ...
  Call Trace:
   [] del_timer+0x44/0x60
   [] try_to_grab_pending+0xb6/0x160
   [] mod_delayed_work_on+0x33/0x80
   [] wqthrash_workfunc+0x61/0x90 [wqthrash]
   [] process_one_work+0x1e8/0x650
   [] worker_thread+0x4e/0x450
   [] kthread+0xef/0x110
   [] ret_from_fork+0x3f/0x70

Fix it by updating add_timer_on() to perform proper migration as
__mod_timer() does.
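The shape of the fix — never let an object leave a lock domain except
while holding that domain's lock, and park it in a MIGRATING state in
between — can be sketched in portable userspace C (illustration only;
names are hypothetical and error handling is omitted):

    #include <pthread.h>
    #include <stdatomic.h>

    struct base { pthread_mutex_t lock; };
    struct item { struct base *_Atomic home; };

    static struct base migrating;   /* sentinel, like TIMER_MIGRATING */

    /* Analogue of lock_timer_base(): lock the base the item lives on,
     * re-checking ->home afterwards in case it moved while we slept. */
    static struct base *lock_home(struct item *it)
    {
        for (;;) {
            struct base *b = atomic_load(&it->home);

            if (b != &migrating) {
                pthread_mutex_lock(&b->lock);
                if (b == atomic_load(&it->home))
                    return b;       /* still ours, and locked */
                pthread_mutex_unlock(&b->lock);
            }
            /* migration in flight: retry */
        }
    }

    /* Analogue of the fixed add_timer_on(): leave the old base only
     * while holding its lock, then publish the new home. */
    static void move_item(struct item *it, struct base *nb)
    {
        struct base *ob = lock_home(it);

        if (ob != nb) {
            atomic_store(&it->home, &migrating);
            pthread_mutex_unlock(&ob->lock);
            pthread_mutex_lock(&nb->lock);
            atomic_store(&it->home, nb);
        }
        /* ... operate on the item under the correct lock ... */
        pthread_mutex_unlock(ob != nb ? &nb->lock : &ob->lock);
    }

    int main(void)
    {
        struct base b0 = { PTHREAD_MUTEX_INITIALIZER };
        struct base b1 = { PTHREAD_MUTEX_INITIALIZER };
        struct item it;

        atomic_init(&it.home, &b0);
        move_item(&it, &b1);    /* migrates b0 -> b1 */
        return 0;
    }

The sentinel plays the role of TIMER_MIGRATING: a concurrent lookup
that observes it simply retries instead of locking a stale base.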
Reported-and-tested-by: Jeff Layton
Signed-off-by: Tejun Heo
Cc: Chris Worley
Cc: bfields@fieldses.org
Cc: Michael Skralivetsky
Cc: Trond Myklebust
Cc: Shaohua Li
Cc: Jeff Layton
Cc: kernel-team@fb.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20151029103113.2f893924@tlielax.poochiereds.net
Link: http://lkml.kernel.org/r/20151104171533.GI5749@mtj.duckdns.org
Signed-off-by: Thomas Gleixner
---
 kernel/time/timer.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 74591ba9474f..bbc5d1114583 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -977,13 +977,29 @@ EXPORT_SYMBOL(add_timer);
  */
 void add_timer_on(struct timer_list *timer, int cpu)
 {
-	struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
+	struct tvec_base *new_base = per_cpu_ptr(&tvec_bases, cpu);
+	struct tvec_base *base;
 	unsigned long flags;
 
 	timer_stats_timer_set_start_info(timer);
 	BUG_ON(timer_pending(timer) || !timer->function);
-	spin_lock_irqsave(&base->lock, flags);
-	timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
+
+	/*
+	 * If @timer was on a different CPU, it should be migrated with the
+	 * old base locked to prevent other operations proceeding with the
+	 * wrong base locked.  See lock_timer_base().
+	 */
+	base = lock_timer_base(timer, &flags);
+	if (base != new_base) {
+		timer->flags |= TIMER_MIGRATING;
+
+		spin_unlock(&base->lock);
+		base = new_base;
+		spin_lock(&base->lock);
+		WRITE_ONCE(timer->flags,
+			   (timer->flags & ~TIMER_BASEMASK) | cpu);
+	}
+
 	debug_activate(timer, timer->expires);
 	internal_add_timer(base, timer);
 	spin_unlock_irqrestore(&base->lock, flags);
--
cgit v1.2.3

From 79211c8ed19c055ca105502c8733800d442a0ae6 Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Mon, 9 Nov 2015 14:58:13 -0800
Subject: remove abs64()

Switch everything to the new and more capable implementation of abs().
Mainly to give the new abs() a bit of a workout.

Cc: Michal Nazarewicz
Cc: John Stultz
Cc: Ingo Molnar
Cc: Steven Rostedt
Cc: Peter Zijlstra
Cc: Masami Hiramatsu
Cc: Peter Zijlstra
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/time/clocksource.c | 2 +-
 kernel/time/timekeeping.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 0d8fe8b8f727..1347882d131e 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -217,7 +217,7 @@ static void clocksource_watchdog(unsigned long data)
 			continue;
 
 		/* Check the deviation from the watchdog clocksource. */
-		if (abs64(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
+		if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
 			pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n",
 				cs->name);
 			pr_warn("                      '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index b1356b7ae570..d563c1960302 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1614,7 +1614,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
 	negative = (tick_error < 0);
 
 	/* Sort out the magnitude of the correction */
-	tick_error = abs64(tick_error);
+	tick_error = abs(tick_error);
 	for (adj = 0; tick_error > interval; adj++)
 		tick_error >>= 1;
--
cgit v1.2.3
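A closing illustration (userspace sketch, not part of the patch) of
the pitfall that motivated abs64() earlier in this series and that the
type-generic abs() now removes: an int-sized abs() truncates a 64-bit
value before taking its magnitude:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        /* 4.3 s of clock skew, expressed in nanoseconds */
        long long skew_ns = 4300000000LL;

        /* int-sized abs(): the value is truncated to 32 bits first */
        printf("abs((int)skew) = %d ns\n", abs((int)skew_ns));

        /* 64-bit magnitude, the equivalent of abs64()/generic abs() */
        printf("llabs(skew)    = %lld ns\n", llabs(skew_ns));
        return 0;
    }

Here the 4.3 s skew truncates to roughly 5 ms, slipping under the
watchdog threshold — exactly the false-negative mode the clocksource
fix above describes.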