From 571af55a31d3652ac1f758f116835a76d0335661 Mon Sep 17 00:00:00 2001
From: Zhen Lei
Date: Tue, 25 Aug 2015 14:42:53 +0800
Subject: time: Fix spelling in comments

Signed-off-by: Zhen Lei
Cc: Hanjun Guo
Cc: John Stultz
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Rafael J. Wysocki
Cc: Thomas Gleixner
Cc: Tianhong Ding
Cc: Viresh Kumar
Cc: Xinwei Hu
Cc: Xunlei Pang
Cc: Zefan Li
Link: http://lkml.kernel.org/r/1440484973-13892-1-git-send-email-thunder.leizhen@huawei.com
[ Fixed yet another typo in one of the sentences fixed. ]
Signed-off-by: Ingo Molnar
---
 kernel/time/clocksource.c | 2 +-
 kernel/time/hrtimer.c     | 2 +-
 kernel/time/timekeeping.c | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 841b72f720e8..174c594ff8ef 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -479,7 +479,7 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
  * return half the number of nanoseconds the hardware counter can technically
  * cover. This is done so that we can potentially detect problems caused by
  * delayed timers or bad hardware, which might result in time intervals that
- * are larger then what the math used can handle without overflows.
+ * are larger than what the math used can handle without overflows.
  */
 u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
 {
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 457a373e2181..435b8850dd80 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -59,7 +59,7 @@
 /*
  * The timer bases:
  *
- * There are more clockids then hrtimer bases. Thus, we index
+ * There are more clockids than hrtimer bases. Thus, we index
  * into the timer bases by the hrtimer_base_type enum. When trying
  * to reach a base using a clockid, hrtimer_clockid_to_base()
  * is used to convert from clockid to the proper hrtimer_base_type.
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3739ac6aa473..125f16a29cf6 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1674,7 +1674,7 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
 /**
  * accumulate_nsecs_to_secs - Accumulates nsecs into secs
  *
- * Helper function that accumulates a the nsecs greater then a second
+ * Helper function that accumulates the nsecs greater than a second
  * from the xtime_nsec field to the xtime_secs field.
  * It also calls into the NTP code to handle leapsecond processing.
 *
@@ -1726,7 +1726,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
 	cycle_t interval = tk->cycle_interval << shift;
 	u64 raw_nsecs;
 
-	/* If the offset is smaller then a shifted interval, do nothing */
+	/* If the offset is smaller than a shifted interval, do nothing */
 	if (offset < interval)
 		return offset;
--
cgit v1.2.3

From 3ed769bdb2a2484fd7f9f7f3047413053aacbe21 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov
Date: Fri, 18 Sep 2015 15:54:23 +0200
Subject: timers: Fix data race in timer_stats_account_timer()

timer_stats_account_timer() reads timer->start_site, checks it for
NULL, and then reads it again, while timer_stats_timer_clear_start_info()
can concurrently reset timer->start_site to NULL. This should not lead
to crashes, but it can double the number of entries in timer stats,
since start_site is used during comparison; the doubled entries will
have a useless NULL start_site.

Read timer->start_site only once in timer_stats_account_timer().
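For illustration only (userspace C, not the kernel code; demo_timer and
record() are hypothetical stand-ins), the buggy shape reads the pointer
twice, while the fixed shape reads it exactly once — which is the core
of what the kernel's READ_ONCE() expands to, a read through a volatile
lvalue:

    #include <stdio.h>

    /* Hypothetical stand-in for the timer_stats state involved. */
    struct demo_timer {
        void *start_site;   /* may be set to NULL by another thread */
    };

    static void record(void *site) { printf("site=%p\n", site); }

    /* Buggy shape: two reads; a concurrent writer can NULL the
     * field between the check and the use. */
    static void account_buggy(struct demo_timer *t)
    {
        if (!t->start_site)         /* read #1 */
            return;
        record(t->start_site);      /* read #2: may now be NULL */
    }

    /* Fixed shape: one read, then test and use the same value. */
    static void account_fixed(struct demo_timer *t)
    {
        void *site = *(void * volatile *)&t->start_site;

        if (!site)
            return;
        record(site);               /* guaranteed to match the check */
    }

    int main(void)
    {
        struct demo_timer t = { .start_site = &t };

        account_buggy(&t);
        account_fixed(&t);
        return 0;
    }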
The data race was found with KernelThreadSanitizer (KTSAN).

Signed-off-by: Dmitry Vyukov
Cc: andreyknvl@google.com
Cc: glider@google.com
Cc: kcc@google.com
Cc: ktsan@googlegroups.com
Cc: john.stultz@linaro.org
Link: http://lkml.kernel.org/r/1442584463-69553-1-git-send-email-dvyukov@google.com
Signed-off-by: Thomas Gleixner
---
 kernel/time/timer.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 84190f02b521..d3f5e92f722a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -461,10 +461,17 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
 
 static void timer_stats_account_timer(struct timer_list *timer)
 {
-	if (likely(!timer->start_site))
+	void *site;
+
+	/*
+	 * start_site can be concurrently reset by
+	 * timer_stats_timer_clear_start_info()
+	 */
+	site = READ_ONCE(timer->start_site);
+	if (likely(!site))
 		return;
 
-	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
+	timer_stats_update_stats(timer, timer->start_pid, site,
 				 timer->function, timer->start_comm,
 				 timer->flags);
 }
--
cgit v1.2.3

From 7ec88e4be461590b5a3817460c34603f76d9b3ae Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Mon, 28 Sep 2015 22:21:28 +0200
Subject: ntp/pps: use timespec64 for hardpps()

There is only one user of the hardpps function in the kernel, so it
makes sense to atomically change it over to using 64-bit timestamps
for y2038 safety. In the hardpps implementation, we also need to
change the pps_normtime structure, which is similar to struct timespec
and also requires a 64-bit seconds portion.

This introduces two temporary variables in pps_kc_event() to do the
conversion, they will be removed again in the next step, which seemed
preferable to having a larger patch changing it all at the same time.

Acked-by: Richard Cochran
Acked-by: David S. Miller
Reviewed-by: Thomas Gleixner
Signed-off-by: Arnd Bergmann
Signed-off-by: John Stultz
---
 kernel/time/ntp.c          | 12 ++++++------
 kernel/time/ntp_internal.h |  2 +-
 kernel/time/timekeeping.c  |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index df68cb875248..bd4fa6271262 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -99,7 +99,7 @@ static time64_t ntp_next_leap_sec = TIME64_MAX;
 static int pps_valid;		/* signal watchdog counter */
 static long pps_tf[3];		/* phase median filter */
 static long pps_jitter;		/* current jitter (ns) */
-static struct timespec pps_fbase; /* beginning of the last freq interval */
+static struct timespec64 pps_fbase; /* beginning of the last freq interval */
 static int pps_shift;		/* current interval duration (s) (shift) */
 static int pps_intcnt;		/* interval counter */
 static s64 pps_freq;		/* frequency offset (scaled ns/s) */
@@ -773,13 +773,13 @@ int __do_adjtimex(struct timex *txc, struct timespec64 *ts, s32 *time_tai)
  * pps_normtime.nsec has a range of ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ]
  * while timespec.tv_nsec has a range of [0, NSEC_PER_SEC) */
 struct pps_normtime {
-	__kernel_time_t	sec;	/* seconds */
+	s64		sec;	/* seconds */
 	long		nsec;	/* nanoseconds */
 };
 
 /* normalize the timestamp so that nsec is in the
    ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval */
-static inline struct pps_normtime pps_normalize_ts(struct timespec ts)
+static inline struct pps_normtime pps_normalize_ts(struct timespec64 ts)
 {
 	struct pps_normtime norm = {
 		.sec = ts.tv_sec,
@@ -861,7 +861,7 @@ static long hardpps_update_freq(struct pps_normtime freq_norm)
 		pps_errcnt++;
 		pps_dec_freq_interval();
 		printk_deferred(KERN_ERR
-			"hardpps: PPSERROR: interval too long - %ld s\n",
+			"hardpps: PPSERROR: interval too long - %lld s\n",
 			freq_norm.sec);
 		return 0;
 	}
@@ -948,7 +948,7 @@ static void hardpps_update_phase(long error)
  * This code is based on David Mills's reference nanokernel
  * implementation. It was mostly rewritten but keeps the same idea.
  */
-void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
+void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
 {
 	struct pps_normtime pts_norm, freq_norm;
 
@@ -969,7 +969,7 @@ void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 	}
 
 	/* ok, now we have a base for frequency calculation */
-	freq_norm = pps_normalize_ts(timespec_sub(*raw_ts, pps_fbase));
+	freq_norm = pps_normalize_ts(timespec64_sub(*raw_ts, pps_fbase));
 
 	/* check that the signal is in the range
 	 * [1s - MAXFREQ us, 1s + MAXFREQ us], otherwise reject it */
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 65430504ca26..af924470eac0 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -9,5 +9,5 @@ extern ktime_t ntp_get_next_leap(void);
 extern int second_overflow(unsigned long secs);
 extern int ntp_validate_timex(struct timex *);
 extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
-extern void __hardpps(const struct timespec *, const struct timespec *);
+extern void __hardpps(const struct timespec64 *, const struct timespec64 *);
 #endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3739ac6aa473..177188b11a2e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2025,7 +2025,7 @@ int do_adjtimex(struct timex *txc)
 /**
  * hardpps() - Accessor function to NTP __hardpps function
  */
-void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
+void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
 {
 	unsigned long flags;
--
cgit v1.2.3

From 071eee45b1650d53d21c636d344bdcebd4577ed2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Mon, 28 Sep 2015 22:21:29 +0200
Subject: ntp/pps: replace getnstime_raw_and_real with 64-bit version

There is exactly one caller of getnstime_raw_and_real in the kernel,
which is the pps_get_ts function. This changes the caller and the
implementation to work on timespec64 types rather than timespec, to
avoid the time_t overflow on 32-bit architectures.

For consistency with the other new functions (ktime_get_seconds,
ktime_get_real_*, ...), I'm renaming the function to
ktime_get_raw_and_real_ts64.

We still need to convert from the internal 64-bit type to 32-bit types
in the caller, but this conversion is now pushed out from
getnstime_raw_and_real to pps_get_ts. A follow-up patch changes the
remaining pps code to completely avoid the conversion.

Acked-by: Richard Cochran
Acked-by: David S. Miller
Reviewed-by: Thomas Gleixner
Signed-off-by: Arnd Bergmann
Signed-off-by: John Stultz
---
 kernel/time/timekeeping.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 177188b11a2e..274ed5e88456 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -849,7 +849,7 @@ EXPORT_SYMBOL_GPL(ktime_get_real_seconds);
 #ifdef CONFIG_NTP_PPS
 
 /**
- * getnstime_raw_and_real - get day and raw monotonic time in timespec format
+ * ktime_get_raw_and_real_ts64 - get day and raw monotonic time in timespec format
 * @ts_raw:	pointer to the timespec to be set to raw monotonic time
 * @ts_real:	pointer to the timespec to be set to the time of day
 *
 * This function reads both the time of day and raw monotonic time at the
 * same time atomically and stores the resulting timestamps in timespec
 * format.
 */
-void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
+void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, struct timespec64 *ts_real)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long seq;
@@ -868,7 +868,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 
-		*ts_raw = timespec64_to_timespec(tk->raw_time);
+		*ts_raw = tk->raw_time;
 		ts_real->tv_sec = tk->xtime_sec;
 		ts_real->tv_nsec = 0;
 
@@ -877,10 +877,10 @@
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
-	timespec_add_ns(ts_raw, nsecs_raw);
-	timespec_add_ns(ts_real, nsecs_real);
+	timespec64_add_ns(ts_raw, nsecs_raw);
+	timespec64_add_ns(ts_real, nsecs_real);
 }
-EXPORT_SYMBOL(getnstime_raw_and_real);
+EXPORT_SYMBOL(ktime_get_raw_and_real_ts64);
 
 #endif /* CONFIG_NTP_PPS */
--
cgit v1.2.3

From 5fd96c421ff2c76ec441aa4139c3b87dfea93e3a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Mon, 28 Sep 2015 22:21:30 +0200
Subject: ntp: use timespec64 in sync_cmos_clock

The sync_cmos_clock has one use of struct timespec, which we want to
eventually replace with timespec64 or similar in the kernel. There is
no way this one can overflow, but the conversion to timespec64 is
trivial and has no other dependencies.

Acked-by: Richard Cochran
Acked-by: David S. Miller
Reviewed-by: Thomas Gleixner
Signed-off-by: Arnd Bergmann
Signed-off-by: John Stultz
---
 kernel/time/ntp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index bd4fa6271262..149cc8086aea 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -509,7 +509,7 @@ static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
 static void sync_cmos_clock(struct work_struct *work)
 {
 	struct timespec64 now;
-	struct timespec next;
+	struct timespec64 next;
 	int fail = 1;
 
 	/*
@@ -559,7 +559,7 @@ static void sync_cmos_clock(struct work_struct *work)
 		next.tv_nsec -= NSEC_PER_SEC;
 	}
 	queue_delayed_work(system_power_efficient_wq,
-			   &sync_cmos_work, timespec_to_jiffies(&next));
+			   &sync_cmos_work, timespec64_to_jiffies(&next));
 }
 
 void ntp_notify_cmos_timer(void)
--
cgit v1.2.3

From 67dfae0cd72fec5cd158b6e5fb1647b7dbe0834c Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Mon, 14 Sep 2015 18:05:20 -0700
Subject: clocksource: Fix abs() usage w/ 64bit values

This patch fixes one case where abs() was being used with 64-bit
nanosecond values, where the result may be capped at 32 bits. This
potentially could cause watchdog false negatives on 32-bit systems, so
this patch addresses the issue by using abs64().

Signed-off-by: John Stultz
Cc: Prarit Bhargava
Cc: Richard Cochran
Cc: Ingo Molnar
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1442279124-7309-2-git-send-email-john.stultz@linaro.org
Signed-off-by: Thomas Gleixner
---
 kernel/time/clocksource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 841b72f720e8..3a38775b50c2 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -217,7 +217,7 @@ static void clocksource_watchdog(unsigned long data)
 			continue;
 
 		/* Check the deviation from the watchdog clocksource. */
-		if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
+		if (abs64(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
 			pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n",
 				cs->name);
 			pr_warn("                      '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
--
cgit v1.2.3

From cfed432d7f4114e16e0163bcfe65e96f0c304493 Mon Sep 17 00:00:00 2001
From: Guillaume Gomez
Date: Wed, 23 Sep 2015 13:19:19 +0200
Subject: clocksource: Remove return statement from void functions

Signed-off-by: Guillaume Gomez
Cc: John Stultz
Link: http://lkml.kernel.org/r/CAAOQCfSDgmqSWDBsetau%2ByF8x0%2BDagCF_pfFw0p5xH_BKkKEog@mail.gmail.com
Signed-off-by: Thomas Gleixner
---
 kernel/time/clocksource.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 174c594ff8ef..de0ad66f2b21 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -595,16 +595,15 @@ static void __clocksource_select(bool skipcur)
  */
 static void clocksource_select(void)
 {
-	return __clocksource_select(false);
+	__clocksource_select(false);
 }
 
 static void clocksource_select_fallback(void)
 {
-	return __clocksource_select(true);
+	__clocksource_select(true);
 }
 
 #else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
-
 static inline void clocksource_select(void) { }
 static inline void clocksource_select_fallback(void) { }
--
cgit v1.2.3

From 9fc4468d546b6eb55b0aa5b04b0c36238ebf57e7 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes
Date: Fri, 2 Oct 2015 09:45:30 +0200
Subject: timers: Use __fls in apply_slack()

In apply_slack(), find_last_bit() is applied to a bitmask consisting
of precisely BITS_PER_LONG bits. Since mask is non-zero, we might as
well eliminate the function call and use __fls() directly. On x86_64,
this shaves 23 bytes off the only caller, mod_timer().

This also gets rid of Coverity CID 1192106, but that is a false
positive: Coverity is not aware that mask != 0 implies that
find_last_bit will not return BITS_PER_LONG.

Signed-off-by: Rasmus Villemoes
Cc: John Stultz
Link: http://lkml.kernel.org/r/1443771931-6284-1-git-send-email-linux@rasmusvillemoes.dk
Signed-off-by: Thomas Gleixner
---
 kernel/time/timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index d3f5e92f722a..74591ba9474f 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -874,7 +874,7 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
 	if (mask == 0)
 		return expires;
 
-	bit = find_last_bit(&mask, BITS_PER_LONG);
+	bit = __fls(mask);
 
 	mask = (1UL << bit) - 1;
--
cgit v1.2.3

From 7c177d994eb9637302b79e80d331f48dfbe26368 Mon Sep 17 00:00:00 2001
From: Jason Low
Date: Wed, 14 Oct 2015 12:07:53 -0700
Subject: posix_cpu_timer: Optimize fastpath_timer_check()

In fastpath_timer_check(), the task_cputime() function is always
called to compute the utime and stime values. However, this is not
necessary if there are no per-thread timers to check for. This patch
modifies the code such that we compute the task_cputime values only
when there are per-thread timers set.

Signed-off-by: Jason Low
Reviewed-by: Oleg Nesterov
Reviewed-by: Frederic Weisbecker
Reviewed-by: Davidlohr Bueso
Reviewed-by: George Spelvin
Cc: Paul E. McKenney
Cc: Steven Rostedt
Cc: hideaki.kimura@hpe.com
Cc: terry.rudd@hpe.com
Cc: scott.norton@hpe.com
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1444849677-29330-2-git-send-email-jason.low2@hp.com
Signed-off-by: Thomas Gleixner
---
 kernel/time/posix-cpu-timers.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 892e3dae0aac..aa4b6f4f3b5e 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1117,17 +1117,12 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
 static inline int fastpath_timer_check(struct task_struct *tsk)
 {
 	struct signal_struct *sig;
-	cputime_t utime, stime;
-
-	task_cputime(tsk, &utime, &stime);
 
 	if (!task_cputime_zero(&tsk->cputime_expires)) {
-		struct task_cputime task_sample = {
-			.utime = utime,
-			.stime = stime,
-			.sum_exec_runtime = tsk->se.sum_exec_runtime
-		};
+		struct task_cputime task_sample;
 
+		task_cputime(tsk, &task_sample.utime, &task_sample.stime);
+		task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime;
 		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
 			return 1;
 	}
--
cgit v1.2.3

From 934715a191e4be0c602d39455a7a74316f274d60 Mon Sep 17 00:00:00 2001
From: Jason Low
Date: Wed, 14 Oct 2015 12:07:54 -0700
Subject: posix_cpu_timer: Check thread timers only when there are active thread timers

The fastpath_timer_check() contains logic to check whether any timers
are set by checking !task_cputime_zero(). Similarly, we can do this
before calling check_thread_timers(). In the case where there are only
process-wide timers, this will skip all of the per-thread timer
computations when there are no per-thread timers.

As suggested by George, we can put the task_cputime_zero() check in
check_thread_timers(), since that is more of an optimization to the
function. Similarly, we move the existing check of cputimer->running
to check_process_timers().

Signed-off-by: Jason Low
Reviewed-by: Oleg Nesterov
Reviewed-by: George Spelvin
Cc: Paul E. McKenney
Cc: Frederic Weisbecker
Cc: Davidlohr Bueso
Cc: Steven Rostedt
Cc: hideaki.kimura@hpe.com
Cc: terry.rudd@hpe.com
Cc: scott.norton@hpe.com
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1444849677-29330-3-git-send-email-jason.low2@hp.com
Signed-off-by: Thomas Gleixner
---
 kernel/time/posix-cpu-timers.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index aa4b6f4f3b5e..6f6e252ec761 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -864,6 +864,13 @@ static void check_thread_timers(struct task_struct *tsk,
 	unsigned long long expires;
 	unsigned long soft;
 
+	/*
+	 * If cputime_expires is zero, then there are no active
+	 * per thread CPU timers.
+	 */
+	if (task_cputime_zero(&tsk->cputime_expires))
+		return;
+
 	expires = check_timers_list(timers, firing, prof_ticks(tsk));
 	tsk_expires->prof_exp = expires_to_cputime(expires);
 
@@ -961,6 +968,13 @@ static void check_process_timers(struct task_struct *tsk,
 	struct task_cputime cputime;
 	unsigned long soft;
 
+	/*
+	 * If cputimer is not running, then there are no active
+	 * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU).
+	 */
+	if (!READ_ONCE(tsk->signal->cputimer.running))
+		return;
+
 	/*
 	 * Collect the current process totals.
	 */
@@ -1169,12 +1183,8 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 	 * put them on the firing list.
 	 */
 	check_thread_timers(tsk, &firing);
-	/*
-	 * If there are any active process wide timers (POSIX 1.b, itimers,
-	 * RLIMIT_CPU) cputimer must be running.
-	 */
-	if (READ_ONCE(tsk->signal->cputimer.running))
-		check_process_timers(tsk, &firing);
+
+	check_process_timers(tsk, &firing);
 
 	/*
 	 * We must release these locks before taking any timer's lock.
--
cgit v1.2.3

From d5c373eb5610686162ff50429f63f4c00c554799 Mon Sep 17 00:00:00 2001
From: Jason Low
Date: Wed, 14 Oct 2015 12:07:55 -0700
Subject: posix_cpu_timer: Convert cputimer->running to bool

In the next patch in this series, a new field 'checking_timer' will
be added to 'struct thread_group_cputimer'. Both this and the existing
'running' integer field are just used as boolean values. To save space
in the structure, we can make both of these fields booleans.

This is a preparatory patch to convert the existing running integer
field to a boolean.

Suggested-by: George Spelvin
Signed-off-by: Jason Low
Reviewed-by: George Spelvin
Cc: Oleg Nesterov
Cc: Paul E. McKenney
Cc: Frederic Weisbecker
Cc: Davidlohr Bueso
Cc: Steven Rostedt
Cc: hideaki.kimura@hpe.com
Cc: terry.rudd@hpe.com
Cc: scott.norton@hpe.com
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1444849677-29330-4-git-send-email-jason.low2@hp.com
Signed-off-by: Thomas Gleixner
---
 kernel/time/posix-cpu-timers.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 6f6e252ec761..2d58153074d9 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -249,7 +249,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 		 * but barriers are not required because update_gt_cputime()
 		 * can handle concurrent updates.
 		 */
-		WRITE_ONCE(cputimer->running, 1);
+		WRITE_ONCE(cputimer->running, true);
 	}
 	sample_cputime_atomic(times, &cputimer->cputime_atomic);
 }
@@ -918,7 +918,7 @@ static inline void stop_process_timers(struct signal_struct *sig)
 	struct thread_group_cputimer *cputimer = &sig->cputimer;
 
 	/* Turn off cputimer->running. This is done without locking. */
-	WRITE_ONCE(cputimer->running, 0);
+	WRITE_ONCE(cputimer->running, false);
 }
 
 static u32 onecputick;
--
cgit v1.2.3

From c8d75aa47dd585c9538a8205e9bb9847e12cfb84 Mon Sep 17 00:00:00 2001
From: Jason Low
Date: Wed, 14 Oct 2015 12:07:56 -0700
Subject: posix_cpu_timer: Reduce unnecessary sighand lock contention

It was found while running a database workload on large systems that
significant time was spent trying to acquire the sighand lock.

The issue was that whenever an itimer expired, many threads ended up
simultaneously trying to send the signal. Most of the time, nothing
happened after acquiring the sighand lock because another thread had
already sent the signal and updated the "next expire" time. The
fastpath_timer_check() didn't help much since the "next expire" time
was updated after the threads exited fastpath_timer_check().

This patch addresses this by having the thread_group_cputimer structure
maintain a boolean to signify when a thread in the group is already
checking for process wide timers, and adds extra logic in the fastpath
to check the boolean.

Signed-off-by: Jason Low
Reviewed-by: Oleg Nesterov
Reviewed-by: George Spelvin
Cc: Paul E. McKenney
Cc: Frederic Weisbecker
Cc: Davidlohr Bueso
Cc: Steven Rostedt
Cc: hideaki.kimura@hpe.com
Cc: terry.rudd@hpe.com
Cc: scott.norton@hpe.com
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/1444849677-29330-5-git-send-email-jason.low2@hp.com
Signed-off-by: Thomas Gleixner
---
 kernel/time/posix-cpu-timers.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 2d58153074d9..f5e86d282d52 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -975,6 +975,12 @@ static void check_process_timers(struct task_struct *tsk,
 	if (!READ_ONCE(tsk->signal->cputimer.running))
 		return;
 
+	/*
+	 * Signify that a thread is checking for process timers.
+	 * Write access to this field is protected by the sighand lock.
+	 */
+	sig->cputimer.checking_timer = true;
+
 	/*
 	 * Collect the current process totals.
 	 */
@@ -1029,6 +1035,8 @@ static void check_process_timers(struct task_struct *tsk,
 	sig->cputime_expires.sched_exp = sched_expires;
 	if (task_cputime_zero(&sig->cputime_expires))
 		stop_process_timers(sig);
+
+	sig->cputimer.checking_timer = false;
 }
 
 /*
@@ -1142,8 +1150,22 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 	}
 
 	sig = tsk->signal;
-	/* Check if cputimer is running. This is accessed without locking. */
-	if (READ_ONCE(sig->cputimer.running)) {
+	/*
+	 * Check if thread group timers expired when the cputimer is
+	 * running and no other thread in the group is already checking
+	 * for thread group cputimers. These fields are read without the
+	 * sighand lock. However, this is fine because this is meant to
+	 * be a fastpath heuristic to determine whether we should try to
+	 * acquire the sighand lock to check/handle timers.
+	 *
+	 * In the worst case scenario, if 'running' or 'checking_timer' gets
+	 * set but the current thread doesn't see the change yet, we'll wait
+	 * until the next thread in the group gets a scheduler interrupt to
+	 * handle the timer. This isn't an issue in practice because these
+	 * types of delays with signals actually getting sent are expected.
+	 */
+	if (READ_ONCE(sig->cputimer.running) &&
+	    !READ_ONCE(sig->cputimer.checking_timer)) {
 		struct task_cputime group_sample;
 
 		sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic);
--
cgit v1.2.3

From 56fd16cabac9cd8f15e2902898a9d0cc96e2fa70 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Fri, 16 Oct 2015 15:50:22 +0200
Subject: timekeeping: Increment clock_was_set_seq in timekeeping_init()

timekeeping_init() can set the wall time offset, so we need to
increment the clock_was_set_seq counter. That way hrtimers will pick
up the early offset immediately. Otherwise, on a machine which does
not set wall time later in the boot process, the hrtimer offset is
stale at 0 and wall time timers are going to expire with a delay of
45 years.
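A back-of-the-envelope illustration (plain userspace C, not kernel
code) of where the 45 years comes from: with the realtime offset stale
at 0, a CLOCK_REALTIME expiry is effectively interpreted on the
monotonic clock, so it fires late by roughly the wall-clock time
elapsed since the 1970 epoch — about 45 years on a 2015 system:

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        time_t now = time(NULL);    /* seconds since 1970-01-01 */

        /* 2015: ~1.44e9 s / ~3.156e7 s-per-year ~= 45.6 years */
        printf("stale-offset delay: ~%.1f years\n",
               (double)now / (365.25 * 24 * 3600));
        return 0;
    }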
Fixes: 868a3e915f7f "hrtimer: Make offset update smarter"
Reported-and-tested-by: Heiko Carstens
Signed-off-by: Thomas Gleixner
Cc: Stefan Liebler
Cc: Peter Zijlstra
Cc: John Stultz
---
 kernel/time/timekeeping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3739ac6aa473..44d2cc0436f4 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1251,7 +1251,7 @@ void __init timekeeping_init(void)
 	set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec);
 	tk_set_wall_to_mono(tk, tmp);
 
-	timekeeping_update(tk, TK_MIRROR);
+	timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
 
 	write_seqcount_end(&tk_core.seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
--
cgit v1.2.3

From 03f136a2074b2b8890da4a24df7104558ad0da48 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld"
Date: Tue, 14 Jul 2015 19:24:45 +0200
Subject: timeconst: Update path in comment

Signed-off-by: Jason A. Donenfeld
Cc: hofrat@osadl.org
Link: http://lkml.kernel.org/r/1436894685-5868-1-git-send-email-Jason@zx2c4.com
Signed-off-by: Thomas Gleixner
---
 kernel/time/timeconst.bc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timeconst.bc b/kernel/time/timeconst.bc
index c7388dee8635..c48688904f9f 100644
--- a/kernel/time/timeconst.bc
+++ b/kernel/time/timeconst.bc
@@ -39,7 +39,7 @@ define fmuls(b,n,d) {
 }
 
 define timeconst(hz) {
-	print "/* Automatically generated by kernel/timeconst.bc */\n"
+	print "/* Automatically generated by kernel/time/timeconst.bc */\n"
 	print "/* Time conversion constants for HZ == ", hz, " */\n"
 	print "\n"
--
cgit v1.2.3

From 22b886dd1018093920c4250dee2a9a3cb7cff7b8 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Wed, 4 Nov 2015 12:15:33 -0500
Subject: timers: Use proper base migration in add_timer_on()

Regardless of the previous CPU a timer was on, add_timer_on()
currently simply sets timer->flags to the new CPU. As the caller must
be seeing the timer as idle, this is locally fine, but the timer
leaving the old base while unlocked can lead to race conditions as
follows.

Let's say timer was on cpu 0.

  cpu 0                                 cpu 1
  -----------------------------------------------------------------------------
  del_timer(timer) succeeds
                                        del_timer(timer)
                                          lock_timer_base(timer) locks cpu_0_base
  add_timer_on(timer, 1)
    spin_lock(&cpu_1_base->lock)
    timer->flags set to cpu_1_base
    operates on @timer                    operates on @timer

This triggered with mod_delayed_work_on() which contains
"if (del_timer()) add_timer_on()" sequence eventually leading to the
following oops.

  BUG: unable to handle kernel NULL pointer dereference at           (null)
  IP: [] detach_if_pending+0x69/0x1a0
  ...
  Workqueue: wqthrash wqthrash_workfunc [wqthrash]
  task: ffff8800172ca680 ti: ffff8800172d0000 task.ti: ffff8800172d0000
  RIP: 0010:[] [] detach_if_pending+0x69/0x1a0
  ...
  Call Trace:
   [] del_timer+0x44/0x60
   [] try_to_grab_pending+0xb6/0x160
   [] mod_delayed_work_on+0x33/0x80
   [] wqthrash_workfunc+0x61/0x90 [wqthrash]
   [] process_one_work+0x1e8/0x650
   [] worker_thread+0x4e/0x450
   [] kthread+0xef/0x110
   [] ret_from_fork+0x3f/0x70

Fix it by updating add_timer_on() to perform proper migration as
__mod_timer() does.
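The shape of the fix — never let an object leave a lock domain except
while holding that domain's lock, and park it in a MIGRATING state in
between — can be sketched in portable userspace C (illustration only;
names are hypothetical and error handling is omitted):

    #include <pthread.h>
    #include <stdatomic.h>

    struct base { pthread_mutex_t lock; };
    struct item { struct base *_Atomic home; };

    static struct base migrating;   /* sentinel, like TIMER_MIGRATING */

    /* Analogue of lock_timer_base(): lock the base the item lives on,
     * re-checking ->home afterwards in case it moved while we slept. */
    static struct base *lock_home(struct item *it)
    {
        for (;;) {
            struct base *b = atomic_load(&it->home);

            if (b != &migrating) {
                pthread_mutex_lock(&b->lock);
                if (b == atomic_load(&it->home))
                    return b;       /* still ours, and locked */
                pthread_mutex_unlock(&b->lock);
            }
            /* migration in flight: retry */
        }
    }

    /* Analogue of the fixed add_timer_on(): leave the old base only
     * while holding its lock, then publish the new home. */
    static void move_item(struct item *it, struct base *nb)
    {
        struct base *ob = lock_home(it);

        if (ob != nb) {
            atomic_store(&it->home, &migrating);
            pthread_mutex_unlock(&ob->lock);
            pthread_mutex_lock(&nb->lock);
            atomic_store(&it->home, nb);
        }
        /* ... operate on the item under the correct lock ... */
        pthread_mutex_unlock(ob != nb ? &nb->lock : &ob->lock);
    }

    int main(void)
    {
        struct base b0 = { PTHREAD_MUTEX_INITIALIZER };
        struct base b1 = { PTHREAD_MUTEX_INITIALIZER };
        struct item it;

        atomic_init(&it.home, &b0);
        move_item(&it, &b1);    /* migrates b0 -> b1 */
        return 0;
    }

The sentinel plays the role of TIMER_MIGRATING: a concurrent lookup
that observes it simply retries instead of locking a stale base.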
Reported-and-tested-by: Jeff Layton
Signed-off-by: Tejun Heo
Cc: Chris Worley
Cc: bfields@fieldses.org
Cc: Michael Skralivetsky
Cc: Trond Myklebust
Cc: Shaohua Li
Cc: Jeff Layton
Cc: kernel-team@fb.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20151029103113.2f893924@tlielax.poochiereds.net
Link: http://lkml.kernel.org/r/20151104171533.GI5749@mtj.duckdns.org
Signed-off-by: Thomas Gleixner
---
 kernel/time/timer.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 74591ba9474f..bbc5d1114583 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -977,13 +977,29 @@ EXPORT_SYMBOL(add_timer);
  */
 void add_timer_on(struct timer_list *timer, int cpu)
 {
-	struct tvec_base *base = per_cpu_ptr(&tvec_bases, cpu);
+	struct tvec_base *new_base = per_cpu_ptr(&tvec_bases, cpu);
+	struct tvec_base *base;
 	unsigned long flags;
 
 	timer_stats_timer_set_start_info(timer);
 	BUG_ON(timer_pending(timer) || !timer->function);
-	spin_lock_irqsave(&base->lock, flags);
-	timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
+
+	/*
+	 * If @timer was on a different CPU, it should be migrated with the
+	 * old base locked to prevent other operations proceeding with the
+	 * wrong base locked.  See lock_timer_base().
+	 */
+	base = lock_timer_base(timer, &flags);
+	if (base != new_base) {
+		timer->flags |= TIMER_MIGRATING;
+
+		spin_unlock(&base->lock);
+		base = new_base;
+		spin_lock(&base->lock);
+		WRITE_ONCE(timer->flags,
+			   (timer->flags & ~TIMER_BASEMASK) | cpu);
+	}
+
 	debug_activate(timer, timer->expires);
 	internal_add_timer(base, timer);
 	spin_unlock_irqrestore(&base->lock, flags);
--
cgit v1.2.3

From 79211c8ed19c055ca105502c8733800d442a0ae6 Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Mon, 9 Nov 2015 14:58:13 -0800
Subject: remove abs64()

Switch everything to the new and more capable implementation of abs().
Mainly to give the new abs() a bit of a workout.

Cc: Michal Nazarewicz
Cc: John Stultz
Cc: Ingo Molnar
Cc: Steven Rostedt
Cc: Peter Zijlstra
Cc: Masami Hiramatsu
Cc: Peter Zijlstra
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/time/clocksource.c | 2 +-
 kernel/time/timekeeping.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 0d8fe8b8f727..1347882d131e 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -217,7 +217,7 @@ static void clocksource_watchdog(unsigned long data)
 			continue;
 
 		/* Check the deviation from the watchdog clocksource. */
-		if (abs64(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
+		if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
 			pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n",
 				cs->name);
 			pr_warn("                      '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index b1356b7ae570..d563c1960302 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1614,7 +1614,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
 	negative = (tick_error < 0);
 
 	/* Sort out the magnitude of the correction */
-	tick_error = abs64(tick_error);
+	tick_error = abs(tick_error);
 	for (adj = 0; tick_error > interval; adj++)
 		tick_error >>= 1;
--
cgit v1.2.3
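A closing illustration (userspace sketch, not part of the patch) of
the pitfall that motivated abs64() earlier in this series and that the
type-generic abs() now removes: an int-sized abs() truncates a 64-bit
value before taking its magnitude:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        /* 4.3 s of clock skew, expressed in nanoseconds */
        long long skew_ns = 4300000000LL;

        /* int-sized abs(): the value is truncated to 32 bits first */
        printf("abs((int)skew) = %d ns\n", abs((int)skew_ns));

        /* 64-bit magnitude, the equivalent of abs64()/generic abs() */
        printf("llabs(skew)    = %lld ns\n", llabs(skew_ns));
        return 0;
    }

Here the 4.3 s skew truncates to roughly 5 ms, slipping under the
watchdog threshold — exactly the false-negative mode the clocksource
fix above describes.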