From cb08e0353c249a27aed10c6f60a13871ae449d33 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sun, 23 Jul 2017 00:03:43 +0200
Subject: PM / timekeeping: Print debug messages when requested

The messages printed by tk_debug_account_sleep_time() are basically
useful for system sleep debugging, so print them only when the other
debug messages from the core suspend/hibernate code are enabled.

While at it, make it clear that the messages from
tk_debug_account_sleep_time() are about timekeeping suspend
duration, because in general timekeeping may be suspeded and
resumed for multiple times during one system suspend-resume cycle.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/time/timekeeping_debug.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index 38bc4d2208e8..0754cadfa9e6 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -19,6 +19,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/seq_file.h>
+#include <linux/suspend.h>
 #include <linux/time.h>
 
 #include "timekeeping_internal.h"
@@ -75,7 +76,7 @@ void tk_debug_account_sleep_time(struct timespec64 *t)
 	int bin = min(fls(t->tv_sec), NUM_BINS-1);
 
 	sleep_time_bin[bin]++;
-	printk_deferred(KERN_INFO "Suspended for %lld.%03lu seconds\n",
-			(s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC);
+	pm_deferred_pr_dbg("Timekeeping suspended for %lld.%03lu seconds\n",
+			   (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC);
 }
 
-- 
cgit v1.2.3


From 34f41c0316ed52b0b44542491d89278efdaa70e4 Mon Sep 17 00:00:00 2001
From: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nokia.com>
Date: Tue, 1 Aug 2017 09:11:52 +0200
Subject: timers: Fix overflow in get_next_timer_interrupt

For e.g. HZ=100, timer being 430 jiffies in the future, and 32 bit
unsigned int, there is an overflow on unsigned int right-hand side
of the expression which results with wrong values being returned.

Type cast the multiplier to 64bit to avoid that issue.

Fixes: 46c8f0b077a8 ("timers: Fix get_next_timer_interrupt() computation")
Signed-off-by: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nokia.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Cc: khilman@baylibre.com
Cc: akpm@linux-foundation.org
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/a7900f04-2a21-c9fd-67be-ab334d459ee5@nokia.com
---
 kernel/time/timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 71ce3f4eead3..8f5d1bf18854 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1495,7 +1495,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 		base->is_idle = false;
 	} else {
 		if (!is_max_delta)
-			expires = basem + (nextevt - basej) * TICK_NSEC;
+			expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
 		/*
 		 * If we expect to sleep more than a tick, mark the base idle:
 		 */
-- 
cgit v1.2.3


From a529bea8fa6b6dded6179c72d3385e0f7d0a4fde Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Wed, 28 Jun 2017 22:21:35 +0900
Subject: timekeeping: Use proper timekeeper for debug code

When CONFIG_DEBUG_TIMEKEEPING is enabled the timekeeping_check_update()
function will update status like last_warning and underflow_seen on the
timekeeper.

If there are issues found this state is used to rate limit the warnings
that get printed.

This rate limiting doesn't really really work if stored in real_tk as
the shadow timekeeper is overwritten onto real_tk at the end of every
update_wall_time() call, resetting last_warning and other statuses.

Fix rate limiting by using the shadow_timekeeper for
timekeeping_check_update().

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Fixes: commit 57d05a93ada7 ("time: Rework debugging variables so they aren't global")
Signed-off-by: Stafford Horne <shorne@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index cedafa008de5..8f5866981883 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2066,7 +2066,7 @@ void update_wall_time(void)
 		goto out;
 
 	/* Do some additional sanity checking */
-	timekeeping_check_update(real_tk, offset);
+	timekeeping_check_update(tk, offset);
 
 	/*
 	 * With NO_HZ we may have to accumulate many cycle_intervals
-- 
cgit v1.2.3


From 47b4a457e4cc816b3fdd2ee55c65fda8ea6de051 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 5 Jul 2017 14:08:35 +0200
Subject: alarmtimer: Fix unavailable wake-up source in sysfs

Currently the alarmtimer registers a wake-up source unconditionally,
regardless of the system having a (wake-up capable) RTC or not.
Hence the alarmtimer will always show up in
/sys/kernel/debug/wakeup_sources, even if it is not available, and thus
cannot be a wake-up source.

To fix this, postpone registration until a wake-up capable RTC device is
added.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/alarmtimer.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 0b8ff7d257ea..73a2b476e59f 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -56,9 +56,9 @@ static ktime_t freezer_delta;
 static DEFINE_SPINLOCK(freezer_delta_lock);
 #endif
 
+#ifdef CONFIG_RTC_CLASS
 static struct wakeup_source *ws;
 
-#ifdef CONFIG_RTC_CLASS
 /* rtc timer and device for setting alarm wakeups at suspend */
 static struct rtc_timer		rtctimer;
 static struct rtc_device	*rtcdev;
@@ -89,6 +89,7 @@ static int alarmtimer_rtc_add_device(struct device *dev,
 {
 	unsigned long flags;
 	struct rtc_device *rtc = to_rtc_device(dev);
+	struct wakeup_source *__ws;
 
 	if (rtcdev)
 		return -EBUSY;
@@ -98,13 +99,20 @@ static int alarmtimer_rtc_add_device(struct device *dev,
 	if (!device_may_wakeup(rtc->dev.parent))
 		return -1;
 
+	__ws = wakeup_source_register("alarmtimer");
+
 	spin_lock_irqsave(&rtcdev_lock, flags);
 	if (!rtcdev) {
 		rtcdev = rtc;
 		/* hold a reference so it doesn't go away */
 		get_device(dev);
+		ws = __ws;
+		__ws = NULL;
 	}
 	spin_unlock_irqrestore(&rtcdev_lock, flags);
+
+	wakeup_source_unregister(__ws);
+
 	return 0;
 }
 
@@ -860,7 +868,6 @@ static int __init alarmtimer_init(void)
 		error = PTR_ERR(pdev);
 		goto out_drv;
 	}
-	ws = wakeup_source_register("alarmtimer");
 	return 0;
 
 out_drv:
-- 
cgit v1.2.3


From 3cf294962df8fcde710eb5e762e0929e2ba49947 Mon Sep 17 00:00:00 2001
From: Krzysztof Opasiak <k.opasiak@samsung.com>
Date: Wed, 5 Jul 2017 19:25:48 +0200
Subject: posix-cpu-timers: Use dedicated helper to access rlimit values

Use rlimit() and rlimit_max() helper instead of manually writing
whole chain from task to rlimit value

Signed-off-by: Krzysztof Opasiak <k.opasiak@samsung.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170705172548.7911-1-k.opasiak@samsung.com
---
 kernel/time/posix-cpu-timers.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index a3bd5dbe0dc4..8585ad6e472a 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -799,7 +799,6 @@ static void check_thread_timers(struct task_struct *tsk,
 				struct list_head *firing)
 {
 	struct list_head *timers = tsk->cpu_timers;
-	struct signal_struct *const sig = tsk->signal;
 	struct task_cputime *tsk_expires = &tsk->cputime_expires;
 	u64 expires;
 	unsigned long soft;
@@ -823,10 +822,9 @@ static void check_thread_timers(struct task_struct *tsk,
 	/*
 	 * Check for the special case thread timers.
 	 */
-	soft = READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
+	soft = task_rlimit(tsk, RLIMIT_RTTIME);
 	if (soft != RLIM_INFINITY) {
-		unsigned long hard =
-			READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);
+		unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME);
 
 		if (hard != RLIM_INFINITY &&
 		    tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
@@ -847,7 +845,8 @@ static void check_thread_timers(struct task_struct *tsk,
 			 */
 			if (soft < hard) {
 				soft += USEC_PER_SEC;
-				sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
+				tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur =
+					soft;
 			}
 			if (print_fatal_signals) {
 				pr_info("RT Watchdog Timeout (soft): %s[%d]\n",
@@ -938,11 +937,10 @@ static void check_process_timers(struct task_struct *tsk,
 			 SIGPROF);
 	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
 			 SIGVTALRM);
-	soft = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+	soft = task_rlimit(tsk, RLIMIT_CPU);
 	if (soft != RLIM_INFINITY) {
 		unsigned long psecs = div_u64(ptime, NSEC_PER_SEC);
-		unsigned long hard =
-			READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
+		unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU);
 		u64 x;
 		if (psecs >= hard) {
 			/*
-- 
cgit v1.2.3


From 2fe59f507a65dbd734b990a11ebc7488f6f87a24 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 22 Aug 2017 18:43:48 +1000
Subject: timers: Fix excessive granularity of new timers after a nohz idle

When a timer base is idle, it is forwarded when a new timer is added
to ensure that granularity does not become excessive. When not idle,
the timer tick is expected to increment the base.

However there are several problems:

- If an existing timer is modified, the base is forwarded only after
  the index is calculated.

- The base is not forwarded by add_timer_on.

- There is a window after a timer is restarted from a nohz idle, after
  it is marked not-idle and before the timer tick on this CPU, where a
  timer may be added but the ancient base does not get forwarded.

These result in excessive granularity (a 1 jiffy timeout can blow out
to 100s of jiffies), which cause the rcu lockup detector to trigger,
among other things.

Fix this by keeping track of whether the timer base has been idle
since it was last run or forwarded, and if so then forward it before
adding a new timer.

There is still a case where mod_timer optimises the case of a pending
timer mod with the same expiry time, where the timer can see excessive
granularity relative to the new, shorter interval. A comment is added,
but it's not changed because it is an important fastpath for
networking.

This has been tested and found to fix the RCU softlockup messages.

Testing was also done with tracing to measure requested versus
achieved wakeup latencies for all non-deferrable timers in an idle
system (with no lockup watchdogs running). Wakeup latency relative to
absolute latency is calculated (note this suffers from round-up skew
at low absolute times) and analysed:

             max     avg      std
upstream   506.0    1.20     4.68
patched      2.0    1.08     0.15

The bug was noticed due to the lockup detector Kconfig changes
dropping it out of people's .configs and resulting in larger base
clk skew When the lockup detectors are enabled, no CPU can go idle for
longer than 4 seconds, which limits the granularity errors.
Sub-optimal timer behaviour is observable on a smaller scale in that
case:

	     max     avg      std
upstream     9.0    1.05     0.19
patched      2.0    1.04     0.11

Fixes: Fixes: a683f390b93f ("timers: Forward the wheel clock whenever possible")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Tested-by: David Miller <davem@davemloft.net>
Cc: dzickus@redhat.com
Cc: sfr@canb.auug.org.au
Cc: mpe@ellerman.id.au
Cc: Stephen Boyd <sboyd@codeaurora.org>
Cc: linuxarm@huawei.com
Cc: abdhalee@linux.vnet.ibm.com
Cc: John Stultz <john.stultz@linaro.org>
Cc: akpm@linux-foundation.org
Cc: paulmck@linux.vnet.ibm.com
Cc: torvalds@linux-foundation.org
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20170822084348.21436-1-npiggin@gmail.com
---
 kernel/time/timer.c | 50 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 9 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 8f5d1bf18854..f2674a056c26 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -203,6 +203,7 @@ struct timer_base {
 	bool			migration_enabled;
 	bool			nohz_active;
 	bool			is_idle;
+	bool			must_forward_clk;
 	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
 	struct hlist_head	vectors[WHEEL_SIZE];
 } ____cacheline_aligned;
@@ -856,13 +857,19 @@ get_target_base(struct timer_base *base, unsigned tflags)
 
 static inline void forward_timer_base(struct timer_base *base)
 {
-	unsigned long jnow = READ_ONCE(jiffies);
+	unsigned long jnow;
 
 	/*
-	 * We only forward the base when it's idle and we have a delta between
-	 * base clock and jiffies.
+	 * We only forward the base when we are idle or have just come out of
+	 * idle (must_forward_clk logic), and have a delta between base clock
+	 * and jiffies. In the common case, run_timers will take care of it.
 	 */
-	if (!base->is_idle || (long) (jnow - base->clk) < 2)
+	if (likely(!base->must_forward_clk))
+		return;
+
+	jnow = READ_ONCE(jiffies);
+	base->must_forward_clk = base->is_idle;
+	if ((long)(jnow - base->clk) < 2)
 		return;
 
 	/*
@@ -938,6 +945,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 	 * same array bucket then just return:
 	 */
 	if (timer_pending(timer)) {
+		/*
+		 * The downside of this optimization is that it can result in
+		 * larger granularity than you would get from adding a new
+		 * timer with this expiry.
+		 */
 		if (timer->expires == expires)
 			return 1;
 
@@ -948,6 +960,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 		 * dequeue/enqueue dance.
 		 */
 		base = lock_timer_base(timer, &flags);
+		forward_timer_base(base);
 
 		clk = base->clk;
 		idx = calc_wheel_index(expires, clk);
@@ -964,6 +977,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 		}
 	} else {
 		base = lock_timer_base(timer, &flags);
+		forward_timer_base(base);
 	}
 
 	ret = detach_if_pending(timer, base, false);
@@ -991,12 +1005,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 			raw_spin_lock(&base->lock);
 			WRITE_ONCE(timer->flags,
 				   (timer->flags & ~TIMER_BASEMASK) | base->cpu);
+			forward_timer_base(base);
 		}
 	}
 
-	/* Try to forward a stale timer base clock */
-	forward_timer_base(base);
-
 	timer->expires = expires;
 	/*
 	 * If 'idx' was calculated above and the base time did not advance
@@ -1112,6 +1124,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
 		WRITE_ONCE(timer->flags,
 			   (timer->flags & ~TIMER_BASEMASK) | cpu);
 	}
+	forward_timer_base(base);
 
 	debug_activate(timer, timer->expires);
 	internal_add_timer(base, timer);
@@ -1497,10 +1510,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 		if (!is_max_delta)
 			expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
 		/*
-		 * If we expect to sleep more than a tick, mark the base idle:
+		 * If we expect to sleep more than a tick, mark the base idle.
+		 * Also the tick is stopped so any added timer must forward
+		 * the base clk itself to keep granularity small. This idle
+		 * logic is only maintained for the BASE_STD base, deferrable
+		 * timers may still see large granularity skew (by design).
 		 */
-		if ((expires - basem) > TICK_NSEC)
+		if ((expires - basem) > TICK_NSEC) {
+			base->must_forward_clk = true;
 			base->is_idle = true;
+		}
 	}
 	raw_spin_unlock(&base->lock);
 
@@ -1611,6 +1630,19 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
 {
 	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 
+	/*
+	 * must_forward_clk must be cleared before running timers so that any
+	 * timer functions that call mod_timer will not try to forward the
+	 * base. idle trcking / clock forwarding logic is only used with
+	 * BASE_STD timers.
+	 *
+	 * The deferrable base does not do idle tracking at all, so we do
+	 * not forward it. This can result in very large variations in
+	 * granularity for deferrable timers, but they can be deferred for
+	 * long periods due to idle.
+	 */
+	base->must_forward_clk = false;
+
 	__run_timers(base);
 	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
 		__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
-- 
cgit v1.2.3


From 0bcdc0987cce9880436b70836c6a92bb8e744fd1 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Fri, 25 Aug 2017 15:57:04 -0700
Subject: time: Fix ktime_get_raw() incorrect base accumulation

In comqit fc6eead7c1e2 ("time: Clean up CLOCK_MONOTONIC_RAW time
handling"), the following code got mistakenly added to the update of the
raw timekeeper:

 /* Update the monotonic raw base */
 seconds = tk->raw_sec;
 nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
 tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);

Which adds the raw_sec value and the shifted down raw xtime_nsec to the
base value.

But the read function adds the shifted down tk->tkr_raw.xtime_nsec value
another time, The result of this is that ktime_get_raw() users (which are
all internal users) see the raw time move faster then it should (the rate
at which can vary with the current size of tkr_raw.xtime_nsec), which has
resulted in at least problems with graphics rendering performance.

The change tried to match the monotonic base update logic:

 seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
 nsec = (u32) tk->wall_to_monotonic.tv_nsec;
 tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);

Which adds the wall_to_monotonic.tv_nsec value, but not the
tk->tkr_mono.xtime_nsec value to the base.

To fix this, simplify the tkr_raw.base accumulation to only accumulate the
raw_sec portion, and do not include the tkr_raw.xtime_nsec portion, which
will be added at read time.

Fixes: fc6eead7c1e2 ("time: Clean up CLOCK_MONOTONIC_RAW time handling")
Reported-and-tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Daniel Mentz <danielmentz@google.com>
Link: http://lkml.kernel.org/r/1503701824-1645-1-git-send-email-john.stultz@linaro.org
---
 kernel/time/timekeeping.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index cedafa008de5..7e7e61c00d61 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -637,9 +637,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
 	tk->ktime_sec = seconds;
 
 	/* Update the monotonic raw base */
-	seconds = tk->raw_sec;
-	nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift);
-	tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
+	tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
 }
 
 /* must hold timekeeper_lock */
-- 
cgit v1.2.3


From 51218298a25e6942957c5595f2abf130d47d5df9 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Mon, 21 Aug 2017 00:01:46 +0200
Subject: alarmtimer: Ensure RTC module is not unloaded

When registering the rtc device to be used to handle alarm timers,
get_device is used to ensure the device doesn't go away but the module can
still be unloaded.

Call try_module_get to ensure the rtc driver will not go away.

Reported-and-tested-by: Michal Simek <monstr@monstr.eu>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: John Stultz <john.stultz@linaro.org>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Link: http://lkml.kernel.org/r/20170820220146.30969-1-alexandre.belloni@free-electrons.com
---
 kernel/time/alarmtimer.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel/time')

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 73a2b476e59f..ec09ce9a6012 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -28,6 +28,7 @@
 #include <linux/workqueue.h>
 #include <linux/freezer.h>
 #include <linux/compat.h>
+#include <linux/module.h>
 
 #include "posix-timers.h"
 
@@ -103,6 +104,11 @@ static int alarmtimer_rtc_add_device(struct device *dev,
 
 	spin_lock_irqsave(&rtcdev_lock, flags);
 	if (!rtcdev) {
+		if (!try_module_get(rtc->owner)) {
+			spin_unlock_irqrestore(&rtcdev_lock, flags);
+			return -1;
+		}
+
 		rtcdev = rtc;
 		/* hold a reference so it doesn't go away */
 		get_device(dev);
-- 
cgit v1.2.3


From a2d818030135c293f878fbb772cf40e7a14c5acc Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Fri, 8 Sep 2017 16:17:19 -0700
Subject: drivers/pps: aesthetic tweaks to PPS-related content

Collection of aesthetic adjustments to various PPS-related files,
directories and Documentation, some quite minor just for the sake of
consistency, including:

 * Updated example of pps device tree node (courtesy Rodolfo G.)
 * "PPS-API" -> "PPS API"
 * "pps_source_info_s" -> "pps_source_info"
 * "ktimer driver" -> "pps-ktimer driver"
 * "ppstest /dev/pps0" -> "ppstest /dev/pps1" to match example
 * Add missing PPS-related entries to MAINTAINERS file
 * Other trivialities

Link: http://lkml.kernel.org/r/alpine.LFD.2.20.1708261048220.8106@localhost.localdomain
Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Acked-by: Rodolfo Giometti <giometti@enneenne.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/time/timekeeping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 8ea4fb315719..2cafb49aa65e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2316,7 +2316,7 @@ void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 }
 EXPORT_SYMBOL(hardpps);
-#endif
+#endif /* CONFIG_NTP_PPS */
 
 /**
  * xtime_update() - advances the timekeeping infrastructure
-- 
cgit v1.2.3


From 5df32107f609c1f621bcdac0a685c23677ef671e Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Mon, 28 Aug 2017 08:21:53 -0400
Subject: timekeeping: Make fast accessors return 0 before timekeeping is
 initialized

printk timestamps will be extended to include mono and boot time by using
the fast timekeeping accessors ktime_get_mono|boot_fast_ns().  The
functions can return garbage before timekeeping is initialized resulting in
garbage timestamps.

Initialize the fast timekeepers with dummy clocks which guarantee a 0
readout up to timekeeping_init().

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Cc: John Stultz <john.stultz@linaro.org>
Link: http://lkml.kernel.org/r/1503922914-10660-2-git-send-email-prarit@redhat.com
---
 kernel/time/timekeeping.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2cafb49aa65e..6a92794427c9 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -60,8 +60,27 @@ struct tk_fast {
 	struct tk_read_base	base[2];
 };
 
-static struct tk_fast tk_fast_mono ____cacheline_aligned;
-static struct tk_fast tk_fast_raw  ____cacheline_aligned;
+/* Suspend-time cycles value for halted fast timekeeper. */
+static u64 cycles_at_suspend;
+
+static u64 dummy_clock_read(struct clocksource *cs)
+{
+	return cycles_at_suspend;
+}
+
+static struct clocksource dummy_clock = {
+	.read = dummy_clock_read,
+};
+
+static struct tk_fast tk_fast_mono ____cacheline_aligned = {
+	.base[0] = { .clock = &dummy_clock, },
+	.base[1] = { .clock = &dummy_clock, },
+};
+
+static struct tk_fast tk_fast_raw  ____cacheline_aligned = {
+	.base[0] = { .clock = &dummy_clock, },
+	.base[1] = { .clock = &dummy_clock, },
+};
 
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
@@ -477,18 +496,6 @@ u64 notrace ktime_get_boot_fast_ns(void)
 }
 EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
 
-/* Suspend-time cycles value for halted fast timekeeper. */
-static u64 cycles_at_suspend;
-
-static u64 dummy_clock_read(struct clocksource *cs)
-{
-	return cycles_at_suspend;
-}
-
-static struct clocksource dummy_clock = {
-	.read = dummy_clock_read,
-};
-
 /**
  * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
  * @tk: Timekeeper to snapshot.
-- 
cgit v1.2.3


From 4c3711d7fb4763c63b2654f2d07cbe21ca5aadd4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 31 Aug 2017 17:12:48 +0200
Subject: timekeeping: Provide NMI safe access to clock realtime

The configurable printk timestamping wants access to clock realtime. Right
now there is no ktime_get_real_fast_ns() accessor because reading the
monotonic base and the realtime offset cannot be done atomically. Contrary
to boot time this offset can change during runtime and cause half updated
readouts.

struct tk_read_base was fully packed when the fast timekeeper access was
implemented. commit ceea5e3771ed ("time: Fix clock->read(clock) race around
clocksource changes") removed the 'read' function pointer from the
structure, but of course left the comment stale.

So now the structure can fit a new 64bit member w/o violating the cache
line constraints.

Add real_base to tk_read_base and update it in the fast timekeeper update
sequence.

Implement an accessor which follows the same scheme as the accessor to
clock monotonic, but uses the new real_base to access clock real time.

The runtime overhead for updating real_base is minimal as it just adds two
cache hot values and stores them into an already dirtied cache line along
with the other fast timekeeper updates.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead,org>
Link: https://lkml.kernel.org/r/1505757060-2004-3-git-send-email-prarit@redhat.com
---
 kernel/time/timekeeping.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 6a92794427c9..8af77006e937 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -496,6 +496,39 @@ u64 notrace ktime_get_boot_fast_ns(void)
 }
 EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
 
+
+/*
+ * See comment for __ktime_get_fast_ns() vs. timestamp ordering
+ */
+static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf)
+{
+	struct tk_read_base *tkr;
+	unsigned int seq;
+	u64 now;
+
+	do {
+		seq = raw_read_seqcount_latch(&tkf->seq);
+		tkr = tkf->base + (seq & 0x01);
+		now = ktime_to_ns(tkr->base_real);
+
+		now += timekeeping_delta_to_ns(tkr,
+				clocksource_delta(
+					tk_clock_read(tkr),
+					tkr->cycle_last,
+					tkr->mask));
+	} while (read_seqcount_retry(&tkf->seq, seq));
+
+	return now;
+}
+
+/**
+ * ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime.
+ */
+u64 ktime_get_real_fast_ns(void)
+{
+	return __ktime_get_real_fast_ns(&tk_fast_mono);
+}
+
 /**
  * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
  * @tk: Timekeeper to snapshot.
@@ -514,6 +547,7 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
 	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
 	cycles_at_suspend = tk_clock_read(tkr);
 	tkr_dummy.clock = &dummy_clock;
+	tkr_dummy.base_real = tkr->base + tk->offs_real;
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
 
 	tkr = &tk->tkr_raw;
@@ -661,6 +695,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
 	update_vsyscall(tk);
 	update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
 
+	tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real;
 	update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
 	update_fast_timekeeper(&tk->tkr_raw,  &tk_fast_raw);
 
-- 
cgit v1.2.3


From 58e1177b4cd10b0d358faf7d7ebb3779f98bc3ea Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 4 Oct 2017 16:26:55 -0700
Subject: timer: Convert schedule_timeout() to use from_timer()

In preparation for unconditionally passing the struct timer_list pointer to
all timer callbacks, switch to using the new from_timer() helper and passing
the timer pointer explicitly. Since this special timer is on the stack, it
needs to have a wrapper structure to carry state once .data is eliminated.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mips@linux-mips.org
Cc: Petr Mladek <pmladek@suse.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Kalle Valo <kvalo@qca.qualcomm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: linux1394-devel@lists.sourceforge.net
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: linux-s390@vger.kernel.org
Cc: linux-wireless@vger.kernel.org
Cc: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com>
Cc: Wim Van Sebroeck <wim@iguana.be>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Ursula Braun <ubraun@linux.vnet.ibm.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Harish Patil <harish.patil@cavium.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Manish Chopra <manish.chopra@cavium.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: linux-pm@vger.kernel.org
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Mark Gross <mark.gross@intel.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: linux-watchdog@vger.kernel.org
Cc: linux-scsi@vger.kernel.org
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
Cc: Michael Reed <mdr@sgi.com>
Cc: netdev@vger.kernel.org
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Link: https://lkml.kernel.org/r/1507159627-127660-2-git-send-email-keescook@chromium.org
---
 kernel/time/timer.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index f2674a056c26..38613ced2324 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1668,9 +1668,20 @@ void run_local_timers(void)
 	raise_softirq(TIMER_SOFTIRQ);
 }
 
-static void process_timeout(unsigned long __data)
+/*
+ * Since schedule_timeout()'s timer is defined on the stack, it must store
+ * the target task on the stack as well.
+ */
+struct process_timer {
+	struct timer_list timer;
+	struct task_struct *task;
+};
+
+static void process_timeout(struct timer_list *t)
 {
-	wake_up_process((struct task_struct *)__data);
+	struct process_timer *timeout = from_timer(timeout, t, timer);
+
+	wake_up_process(timeout->task);
 }
 
 /**
@@ -1704,7 +1715,7 @@ static void process_timeout(unsigned long __data)
  */
 signed long __sched schedule_timeout(signed long timeout)
 {
-	struct timer_list timer;
+	struct process_timer timer;
 	unsigned long expire;
 
 	switch (timeout)
@@ -1738,13 +1749,14 @@ signed long __sched schedule_timeout(signed long timeout)
 
 	expire = timeout + jiffies;
 
-	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
-	__mod_timer(&timer, expire, false);
+	timer.task = current;
+	timer_setup_on_stack(&timer.timer, process_timeout, 0);
+	__mod_timer(&timer.timer, expire, false);
 	schedule();
-	del_singleshot_timer_sync(&timer);
+	del_singleshot_timer_sync(&timer.timer);
 
 	/* Remove the timer from the object tracker */
-	destroy_timer_on_stack(&timer);
+	destroy_timer_on_stack(&timer.timer);
 
 	timeout = expire - jiffies;
 
-- 
cgit v1.2.3


From 62cb1188ed86a9cf082fd2f757d4dd9b54741f24 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 29 Aug 2017 15:07:54 +0200
Subject: sched/idle: Move quiet_vmstate() into the NOHZ code

quiet_vmstat() is an expensive function that only makes sense when we
go into NOHZ.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: aubrey.li@linux.intel.com
Cc: cl@linux.com
Cc: fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/tick-sched.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c7a899c5ce64..7b258c59d78a 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -27,6 +27,7 @@
 #include <linux/irq_work.h>
 #include <linux/posix-timers.h>
 #include <linux/context_tracking.h>
+#include <linux/mm.h>
 
 #include <asm/irq_regs.h>
 
@@ -787,6 +788,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
 	if (!ts->tick_stopped) {
 		calc_load_nohz_start();
 		cpu_load_update_nohz_start();
+		quiet_vmstat();
 
 		ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
 		ts->tick_stopped = 1;
-- 
cgit v1.2.3


From fe460423438b62eb7440d994ab19a9f444e6280d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 13 Oct 2017 20:29:38 +0200
Subject: posix-stubs: Use get_timespec64() and put_timespec64()

This is a follow-up to commit 5c4994102fb5 ("posix-timers: Use
get_timespec64() and put_timespec64()"), which left two system call using
copy_from_user()/copy_to_user().

Change them as well for consistency.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: y2038@lists.linaro.org
Cc: John Stultz <john.stultz@linaro.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Deepa Dinamani <deepa.kernel@gmail.com>
Link: https://lkml.kernel.org/r/20171013183009.3442318-1-arnd@arndb.de
---
 kernel/time/posix-stubs.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 06f34feb635e..b258bee13b02 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -117,8 +117,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
 		const struct timespec __user *, rqtp,
 		struct timespec __user *, rmtp)
 {
-	struct timespec64 t64;
-	struct timespec t;
+	struct timespec64 t;
 
 	switch (which_clock) {
 	case CLOCK_REALTIME:
@@ -129,16 +128,15 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
 		return -EINVAL;
 	}
 
-	if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
+	if (get_timespec64(&t, rqtp))
 		return -EFAULT;
-	t64 = timespec_to_timespec64(t);
-	if (!timespec64_valid(&t64))
+	if (!timespec64_valid(&t))
 		return -EINVAL;
 	if (flags & TIMER_ABSTIME)
 		rmtp = NULL;
 	current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
 	current->restart_block.nanosleep.rmtp = rmtp;
-	return hrtimer_nanosleep(&t64, flags & TIMER_ABSTIME ?
+	return hrtimer_nanosleep(&t, flags & TIMER_ABSTIME ?
 				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
 				 which_clock);
 }
@@ -203,8 +201,7 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
 		       struct compat_timespec __user *, rqtp,
 		       struct compat_timespec __user *, rmtp)
 {
-	struct timespec64 t64;
-	struct timespec t;
+	struct timespec64 t;
 
 	switch (which_clock) {
 	case CLOCK_REALTIME:
@@ -215,16 +212,15 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
 		return -EINVAL;
 	}
 
-	if (compat_get_timespec(&t, rqtp))
+	if (compat_get_timespec64(&t, rqtp))
 		return -EFAULT;
-	t64 = timespec_to_timespec64(t);
-	if (!timespec64_valid(&t64))
+	if (!timespec64_valid(&t))
 		return -EINVAL;
 	if (flags & TIMER_ABSTIME)
 		rmtp = NULL;
 	current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
 	current->restart_block.nanosleep.compat_rmtp = rmtp;
-	return hrtimer_nanosleep(&t64, flags & TIMER_ABSTIME ?
+	return hrtimer_nanosleep(&t, flags & TIMER_ABSTIME ?
 				 HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
 				 which_clock);
 }
-- 
cgit v1.2.3


From 4eb1bca1793385b8caff4b2e1f19b31a013dd1ec Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 13 Oct 2017 20:34:35 +0200
Subject: time: Use do_settimeofday64() internally

do_settimeofday() is a wrapper around do_settimeofday64(), so that function
can be called directly. The wrapper can be removed once the last user is
gone.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: y2038@lists.linaro.org
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Deepa Dinamani <deepa.kernel@gmail.com>
Link: https://lkml.kernel.org/r/20171013183452.3635956-1-arnd@arndb.de
---
 kernel/time/time.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/time.c b/kernel/time/time.c
index 44a8c1402133..cfe3d3e4679f 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -82,7 +82,7 @@ SYSCALL_DEFINE1(time, time_t __user *, tloc)
 
 SYSCALL_DEFINE1(stime, time_t __user *, tptr)
 {
-	struct timespec tv;
+	struct timespec64 tv;
 	int err;
 
 	if (get_user(tv.tv_sec, tptr))
@@ -90,11 +90,11 @@ SYSCALL_DEFINE1(stime, time_t __user *, tptr)
 
 	tv.tv_nsec = 0;
 
-	err = security_settime(&tv, NULL);
+	err = security_settime64(&tv, NULL);
 	if (err)
 		return err;
 
-	do_settimeofday(&tv);
+	do_settimeofday64(&tv);
 	return 0;
 }
 
@@ -122,7 +122,7 @@ COMPAT_SYSCALL_DEFINE1(time, compat_time_t __user *, tloc)
 
 COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr)
 {
-	struct timespec tv;
+	struct timespec64 tv;
 	int err;
 
 	if (get_user(tv.tv_sec, tptr))
@@ -130,11 +130,11 @@ COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr)
 
 	tv.tv_nsec = 0;
 
-	err = security_settime(&tv, NULL);
+	err = security_settime64(&tv, NULL);
 	if (err)
 		return err;
 
-	do_settimeofday(&tv);
+	do_settimeofday64(&tv);
 	return 0;
 }
 
-- 
cgit v1.2.3


From c310ce4dcb9df9b2f1be82caff7dae609fe53f72 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Date: Sun, 8 Oct 2017 20:55:59 -0700
Subject: timers: Avoid an unnecessary iteration in __run_timers()

If the base clock is behind jiffies in the soft irq expiry code then the
next timer is retrieved by get_next_timer_interrupt() to avoid incrementing
base clock one by one. If the next timer interrupt is past current jiffies
then the base clock is set to jiffies - 1. At the call site this is
incremented and another iteration through the expiry loop is executed which
checks empty hash buckets.

That's a pointless excercise because it's already known that the next timer
is past jiffies.

Set the base clock in that case to jiffies directly so it gets incremented
to jiffies + 1 at the call site resulting in immediate termination of the
expiry loop.

[ tglx: Massaged changelog and added comment to the code ]

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Cc: Joe Jin <joe.jin@oracle.com>
Cc: sboyd@codeaurora.org
Cc: Srinivas Reddy Eeda <srinivas.eeda@oracle.com>
Cc: john.stultz@linaro.org
Link: https://lkml.kernel.org/r/7086a857-f90c-4616-bbe8-f7696f21626c@default
---
 kernel/time/timer.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 38613ced2324..ee1a88d8afb2 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1560,8 +1560,11 @@ static int collect_expired_timers(struct timer_base *base,
 		 * jiffies, otherwise forward to the next expiry time:
 		 */
 		if (time_after(next, jiffies)) {
-			/* The call site will increment clock! */
-			base->clk = jiffies - 1;
+			/*
+			 * The call site will increment base->clk and then
+			 * terminate the expiry loop immediately.
+			 */
+			base->clk = jiffies;
 			return 0;
 		}
 		base->clk = next;
-- 
cgit v1.2.3


From ba16490eac146ebb178017e5de3d61c645552fab Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 18 Oct 2017 16:10:19 +0200
Subject: timer: Convert stub timer to timer_setup()

In preparation for unconditionally passing the struct timer_list pointer
to all timer callbacks, switch to using the new timer_setup() and
from_timer() to pass the timer pointer explicitly.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Kees Cook <keescook@chromium.org>
---
 kernel/time/timer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index ee1a88d8afb2..fbb1f85327bf 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -610,7 +610,7 @@ static bool timer_fixup_init(void *addr, enum debug_obj_state state)
 }
 
 /* Stub timer callback for improperly used timers. */
-static void stub_timer(unsigned long data)
+static void stub_timer(struct timer_list *unused)
 {
 	WARN_ON(1);
 }
@@ -626,7 +626,7 @@ static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
 
 	switch (state) {
 	case ODEBUG_STATE_NOTAVAILABLE:
-		setup_timer(timer, stub_timer, 0);
+		timer_setup(timer, stub_timer, 0);
 		return true;
 
 	case ODEBUG_STATE_ACTIVE:
@@ -665,7 +665,7 @@ static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
 
 	switch (state) {
 	case ODEBUG_STATE_NOTAVAILABLE:
-		setup_timer(timer, stub_timer, 0);
+		timer_setup(timer, stub_timer, 0);
 		return true;
 	default:
 		return false;
-- 
cgit v1.2.3


From 3a29ddb1c5986a6d3f941bfb1f434105203ce7f6 Mon Sep 17 00:00:00 2001
From: James Hogan <jhogan@kernel.org>
Date: Thu, 19 Oct 2017 15:17:23 +0100
Subject: clockevents: Retry programming min delta up to 10 times

When CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=n, the call path
hrtimer_reprogram -> clockevents_program_event ->
clockevents_program_min_delta will not retry if the clock event driver
returns -ETIME.

If the driver could not satisfy the program_min_delta for any reason, the
lack of a retry means the CPU may not receive a tick interrupt, potentially
until the counter does a full period. This leads to rcu_sched timeout
messages as the stalled CPU is detected by other CPUs, and other issues if
the CPU is holding locks or other resources at the point at which it
stalls.

There have been a couple of observed mechanisms through which a clock event
driver could not satisfy the requested min_delta and return -ETIME.

With the MIPS GIC driver, the shared execution resource within MT cores
means inconventient latency due to execution of instructions from other
hardware threads in the core, within gic_next_event, can result in an event
being set in the past.

Additionally under virtualisation it is possible to get unexpected latency
during a clockevent device's set_next_event() callback which can make it
return -ETIME even for a delta based on min_delta_ns.

It isn't appropriate to use MIN_ADJUST in the virtualisation case as
occasional hypervisor induced high latency will cause min_delta_ns to
quickly increase to the maximum.

Instead, borrow the retry pattern from the MIN_ADJUST case, but without
making adjustments. Retry up to 10 times, each time increasing the
attempted delta by min_delta, before giving up.

[ Matt: Reworked the loop and made retry increase the delta. ]

Signed-off-by: James Hogan <jhogan@kernel.org>
Signed-off-by: Matt Redfearn <matt.redfearn@mips.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-mips@linux-mips.org
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: "Martin Schwidefsky" <schwidefsky@de.ibm.com>
Cc: James Hogan <james.hogan@mips.com>
Link: https://lkml.kernel.org/r/1508422643-6075-1-git-send-email-matt.redfearn@mips.com
---
 kernel/time/clockevents.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 4237e0744e26..16c027e9cc73 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -280,17 +280,22 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
 static int clockevents_program_min_delta(struct clock_event_device *dev)
 {
 	unsigned long long clc;
-	int64_t delta;
+	int64_t delta = 0;
+	int i;
 
-	delta = dev->min_delta_ns;
-	dev->next_event = ktime_add_ns(ktime_get(), delta);
+	for (i = 0; i < 10; i++) {
+		delta += dev->min_delta_ns;
+		dev->next_event = ktime_add_ns(ktime_get(), delta);
 
-	if (clockevent_state_shutdown(dev))
-		return 0;
+		if (clockevent_state_shutdown(dev))
+			return 0;
 
-	dev->retries++;
-	clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
-	return dev->set_next_event((unsigned long) clc, dev);
+		dev->retries++;
+		clc = ((unsigned long long) delta * dev->mult) >> dev->shift;
+		if (dev->set_next_event((unsigned long) clc, dev) == 0)
+			return 0;
+	}
+	return -ETIME;
 }
 
 #endif /* CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST */
-- 
cgit v1.2.3


From 7863406143d8bbbbda07a61285c5f4c217908dfd Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Fri, 27 Oct 2017 04:42:28 +0200
Subject: sched/isolation: Move housekeeping related code to its own file

The housekeeping code is currently tied to the NOHZ code. As we are
planning to make housekeeping independent from it, start with moving
the relevant code to its own file.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1509072159-31808-2-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/tick-sched.c | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7b258c59d78a..27d7d522ac4e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -166,7 +166,6 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 
 #ifdef CONFIG_NO_HZ_FULL
 cpumask_var_t tick_nohz_full_mask;
-cpumask_var_t housekeeping_mask;
 bool tick_nohz_full_running;
 static atomic_t tick_dep_mask;
 
@@ -438,13 +437,6 @@ void __init tick_nohz_init(void)
 			return;
 	}
 
-	if (!alloc_cpumask_var(&housekeeping_mask, GFP_KERNEL)) {
-		WARN(1, "NO_HZ: Can't allocate not-full dynticks cpumask\n");
-		cpumask_clear(tick_nohz_full_mask);
-		tick_nohz_full_running = false;
-		return;
-	}
-
 	/*
 	 * Full dynticks uses irq work to drive the tick rescheduling on safe
 	 * locking contexts. But then we need irq work to raise its own
@@ -453,7 +445,6 @@ void __init tick_nohz_init(void)
 	if (!arch_irq_work_has_interrupt()) {
 		pr_warn("NO_HZ: Can't run full dynticks because arch doesn't support irq work self-IPIs\n");
 		cpumask_clear(tick_nohz_full_mask);
-		cpumask_copy(housekeeping_mask, cpu_possible_mask);
 		tick_nohz_full_running = false;
 		return;
 	}
@@ -466,9 +457,6 @@ void __init tick_nohz_init(void)
 		cpumask_clear_cpu(cpu, tick_nohz_full_mask);
 	}
 
-	cpumask_andnot(housekeeping_mask,
-		       cpu_possible_mask, tick_nohz_full_mask);
-
 	for_each_cpu(cpu, tick_nohz_full_mask)
 		context_tracking_cpu_set(cpu);
 
@@ -478,12 +466,6 @@ void __init tick_nohz_init(void)
 	WARN_ON(ret < 0);
 	pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
 		cpumask_pr_args(tick_nohz_full_mask));
-
-	/*
-	 * We need at least one CPU to handle housekeeping work such
-	 * as timekeeping, unbound timers, workqueues, ...
-	 */
-	WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
 }
 #endif
 
-- 
cgit v1.2.3


From 6f1982fedd59856bcc42a9b521be4c3ffd2f60a7 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Fri, 27 Oct 2017 04:42:36 +0200
Subject: sched/isolation: Handle the nohz_full= parameter

We want to centralize the isolation management, done by the housekeeping
subsystem. Therefore we need to handle the nohz_full= parameter from
there.

Since nohz_full= so far has involved unbound timers, watchdog, RCU
and tilegx NAPI isolation, we keep that default behaviour.

nohz_full= will be deprecated in the future. We want to control
the isolation features from the isolcpus= parameter.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1509072159-31808-10-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/tick-sched.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 27d7d522ac4e..69f3dbe38984 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -385,20 +385,13 @@ out:
 	local_irq_restore(flags);
 }
 
-/* Parse the boot-time nohz CPU list from the kernel parameters. */
-static int __init tick_nohz_full_setup(char *str)
+/* Get the boot-time nohz CPU list from the kernel parameters. */
+void __init tick_nohz_full_setup(cpumask_var_t cpumask)
 {
 	alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
-	if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
-		pr_warn("NO_HZ: Incorrect nohz_full cpumask\n");
-		free_bootmem_cpumask_var(tick_nohz_full_mask);
-		return 1;
-	}
+	cpumask_copy(tick_nohz_full_mask, cpumask);
 	tick_nohz_full_running = true;
-
-	return 1;
 }
-__setup("nohz_full=", tick_nohz_full_setup);
 
 static int tick_nohz_cpu_down(unsigned int cpu)
 {
-- 
cgit v1.2.3


From 0f295b0650c90362b4111f46d7f9149a0a4191be Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Fri, 13 Oct 2017 11:54:33 -0600
Subject: rtc: Allow rtc drivers to specify the tv_nsec value for ntp

ntp is currently hardwired to try and call the rtc set when wall clock
tv_nsec is 0.5 seconds. This historical behaviour works well with certain
PC RTCs, but is not universal to all rtc hardware.

Change how this works by introducing the driver specific concept of
set_offset_nsec, the delay between current wall clock time and the target
time to set (with a 0 tv_nsecs).

For x86-style CMOS set_offset_nsec should be -0.5 s which causes the last
second to be written 0.5 s after it has started.

For compat with the old rtc_set_ntp_time, the value is defaulted to
+ 0.5 s, which causes the next second to be written 0.5s before it starts,
as things were before this patch.

Testing shows many non-x86 RTCs would like set_offset_nsec ~= 0,
so ultimately each RTC driver should set the set_offset_nsec according
to its needs, and non x86 architectures should stop using
update_persistent_clock64 in order to access this feature.
Future patches will revise the drivers as needed.

Since CMOS and RTC now have very different handling they are split
into two dedicated code paths, sharing the support code, and ifdefs
are replaced with IS_ENABLED.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/ntp.c | 166 +++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 113 insertions(+), 53 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index edf19cc53140..bc19de1a0683 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -492,6 +492,67 @@ out:
 	return leap;
 }
 
+static void sync_hw_clock(struct work_struct *work);
+static DECLARE_DELAYED_WORK(sync_work, sync_hw_clock);
+
+static void sched_sync_hw_clock(struct timespec64 now,
+				unsigned long target_nsec, bool fail)
+
+{
+	struct timespec64 next;
+
+	getnstimeofday64(&next);
+	if (!fail)
+		next.tv_sec = 659;
+	else {
+		/*
+		 * Try again as soon as possible. Delaying long periods
+		 * decreases the accuracy of the work queue timer. Due to this
+		 * the algorithm is very likely to require a short-sleep retry
+		 * after the above long sleep to synchronize ts_nsec.
+		 */
+		next.tv_sec = 0;
+	}
+
+	/* Compute the needed delay that will get to tv_nsec == target_nsec */
+	next.tv_nsec = target_nsec - next.tv_nsec;
+	if (next.tv_nsec <= 0)
+		next.tv_nsec += NSEC_PER_SEC;
+	if (next.tv_nsec >= NSEC_PER_SEC) {
+		next.tv_sec++;
+		next.tv_nsec -= NSEC_PER_SEC;
+	}
+
+	queue_delayed_work(system_power_efficient_wq, &sync_work,
+			   timespec64_to_jiffies(&next));
+}
+
+static void sync_rtc_clock(void)
+{
+	unsigned long target_nsec;
+	struct timespec64 adjust, now;
+	int rc;
+
+	if (!IS_ENABLED(CONFIG_RTC_SYSTOHC))
+		return;
+
+	getnstimeofday64(&now);
+
+	adjust = now;
+	if (persistent_clock_is_local)
+		adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
+
+	/*
+	 * The current RTC in use will provide the target_nsec it wants to be
+	 * called at, and does rtc_tv_nsec_ok internally.
+	 */
+	rc = rtc_set_ntp_time(adjust, &target_nsec);
+	if (rc == -ENODEV)
+		return;
+
+	sched_sync_hw_clock(now, target_nsec, rc);
+}
+
 #ifdef CONFIG_GENERIC_CMOS_UPDATE
 int __weak update_persistent_clock(struct timespec now)
 {
@@ -507,76 +568,75 @@ int __weak update_persistent_clock64(struct timespec64 now64)
 }
 #endif
 
-#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
-static void sync_cmos_clock(struct work_struct *work);
-
-static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
-
-static void sync_cmos_clock(struct work_struct *work)
+static bool sync_cmos_clock(void)
 {
+	static bool no_cmos;
 	struct timespec64 now;
-	struct timespec64 next;
-	int fail = 1;
+	struct timespec64 adjust;
+	int rc = -EPROTO;
+	long target_nsec = NSEC_PER_SEC / 2;
+
+	if (!IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE))
+		return false;
+
+	if (no_cmos)
+		return false;
 
 	/*
-	 * If we have an externally synchronized Linux clock, then update
-	 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
-	 * called as close as possible to 500 ms before the new second starts.
-	 * This code is run on a timer.  If the clock is set, that timer
-	 * may not expire at the correct time.  Thus, we adjust...
-	 * We want the clock to be within a couple of ticks from the target.
+	 * Historically update_persistent_clock64() has followed x86
+	 * semantics, which match the MC146818A/etc RTC. This RTC will store
+	 * 'adjust' and then in .5s it will advance once second.
+	 *
+	 * Architectures are strongly encouraged to use rtclib and not
+	 * implement this legacy API.
 	 */
-	if (!ntp_synced()) {
-		/*
-		 * Not synced, exit, do not restart a timer (if one is
-		 * running, let it run out).
-		 */
-		return;
-	}
-
 	getnstimeofday64(&now);
-	if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec * 5) {
-		struct timespec64 adjust = now;
-
-		fail = -ENODEV;
+	if (rtc_tv_nsec_ok(-1 * target_nsec, &adjust, &now)) {
 		if (persistent_clock_is_local)
 			adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
-#ifdef CONFIG_GENERIC_CMOS_UPDATE
-		fail = update_persistent_clock64(adjust);
-#endif
-
-#ifdef CONFIG_RTC_SYSTOHC
-		if (fail == -ENODEV)
-			fail = rtc_set_ntp_time(adjust);
-#endif
+		rc = update_persistent_clock64(adjust);
+		/*
+		 * The machine does not support update_persistent_clock64 even
+		 * though it defines CONFIG_GENERIC_CMOS_UPDATE.
+		 */
+		if (rc == -ENODEV) {
+			no_cmos = true;
+			return false;
+		}
 	}
 
-	next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2);
-	if (next.tv_nsec <= 0)
-		next.tv_nsec += NSEC_PER_SEC;
+	sched_sync_hw_clock(now, target_nsec, rc);
+	return true;
+}
 
-	if (!fail || fail == -ENODEV)
-		next.tv_sec = 659;
-	else
-		next.tv_sec = 0;
+/*
+ * If we have an externally synchronized Linux clock, then update RTC clock
+ * accordingly every ~11 minutes. Generally RTCs can only store second
+ * precision, but many RTCs will adjust the phase of their second tick to
+ * match the moment of update. This infrastructure arranges to call to the RTC
+ * set at the correct moment to phase synchronize the RTC second tick over
+ * with the kernel clock.
+ */
+static void sync_hw_clock(struct work_struct *work)
+{
+	if (!ntp_synced())
+		return;
 
-	if (next.tv_nsec >= NSEC_PER_SEC) {
-		next.tv_sec++;
-		next.tv_nsec -= NSEC_PER_SEC;
-	}
-	queue_delayed_work(system_power_efficient_wq,
-			   &sync_cmos_work, timespec64_to_jiffies(&next));
+	if (sync_cmos_clock())
+		return;
+
+	sync_rtc_clock();
 }
 
 void ntp_notify_cmos_timer(void)
 {
-	queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0);
-}
-
-#else
-void ntp_notify_cmos_timer(void) { }
-#endif
+	if (!ntp_synced())
+		return;
 
+	if (IS_ENABLED(CONFIG_GENERIC_CMOS_UPDATE) ||
+	    IS_ENABLED(CONFIG_RTC_SYSTOHC))
+		queue_delayed_work(system_power_efficient_wq, &sync_work, 0);
+}
 
 /*
  * Propagate a new txc->status value into the NTP state:
-- 
cgit v1.2.3


From e0956dcc4ba74ec4b17e32fc9a156fcba1ef6610 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 19 Oct 2017 13:14:44 +0200
Subject: timekeeping: Consolidate timekeeping_inject_offset code

The code to check the adjtimex() or clock_adjtime() arguments is spread
out across multiple files for presumably only historic reasons. As a
preparatation for a rework to get rid of the use of 'struct timeval'
and 'struct timespec' in there, this moves all the portions into
kernel/time/timekeeping.c and marks them as 'static'.

The warp_clock() function here is not as closely related as the others,
but I feel it still makes sense to move it here in order to consolidate
all callers of timekeeping_inject_offset().

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
[jstultz: Whitespace fixup]
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/ntp.c          |  61 ----------------------
 kernel/time/ntp_internal.h |   1 -
 kernel/time/time.c         |  36 +------------
 kernel/time/timekeeping.c  | 123 ++++++++++++++++++++++++++++++++++++++++++++-
 kernel/time/timekeeping.h  |   2 +-
 5 files changed, 123 insertions(+), 100 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index bc19de1a0683..90f84582a076 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -713,67 +713,6 @@ static inline void process_adjtimex_modes(struct timex *txc,
 }
 
 
-
-/**
- * ntp_validate_timex - Ensures the timex is ok for use in do_adjtimex
- */
-int ntp_validate_timex(struct timex *txc)
-{
-	if (txc->modes & ADJ_ADJTIME) {
-		/* singleshot must not be used with any other mode bits */
-		if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
-			return -EINVAL;
-		if (!(txc->modes & ADJ_OFFSET_READONLY) &&
-		    !capable(CAP_SYS_TIME))
-			return -EPERM;
-	} else {
-		/* In order to modify anything, you gotta be super-user! */
-		 if (txc->modes && !capable(CAP_SYS_TIME))
-			return -EPERM;
-		/*
-		 * if the quartz is off by more than 10% then
-		 * something is VERY wrong!
-		 */
-		if (txc->modes & ADJ_TICK &&
-		    (txc->tick <  900000/USER_HZ ||
-		     txc->tick > 1100000/USER_HZ))
-			return -EINVAL;
-	}
-
-	if (txc->modes & ADJ_SETOFFSET) {
-		/* In order to inject time, you gotta be super-user! */
-		if (!capable(CAP_SYS_TIME))
-			return -EPERM;
-
-		if (txc->modes & ADJ_NANO) {
-			struct timespec ts;
-
-			ts.tv_sec = txc->time.tv_sec;
-			ts.tv_nsec = txc->time.tv_usec;
-			if (!timespec_inject_offset_valid(&ts))
-				return -EINVAL;
-
-		} else {
-			if (!timeval_inject_offset_valid(&txc->time))
-				return -EINVAL;
-		}
-	}
-
-	/*
-	 * Check for potential multiplication overflows that can
-	 * only happen on 64-bit systems:
-	 */
-	if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
-		if (LLONG_MIN / PPM_SCALE > txc->freq)
-			return -EINVAL;
-		if (LLONG_MAX / PPM_SCALE < txc->freq)
-			return -EINVAL;
-	}
-
-	return 0;
-}
-
-
 /*
  * adjtimex mainly allows reading (and writing, if superuser) of
  * kernel time-keeping variables. used by xntpd.
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index d8a7c11fa71a..74b52cd48209 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -7,7 +7,6 @@ extern void ntp_clear(void);
 extern u64 ntp_tick_length(void);
 extern ktime_t ntp_get_next_leap(void);
 extern int second_overflow(time64_t secs);
-extern int ntp_validate_timex(struct timex *);
 extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
 extern void __hardpps(const struct timespec64 *, const struct timespec64 *);
 #endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 44a8c1402133..04684e294f00 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -157,40 +157,6 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
 	return 0;
 }
 
-/*
- * Indicates if there is an offset between the system clock and the hardware
- * clock/persistent clock/rtc.
- */
-int persistent_clock_is_local;
-
-/*
- * Adjust the time obtained from the CMOS to be UTC time instead of
- * local time.
- *
- * This is ugly, but preferable to the alternatives.  Otherwise we
- * would either need to write a program to do it in /etc/rc (and risk
- * confusion if the program gets run more than once; it would also be
- * hard to make the program warp the clock precisely n hours)  or
- * compile in the timezone information into the kernel.  Bad, bad....
- *
- *						- TYT, 1992-01-01
- *
- * The best thing to do is to keep the CMOS clock in universal time (UTC)
- * as real UNIX machines always do it. This avoids all headaches about
- * daylight saving times and warping kernel clocks.
- */
-static inline void warp_clock(void)
-{
-	if (sys_tz.tz_minuteswest != 0) {
-		struct timespec adjust;
-
-		persistent_clock_is_local = 1;
-		adjust.tv_sec = sys_tz.tz_minuteswest * 60;
-		adjust.tv_nsec = 0;
-		timekeeping_inject_offset(&adjust);
-	}
-}
-
 /*
  * In case for some reason the CMOS clock has not already been running
  * in UTC, but in some local time: The first time we set the timezone,
@@ -224,7 +190,7 @@ int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz
 		if (firsttime) {
 			firsttime = 0;
 			if (!tv)
-				warp_clock();
+				timekeeping_warp_clock();
 		}
 	}
 	if (tv)
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2cafb49aa65e..7d8e0e842484 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1258,13 +1258,39 @@ out:
 }
 EXPORT_SYMBOL(do_settimeofday64);
 
+/*
+ * Validates if a timespec/timeval used to inject a time offset is valid.
+ * Offsets can be postive or negative. The value of the timeval/timespec
+ * is the sum of its fields, but *NOTE*: the field tv_usec/tv_nsec must
+ * always be non-negative.
+ */
+static inline bool timeval_inject_offset_valid(const struct timeval *tv)
+{
+	/* We don't check the tv_sec as it can be positive or negative */
+
+	/* Can't have more microseconds then a second */
+	if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC)
+		return false;
+	return true;
+}
+
+static inline bool timespec_inject_offset_valid(const struct timespec *ts)
+{
+	/* We don't check the tv_sec as it can be positive or negative */
+
+	/* Can't have more nanoseconds then a second */
+	if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
+		return false;
+	return true;
+}
+
 /**
  * timekeeping_inject_offset - Adds or subtracts from the current time.
  * @tv:		pointer to the timespec variable containing the offset
  *
  * Adds or subtracts an offset value from the current time.
  */
-int timekeeping_inject_offset(struct timespec *ts)
+static int timekeeping_inject_offset(struct timespec *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long flags;
@@ -1303,7 +1329,40 @@ error: /* even if we error out, we forwarded the time, so call update */
 
 	return ret;
 }
-EXPORT_SYMBOL(timekeeping_inject_offset);
+
+/*
+ * Indicates if there is an offset between the system clock and the hardware
+ * clock/persistent clock/rtc.
+ */
+int persistent_clock_is_local;
+
+/*
+ * Adjust the time obtained from the CMOS to be UTC time instead of
+ * local time.
+ *
+ * This is ugly, but preferable to the alternatives.  Otherwise we
+ * would either need to write a program to do it in /etc/rc (and risk
+ * confusion if the program gets run more than once; it would also be
+ * hard to make the program warp the clock precisely n hours)  or
+ * compile in the timezone information into the kernel.  Bad, bad....
+ *
+ *						- TYT, 1992-01-01
+ *
+ * The best thing to do is to keep the CMOS clock in universal time (UTC)
+ * as real UNIX machines always do it. This avoids all headaches about
+ * daylight saving times and warping kernel clocks.
+ */
+void timekeeping_warp_clock(void)
+{
+	if (sys_tz.tz_minuteswest != 0) {
+		struct timespec adjust;
+
+		persistent_clock_is_local = 1;
+		adjust.tv_sec = sys_tz.tz_minuteswest * 60;
+		adjust.tv_nsec = 0;
+		timekeeping_inject_offset(&adjust);
+	}
+}
 
 /**
  * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic
@@ -2247,6 +2306,66 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
 	return base;
 }
 
+/**
+ * ntp_validate_timex - Ensures the timex is ok for use in do_adjtimex
+ */
+static int ntp_validate_timex(struct timex *txc)
+{
+	if (txc->modes & ADJ_ADJTIME) {
+		/* singleshot must not be used with any other mode bits */
+		if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
+			return -EINVAL;
+		if (!(txc->modes & ADJ_OFFSET_READONLY) &&
+		    !capable(CAP_SYS_TIME))
+			return -EPERM;
+	} else {
+		/* In order to modify anything, you gotta be super-user! */
+		if (txc->modes && !capable(CAP_SYS_TIME))
+			return -EPERM;
+		/*
+		 * if the quartz is off by more than 10% then
+		 * something is VERY wrong!
+		 */
+		if (txc->modes & ADJ_TICK &&
+		    (txc->tick <  900000/USER_HZ ||
+		     txc->tick > 1100000/USER_HZ))
+			return -EINVAL;
+	}
+
+	if (txc->modes & ADJ_SETOFFSET) {
+		/* In order to inject time, you gotta be super-user! */
+		if (!capable(CAP_SYS_TIME))
+			return -EPERM;
+
+		if (txc->modes & ADJ_NANO) {
+			struct timespec ts;
+
+			ts.tv_sec = txc->time.tv_sec;
+			ts.tv_nsec = txc->time.tv_usec;
+			if (!timespec_inject_offset_valid(&ts))
+				return -EINVAL;
+
+		} else {
+			if (!timeval_inject_offset_valid(&txc->time))
+				return -EINVAL;
+		}
+	}
+
+	/*
+	 * Check for potential multiplication overflows that can
+	 * only happen on 64-bit systems:
+	 */
+	if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
+		if (LLONG_MIN / PPM_SCALE > txc->freq)
+			return -EINVAL;
+		if (LLONG_MAX / PPM_SCALE < txc->freq)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+
 /**
  * do_adjtimex() - Accessor function to NTP __do_adjtimex function
  */
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index d0914676d4c5..44aec7893cdd 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -10,7 +10,7 @@ extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq,
 
 extern int timekeeping_valid_for_hres(void);
 extern u64 timekeeping_max_deferment(void);
-extern int timekeeping_inject_offset(struct timespec *ts);
+extern void timekeeping_warp_clock(void);
 extern int timekeeping_suspend(void);
 extern void timekeeping_resume(void);
 
-- 
cgit v1.2.3


From 1572fa03784831b81ec26ec379374cf6bdec04fb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 19 Oct 2017 13:14:45 +0200
Subject: timekeeping: Use timespec64 in timekeeping_inject_offset

As part of changing all the timekeeping code to use 64-bit
time_t consistently, this removes the uses of timeval
and timespec as much as possible from do_adjtimex() and
timekeeping_inject_offset(). The timeval_inject_offset_valid()
and timespec_inject_offset_valid() just complicate this,
so I'm folding them into the respective callers.

This leaves the actual 'struct timex' definition, which
is part of the user-space ABI and should be dealt with
separately when we have agreed on the ABI change.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 72 ++++++++++++++++-------------------------------
 1 file changed, 25 insertions(+), 47 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 7d8e0e842484..c6a35fb3cf76 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1258,65 +1258,37 @@ out:
 }
 EXPORT_SYMBOL(do_settimeofday64);
 
-/*
- * Validates if a timespec/timeval used to inject a time offset is valid.
- * Offsets can be postive or negative. The value of the timeval/timespec
- * is the sum of its fields, but *NOTE*: the field tv_usec/tv_nsec must
- * always be non-negative.
- */
-static inline bool timeval_inject_offset_valid(const struct timeval *tv)
-{
-	/* We don't check the tv_sec as it can be positive or negative */
-
-	/* Can't have more microseconds then a second */
-	if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC)
-		return false;
-	return true;
-}
-
-static inline bool timespec_inject_offset_valid(const struct timespec *ts)
-{
-	/* We don't check the tv_sec as it can be positive or negative */
-
-	/* Can't have more nanoseconds then a second */
-	if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
-		return false;
-	return true;
-}
-
 /**
  * timekeeping_inject_offset - Adds or subtracts from the current time.
  * @tv:		pointer to the timespec variable containing the offset
  *
  * Adds or subtracts an offset value from the current time.
  */
-static int timekeeping_inject_offset(struct timespec *ts)
+static int timekeeping_inject_offset(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long flags;
-	struct timespec64 ts64, tmp;
+	struct timespec64 tmp;
 	int ret = 0;
 
-	if (!timespec_inject_offset_valid(ts))
+	if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
 
-	ts64 = timespec_to_timespec64(*ts);
-
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	write_seqcount_begin(&tk_core.seq);
 
 	timekeeping_forward_now(tk);
 
 	/* Make sure the proposed value is valid */
-	tmp = timespec64_add(tk_xtime(tk),  ts64);
-	if (timespec64_compare(&tk->wall_to_monotonic, &ts64) > 0 ||
+	tmp = timespec64_add(tk_xtime(tk), *ts);
+	if (timespec64_compare(&tk->wall_to_monotonic, ts) > 0 ||
 	    !timespec64_valid_strict(&tmp)) {
 		ret = -EINVAL;
 		goto error;
 	}
 
-	tk_xtime_add(tk, &ts64);
-	tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts64));
+	tk_xtime_add(tk, ts);
+	tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *ts));
 
 error: /* even if we error out, we forwarded the time, so call update */
 	timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
@@ -1355,7 +1327,7 @@ int persistent_clock_is_local;
 void timekeeping_warp_clock(void)
 {
 	if (sys_tz.tz_minuteswest != 0) {
-		struct timespec adjust;
+		struct timespec64 adjust;
 
 		persistent_clock_is_local = 1;
 		adjust.tv_sec = sys_tz.tz_minuteswest * 60;
@@ -2307,9 +2279,9 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
 }
 
 /**
- * ntp_validate_timex - Ensures the timex is ok for use in do_adjtimex
+ * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
  */
-static int ntp_validate_timex(struct timex *txc)
+static int timekeeping_validate_timex(struct timex *txc)
 {
 	if (txc->modes & ADJ_ADJTIME) {
 		/* singleshot must not be used with any other mode bits */
@@ -2337,16 +2309,22 @@ static int ntp_validate_timex(struct timex *txc)
 		if (!capable(CAP_SYS_TIME))
 			return -EPERM;
 
-		if (txc->modes & ADJ_NANO) {
-			struct timespec ts;
+		/*
+		 * Validate if a timespec/timeval used to inject a time
+		 * offset is valid.  Offsets can be postive or negative, so
+		 * we don't check tv_sec. The value of the timeval/timespec
+		 * is the sum of its fields,but *NOTE*:
+		 * The field tv_usec/tv_nsec must always be non-negative and
+		 * we can't have more nanoseconds/microseconds than a second.
+		 */
+		if (txc->time.tv_usec < 0)
+			return -EINVAL;
 
-			ts.tv_sec = txc->time.tv_sec;
-			ts.tv_nsec = txc->time.tv_usec;
-			if (!timespec_inject_offset_valid(&ts))
+		if (txc->modes & ADJ_NANO) {
+			if (txc->time.tv_usec >= NSEC_PER_SEC)
 				return -EINVAL;
-
 		} else {
-			if (!timeval_inject_offset_valid(&txc->time))
+			if (txc->time.tv_usec >= USEC_PER_SEC)
 				return -EINVAL;
 		}
 	}
@@ -2378,12 +2356,12 @@ int do_adjtimex(struct timex *txc)
 	int ret;
 
 	/* Validate the data before disabling interrupts */
-	ret = ntp_validate_timex(txc);
+	ret = timekeeping_validate_timex(txc);
 	if (ret)
 		return ret;
 
 	if (txc->modes & ADJ_SETOFFSET) {
-		struct timespec delta;
+		struct timespec64 delta;
 		delta.tv_sec  = txc->time.tv_sec;
 		delta.tv_nsec = txc->time.tv_usec;
 		if (!(txc->modes & ADJ_NANO))
-- 
cgit v1.2.3


From 85bf19e7df2479140eff2348a4e6a9c19b5c3960 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 19 Oct 2017 13:14:46 +0200
Subject: time: Remove unused functions

The (slow but) ongoing work on conversion from timespec to timespec64
has led some timespec based helper functions to become unused.

No new code should use them, so we can remove the functions entirely.
I'm planning to obsolete additional interfaces next and remove
more of these.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/time.c | 18 ------------------
 1 file changed, 18 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/time.c b/kernel/time/time.c
index 04684e294f00..947fb614c78f 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -818,24 +818,6 @@ unsigned long nsecs_to_jiffies(u64 n)
 }
 EXPORT_SYMBOL_GPL(nsecs_to_jiffies);
 
-/*
- * Add two timespec values and do a safety check for overflow.
- * It's assumed that both values are valid (>= 0)
- */
-struct timespec timespec_add_safe(const struct timespec lhs,
-				  const struct timespec rhs)
-{
-	struct timespec res;
-
-	set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec,
-				lhs.tv_nsec + rhs.tv_nsec);
-
-	if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)
-		res.tv_sec = TIME_T_MAX;
-
-	return res;
-}
-
 /*
  * Add two timespec64 values and do a safety check for overflow.
  * It's assumed that both values are valid (>= 0).
-- 
cgit v1.2.3


From abc8f96e3eb846fcf6333395ee1f6ed4a734576c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 19 Oct 2017 13:14:48 +0200
Subject: time: Move time_t conversion helpers to time32.h

On 64-bit architectures, the timespec64 based helpers in linux/time.h
are defined as macros pointing to their timespec based counterparts.
This made sense when they were first introduced, but as we are migrating
away from timespec in general, it's much less intuitive now.

This changes the macros to work in the exact opposite way: we always
provide the timespec64 based helpers and define the old interfaces as
macros for them. Now we can move those macros into linux/time32.h, which
already contains the respective helpers for 32-bit architectures.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/time.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/time.c b/kernel/time/time.c
index 947fb614c78f..fe60ebd301cf 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -407,6 +407,7 @@ time64_t mktime64(const unsigned int year0, const unsigned int mon0,
 }
 EXPORT_SYMBOL(mktime64);
 
+#if __BITS_PER_LONG == 32
 /**
  * set_normalized_timespec - set timespec sec and nsec parts and normalize
  *
@@ -467,6 +468,7 @@ struct timespec ns_to_timespec(const s64 nsec)
 	return ts;
 }
 EXPORT_SYMBOL(ns_to_timespec);
+#endif
 
 /**
  * ns_to_timeval - Convert nanoseconds to timeval
@@ -486,7 +488,6 @@ struct timeval ns_to_timeval(const s64 nsec)
 }
 EXPORT_SYMBOL(ns_to_timeval);
 
-#if BITS_PER_LONG == 32
 /**
  * set_normalized_timespec - set timespec sec and nsec parts and normalize
  *
@@ -547,7 +548,7 @@ struct timespec64 ns_to_timespec64(const s64 nsec)
 	return ts;
 }
 EXPORT_SYMBOL(ns_to_timespec64);
-#endif
+
 /**
  * msecs_to_jiffies: - convert milliseconds to jiffies
  * @m:	time in milliseconds
-- 
cgit v1.2.3


From 39c82caff8610d57ffe32157cb3130dfabe12fbe Mon Sep 17 00:00:00 2001
From: Prasad Sodagudi <psodagud@codeaurora.org>
Date: Thu, 26 Oct 2017 11:37:22 -0700
Subject: clockevents: Update clockevents device next_event on stop

clockevent_device::next_event holds the next timer event of a clock event
device. The value is updated in clockevents_program_event(), i.e. when the
hardware timer is armed for the next expiry.

When there are no software timers armed on a CPU, the corresponding per CPU
clockevent device is brought into ONESHOT_STOPPED state, but
clockevent_device::next_event is not updated, because
clockevents_program_event() is not called.

So the content of clockevent_device::next_event is stale, which is not an
issue when real hardware is used. But the hrtimer broadcast device relies
on that information and the stale value causes spurious wakeups.

Update clockevent_device::next_event to KTIME_MAX when it has been brought
into ONESHOT_STOPPED state to avoid spurious wakeups. This reflects the
proper expiry time of the stopped timer: infinity.

[ tglx: Massaged changelog ]

Signed-off-by: Prasad Sodagudi <psodagud@codeaurora.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: viresh.kumar@linaro.org
Link: https://lkml.kernel.org/r/1509043042-32486-1-git-send-email-psodagud@codeaurora.org
---
 kernel/time/tick-oneshot.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 6b009c207671..c1f518e7aa80 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -33,6 +33,7 @@ int tick_program_event(ktime_t expires, int force)
 		 * We don't need the clock event device any more, stop it.
 		 */
 		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
+		dev->next_event = KTIME_MAX;
 		return 0;
 	}
 
-- 
cgit v1.2.3


From b24413180f5600bcb3bb70fbed5cf186b60864bd Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 1 Nov 2017 15:07:57 +0100
Subject: License cleanup: add SPDX GPL-2.0 license identifier to files with no
 license

Many source files in the tree are missing licensing information, which
makes it harder for compliance tools to determine the correct license.

By default all files without license information are under the default
license of the kernel, which is GPL version 2.

Update the files which contain no license information with the 'GPL-2.0'
SPDX license identifier.  The SPDX identifier is a legally binding
shorthand, which can be used instead of the full boiler plate text.

This patch is based on work done by Thomas Gleixner and Kate Stewart and
Philippe Ombredanne.

How this work was done:

Patches were generated and checked against linux-4.14-rc6 for a subset of
the use cases:
 - file had no licensing information it it.
 - file was a */uapi/* one with no licensing information in it,
 - file was a */uapi/* one with existing licensing information,

Further patches will be generated in subsequent months to fix up cases
where non-standard license headers were used, and references to license
had to be inferred by heuristics based on keywords.

The analysis to determine which SPDX License Identifier to be applied to
a file was done in a spreadsheet of side by side results from of the
output of two independent scanners (ScanCode & Windriver) producing SPDX
tag:value files created by Philippe Ombredanne.  Philippe prepared the
base worksheet, and did an initial spot review of a few 1000 files.

The 4.13 kernel was the starting point of the analysis with 60,537 files
assessed.  Kate Stewart did a file by file comparison of the scanner
results in the spreadsheet to determine which SPDX license identifier(s)
to be applied to the file. She confirmed any determination that was not
immediately clear with lawyers working with the Linux Foundation.

Criteria used to select files for SPDX license identifier tagging was:
 - Files considered eligible had to be source code files.
 - Make and config files were included as candidates if they contained >5
   lines of source
 - File already had some variant of a license header in it (even if <5
   lines).

All documentation files were explicitly excluded.

The following heuristics were used to determine which SPDX license
identifiers to apply.

 - when both scanners couldn't find any license traces, file was
   considered to have no license information in it, and the top level
   COPYING file license applied.

   For non */uapi/* files that summary was:

   SPDX license identifier                            # files
   ---------------------------------------------------|-------
   GPL-2.0                                              11139

   and resulted in the first patch in this series.

   If that file was a */uapi/* path one, it was "GPL-2.0 WITH
   Linux-syscall-note" otherwise it was "GPL-2.0".  Results of that was:

   SPDX license identifier                            # files
   ---------------------------------------------------|-------
   GPL-2.0 WITH Linux-syscall-note                        930

   and resulted in the second patch in this series.

 - if a file had some form of licensing information in it, and was one
   of the */uapi/* ones, it was denoted with the Linux-syscall-note if
   any GPL family license was found in the file or had no licensing in
   it (per prior point).  Results summary:

   SPDX license identifier                            # files
   ---------------------------------------------------|------
   GPL-2.0 WITH Linux-syscall-note                       270
   GPL-2.0+ WITH Linux-syscall-note                      169
   ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause)    21
   ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)    17
   LGPL-2.1+ WITH Linux-syscall-note                      15
   GPL-1.0+ WITH Linux-syscall-note                       14
   ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause)    5
   LGPL-2.0+ WITH Linux-syscall-note                       4
   LGPL-2.1 WITH Linux-syscall-note                        3
   ((GPL-2.0 WITH Linux-syscall-note) OR MIT)              3
   ((GPL-2.0 WITH Linux-syscall-note) AND MIT)             1

   and that resulted in the third patch in this series.

 - when the two scanners agreed on the detected license(s), that became
   the concluded license(s).

 - when there was disagreement between the two scanners (one detected a
   license but the other didn't, or they both detected different
   licenses) a manual inspection of the file occurred.

 - In most cases a manual inspection of the information in the file
   resulted in a clear resolution of the license that should apply (and
   which scanner probably needed to revisit its heuristics).

 - When it was not immediately clear, the license identifier was
   confirmed with lawyers working with the Linux Foundation.

 - If there was any question as to the appropriate license identifier,
   the file was flagged for further research and to be revisited later
   in time.

In total, over 70 hours of logged manual review was done on the
spreadsheet to determine the SPDX license identifiers to apply to the
source files by Kate, Philippe, Thomas and, in some cases, confirmation
by lawyers working with the Linux Foundation.

Kate also obtained a third independent scan of the 4.13 code base from
FOSSology, and compared selected files where the other two scanners
disagreed against that SPDX file, to see if there was new insights.  The
Windriver scanner is based on an older version of FOSSology in part, so
they are related.

Thomas did random spot checks in about 500 files from the spreadsheets
for the uapi headers and agreed with SPDX license identifier in the
files he inspected. For the non-uapi files Thomas did random spot checks
in about 15000 files.

In initial set of patches against 4.14-rc6, 3 files were found to have
copy/paste license identifier errors, and have been fixed to reflect the
correct identifier.

Additionally Philippe spent 10 hours this week doing a detailed manual
inspection and review of the 12,461 patched files from the initial patch
version early this week with:
 - a full scancode scan run, collecting the matched texts, detected
   license ids and scores
 - reviewing anything where there was a license detected (about 500+
   files) to ensure that the applied SPDX license was correct
 - reviewing anything where there was no detection but the patch license
   was not GPL-2.0 WITH Linux-syscall-note to ensure that the applied
   SPDX license was correct

This produced a worksheet with 20 files needing minor correction.  This
worksheet was then exported into 3 different .csv files for the
different types of files to be modified.

These .csv files were then reviewed by Greg.  Thomas wrote a script to
parse the csv files and add the proper SPDX tag to the file, in the
format that the file expected.  This script was further refined by Greg
based on the output to detect more types of files automatically and to
distinguish between header and source .c files (which need different
comment types.)  Finally Greg ran the script using the .csv files to
generate the patches.

Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org>
Reviewed-by: Philippe Ombredanne <pombredanne@nexb.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/time/Makefile                 | 1 +
 kernel/time/itimer.c                 | 1 +
 kernel/time/ntp.c                    | 1 +
 kernel/time/ntp_internal.h           | 1 +
 kernel/time/posix-cpu-timers.c       | 1 +
 kernel/time/posix-timers.h           | 1 +
 kernel/time/tick-broadcast-hrtimer.c | 1 +
 kernel/time/tick-internal.h          | 1 +
 kernel/time/tick-sched.h             | 1 +
 kernel/time/timekeeping.h            | 1 +
 kernel/time/timekeeping_internal.h   | 1 +
 11 files changed, 11 insertions(+)

(limited to 'kernel/time')

diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 938dbf33ef49..f1e46f338a9c 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 obj-y += time.o timer.o hrtimer.o
 obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
 obj-y += timeconv.o timecounter.o alarmtimer.o
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 2ef98a02376a..f26acef5d7b4 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/kernel/itimer.c
  *
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index edf19cc53140..99e03bec68e4 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * NTP state machine interfaces and logic.
  *
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index d8a7c11fa71a..0a53e6ea47b1 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_NTP_INTERNAL_H
 #define _LINUX_NTP_INTERNAL_H
 
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 8585ad6e472a..5b117110b55b 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Implement CPU time clocks for the POSIX clock interface.
  */
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index fb303c3be4d3..151e28f5bf30 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #define TIMER_RETRY 1
 
 struct k_clock {
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index a7bb8f33ae07..58045eb976c3 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * linux/kernel/time/tick-broadcast-hrtimer.c
  * This file emulates a local clock event device
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index be0ac01f2e12..f8e1845aa464 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * tick internal variable and functions used by low/high res code
  */
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 075444e3d48e..954b43dbf21c 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _TICK_SCHED_H
 #define _TICK_SCHED_H
 
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index d0914676d4c5..c9f9af339914 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _KERNEL_TIME_TIMEKEEPING_H
 #define _KERNEL_TIME_TIMEKEEPING_H
 /*
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index 9a18f121f399..fdbeeb02dde9 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _TIMEKEEPING_INTERNAL_H
 #define _TIMEKEEPING_INTERNAL_H
 /*
-- 
cgit v1.2.3


From 6082a6e44434a17f194048b7d48df56f148ec6d4 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 1 Nov 2017 11:04:51 -0700
Subject: kernel/time/Kconfig: Fix typo in comment

Fix typo in Kconfig comment text.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Cc: Jiri Kosina <trivial@kernel.org>
Link: https://lkml.kernel.org/r/0e586dd4-2b27-864e-c252-bc72df52fd01@infradead.org
---
 kernel/time/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index ac09bc29eb08..d689a9557e17 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -56,7 +56,7 @@ menu "Timers subsystem"
 
 # Core internal switch. Selected by NO_HZ_COMMON / HIGH_RES_TIMERS. This is
 # only related to the tick functionality. Oneshot clockevent devices
-# are supported independ of this.
+# are supported independent of this.
 config TICK_ONESHOT
 	bool
 
-- 
cgit v1.2.3


From ebf3adbad012b89c4a51a3beae718a587d988a3a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Mon, 6 Nov 2017 16:01:20 +0100
Subject: timers/nohz: Use lockdep to assert IRQs are disabled/enabled

Use lockdep to check that IRQs are enabled or disabled as expected. This
way the sanity check only shows overhead when concurrency correctness
debug code is enabled.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: David S . Miller <davem@davemloft.net>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1509980490-4285-5-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/tick-sched.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index c7a899c5ce64..dd4b7b492c9b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -198,7 +198,7 @@ static bool check_tick_dependency(atomic_t *dep)
 
 static bool can_stop_full_tick(int cpu, struct tick_sched *ts)
 {
-	WARN_ON_ONCE(!irqs_disabled());
+	lockdep_assert_irqs_disabled();
 
 	if (unlikely(!cpu_online(cpu)))
 		return false;
@@ -960,8 +960,7 @@ void tick_nohz_idle_enter(void)
 {
 	struct tick_sched *ts;
 
-	WARN_ON_ONCE(irqs_disabled());
-
+	lockdep_assert_irqs_enabled();
 	/*
 	 * Update the idle state in the scheduler domain hierarchy
 	 * when tick_nohz_stop_sched_tick() is called from the idle loop.
-- 
cgit v1.2.3


From 53bef3fd47f69e40b52c9f9acd3551dfff9f8702 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Mon, 6 Nov 2017 16:01:21 +0100
Subject: timers/hrtimer: Use lockdep to assert IRQs are disabled/enabled

Use lockdep to check that IRQs are enabled or disabled as expected. This
way the sanity check only shows overhead when concurrency correctness
debug code is enabled.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: David S . Miller <davem@davemloft.net>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1509980490-4285-6-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/hrtimer.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 88f75f92ef36..d32520840fde 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -758,9 +758,7 @@ void clock_was_set(void)
  */
 void hrtimers_resume(void)
 {
-	WARN_ONCE(!irqs_disabled(),
-		  KERN_INFO "hrtimers_resume() called with IRQs enabled!");
-
+	lockdep_assert_irqs_disabled();
 	/* Retrigger on the local CPU */
 	retrigger_next_event(NULL);
 	/* And schedule a retrigger for all others */
-- 
cgit v1.2.3


From a69682200db9c2c26594188f81dd2df560af4683 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Mon, 6 Nov 2017 16:01:28 +0100
Subject: timers/posix-cpu-timers: Use lockdep to assert IRQs are
 disabled/enabled

Use lockdep to check that IRQs are enabled or disabled as expected. This
way the sanity check only shows overhead when concurrency correctness
debug code is enabled.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: David S . Miller <davem@davemloft.net>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/1509980490-4285-13-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/posix-cpu-timers.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 5b117110b55b..1f27887aa194 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -603,7 +603,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 	/*
 	 * Disarm any old timer after extracting its expiry time.
 	 */
-	WARN_ON_ONCE(!irqs_disabled());
+	lockdep_assert_irqs_disabled();
 
 	ret = 0;
 	old_incr = timer->it.cpu.incr;
@@ -1034,7 +1034,7 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer)
 	/*
 	 * Now re-arm for the new expiry time.
 	 */
-	WARN_ON_ONCE(!irqs_disabled());
+	lockdep_assert_irqs_disabled();
 	arm_timer(timer);
 unlock:
 	unlock_task_sighand(p, &flags);
@@ -1125,7 +1125,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 	struct k_itimer *timer, *next;
 	unsigned long flags;
 
-	WARN_ON_ONCE(!irqs_disabled());
+	lockdep_assert_irqs_disabled();
 
 	/*
 	 * The fast path checks that there are no expired thread or thread
-- 
cgit v1.2.3


From df27067e6040b51188184876253d93da002433aa Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 10 Nov 2017 16:25:04 +0100
Subject: pstore: Use ktime_get_real_fast_ns() instead of __getnstimeofday()

__getnstimeofday() is a rather odd interface, with a number of quirks:

- The caller may come from NMI context, but the implementation is not NMI safe,
  one way to get there from NMI is

      NMI handler:
        something bad
          panic()
            kmsg_dump()
              pstore_dump()
                 pstore_record_init()
                   __getnstimeofday()

- The calling conventions are different from any other timekeeping functions,
  to deal with returning an error code during suspended timekeeping.

Address the above issues by using a completely different method to get the
time: ktime_get_real_fast_ns() is NMI safe and has a reasonable behavior
when timekeeping is suspended: it returns the time at which it got
suspended. As Thomas Gleixner explained, this is safe, as
ktime_get_real_fast_ns() does not call into the clocksource driver that
might be suspended.

The result can easily be transformed into a timespec structure. Since
ktime_get_real_fast_ns() was not exported to modules, add the export.

The pstore behavior for the suspended case changes slightly, as it now
stores the timestamp at which timekeeping was suspended instead of storing
a zero timestamp.

This change is not addressing y2038-safety, that's subject to a more
complex follow up patch.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Colin Cross <ccross@android.com>
Link: https://lkml.kernel.org/r/20171110152530.1926955-1-arnd@arndb.de
---
 kernel/time/timekeeping.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 353f7bd1eeb0..198afa78bf69 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -528,6 +528,7 @@ u64 ktime_get_real_fast_ns(void)
 {
 	return __ktime_get_real_fast_ns(&tk_fast_mono);
 }
+EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);
 
 /**
  * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
-- 
cgit v1.2.3


From b24591e2fcf852ad7ad2ccf745c8220bf378d312 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 9 Nov 2017 12:35:07 +0000
Subject: timers: Add a function to start/reduce a timer

Add a function, similar to mod_timer(), that will start a timer if it isn't
running and will modify it if it is running and has an expiry time longer
than the new time.  If the timer is running with an expiry time that's the
same or sooner, no change is made.

The function looks like:

	int timer_reduce(struct timer_list *timer, unsigned long expires);

This can be used by code such as networking code to make it easier to share
a timer for multiple timeouts.  For instance, in upcoming AF_RXRPC code,
the rxrpc_call struct will maintain a number of timeouts:

	unsigned long	ack_at;
	unsigned long	resend_at;
	unsigned long	ping_at;
	unsigned long	expect_rx_by;
	unsigned long	expect_req_by;
	unsigned long	expect_term_by;

each of which is set independently of the others.  With timer reduction
available, when the code needs to set one of the timeouts, it only needs to
look at that timeout and then call timer_reduce() to modify the timer,
starting it or bringing it forward if necessary.  There is no need to refer
to the other timeouts to see which is earliest and no need to take any lock
other than, potentially, the timer lock inside timer_reduce().

Note, that this does not protect against concurrent invocations of any of
the timer functions.

As an example, the expect_rx_by timeout above, which terminates a call if
we don't get a packet from the server within a certain time window, would
be set something like this:

	unsigned long now = jiffies;
	unsigned long expect_rx_by = now + packet_receive_timeout;
	WRITE_ONCE(call->expect_rx_by, expect_rx_by);
	timer_reduce(&call->timer, expect_rx_by);

The timer service code (which might, say, be in a work function) would then
check all the timeouts to see which, if any, had triggered, deal with
those:

	t = READ_ONCE(call->ack_at);
	if (time_after_eq(now, t)) {
		cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET);
		set_bit(RXRPC_CALL_EV_ACK, &call->events);
	}

and then restart the timer if necessary by finding the soonest timeout that
hasn't yet passed and then calling timer_reduce().

The disadvantage of doing things this way rather than comparing the timers
each time and calling mod_timer() is that you *will* take timer events
unless you can finish what you're doing and delete the timer in time.

The advantage of doing things this way is that you don't need to use a lock
to work out when the next timer should be set, other than the timer's own
lock - which you might not have to take.

[ tglx: Fixed weird formatting and adopted it to pending changes ]

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: keyrings@vger.kernel.org
Cc: linux-afs@lists.infradead.org
Link: https://lkml.kernel.org/r/151023090769.23050.1801643667223880753.stgit@warthog.procyon.org.uk
---
 kernel/time/timer.c | 45 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 7 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index fbb1f85327bf..af0b8bae4502 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -929,8 +929,11 @@ static struct timer_base *lock_timer_base(struct timer_list *timer,
 	}
 }
 
+#define MOD_TIMER_PENDING_ONLY		0x01
+#define MOD_TIMER_REDUCE		0x02
+
 static inline int
-__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
+__mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options)
 {
 	struct timer_base *base, *new_base;
 	unsigned int idx = UINT_MAX;
@@ -950,7 +953,11 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 		 * larger granularity than you would get from adding a new
 		 * timer with this expiry.
 		 */
-		if (timer->expires == expires)
+		long diff = timer->expires - expires;
+
+		if (!diff)
+			return 1;
+		if (options & MOD_TIMER_REDUCE && diff <= 0)
 			return 1;
 
 		/*
@@ -962,6 +969,12 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 		base = lock_timer_base(timer, &flags);
 		forward_timer_base(base);
 
+		if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) &&
+		    time_before_eq(timer->expires, expires)) {
+			ret = 1;
+			goto out_unlock;
+		}
+
 		clk = base->clk;
 		idx = calc_wheel_index(expires, clk);
 
@@ -971,7 +984,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 		 * subsequent call will exit in the expires check above.
 		 */
 		if (idx == timer_get_idx(timer)) {
-			timer->expires = expires;
+			if (!(options & MOD_TIMER_REDUCE))
+				timer->expires = expires;
+			else if (time_after(timer->expires, expires))
+				timer->expires = expires;
 			ret = 1;
 			goto out_unlock;
 		}
@@ -981,7 +997,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 	}
 
 	ret = detach_if_pending(timer, base, false);
-	if (!ret && pending_only)
+	if (!ret && (options & MOD_TIMER_PENDING_ONLY))
 		goto out_unlock;
 
 	debug_activate(timer, expires);
@@ -1042,7 +1058,7 @@ out_unlock:
  */
 int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 {
-	return __mod_timer(timer, expires, true);
+	return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY);
 }
 EXPORT_SYMBOL(mod_timer_pending);
 
@@ -1068,10 +1084,25 @@ EXPORT_SYMBOL(mod_timer_pending);
  */
 int mod_timer(struct timer_list *timer, unsigned long expires)
 {
-	return __mod_timer(timer, expires, false);
+	return __mod_timer(timer, expires, 0);
 }
 EXPORT_SYMBOL(mod_timer);
 
+/**
+ * timer_reduce - Modify a timer's timeout if it would reduce the timeout
+ * @timer:	The timer to be modified
+ * @expires:	New timeout in jiffies
+ *
+ * timer_reduce() is very similar to mod_timer(), except that it will only
+ * modify a running timer if that would reduce the expiration time (it will
+ * start a timer that isn't running).
+ */
+int timer_reduce(struct timer_list *timer, unsigned long expires)
+{
+	return __mod_timer(timer, expires, MOD_TIMER_REDUCE);
+}
+EXPORT_SYMBOL(timer_reduce);
+
 /**
  * add_timer - start a timer
  * @timer: the timer to be added
@@ -1754,7 +1785,7 @@ signed long __sched schedule_timeout(signed long timeout)
 
 	timer.task = current;
 	timer_setup_on_stack(&timer.timer, process_timeout, 0);
-	__mod_timer(&timer.timer, expire, false);
+	__mod_timer(&timer.timer, expire, 0);
 	schedule();
 	del_singleshot_timer_sync(&timer.timer);
 
-- 
cgit v1.2.3


From 8e7df2b5b7f245c9bd11064712db5cb69044a362 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 13 Nov 2017 07:15:41 +0100
Subject: timer/debug: Change /proc/timer_list from 0444 to 0400

While it uses %pK, there's still few reasons to read this file
as non-root.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/timer_list.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 0e7f5428a148..0ed768b56c60 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -389,7 +389,7 @@ static int __init init_timer_list_procfs(void)
 {
 	struct proc_dir_entry *pe;
 
-	pe = proc_create("timer_list", 0444, NULL, &timer_list_fops);
+	pe = proc_create("timer_list", 0400, NULL, &timer_list_fops);
 	if (!pe)
 		return -ENOMEM;
 	return 0;
-- 
cgit v1.2.3


From aea3706cfc4d952ed6d32b6d5845b5ecd99ed7f5 Mon Sep 17 00:00:00 2001
From: Miroslav Lichvar <mlichvar@redhat.com>
Date: Mon, 13 Nov 2017 14:51:31 -0800
Subject: timekeeping: Remove CONFIG_GENERIC_TIME_VSYSCALL_OLD

As of d4d1fc61eb38f (ia64: Update fsyscall gettime to use modern
vsyscall_update)the last user of CONFIG_GENERIC_TIME_VSYSCALL_OLD
have been updated, the legacy support for old-style vsyscall
implementations can be removed from the timekeeping code.

(Thanks again to Tony Luck for helping remove the last user!)

[jstultz: Commit message rework]

Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Stephen Boyd <stephen.boyd@linaro.org>
Link: https://lkml.kernel.org/r/1510613491-16695-1-git-send-email-john.stultz@linaro.org
---
 kernel/time/Kconfig       |  4 ----
 kernel/time/timekeeping.c | 45 ---------------------------------------------
 2 files changed, 49 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index d689a9557e17..e776fc8cc1df 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -21,10 +21,6 @@ config CLOCKSOURCE_VALIDATE_LAST_CYCLE
 config GENERIC_TIME_VSYSCALL
 	bool
 
-# Timekeeping vsyscall support
-config GENERIC_TIME_VSYSCALL_OLD
-	bool
-
 # Old style timekeeping
 config ARCH_USES_GETTIMEOFFSET
 	bool
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 198afa78bf69..cd03317e7b57 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -557,45 +557,6 @@ static void halt_fast_timekeeper(struct timekeeper *tk)
 	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
 }
 
-#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
-#warning Please contact your maintainers, as GENERIC_TIME_VSYSCALL_OLD compatibity will disappear soon.
-
-static inline void update_vsyscall(struct timekeeper *tk)
-{
-	struct timespec xt, wm;
-
-	xt = timespec64_to_timespec(tk_xtime(tk));
-	wm = timespec64_to_timespec(tk->wall_to_monotonic);
-	update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,
-			    tk->tkr_mono.cycle_last);
-}
-
-static inline void old_vsyscall_fixup(struct timekeeper *tk)
-{
-	s64 remainder;
-
-	/*
-	* Store only full nanoseconds into xtime_nsec after rounding
-	* it up and add the remainder to the error difference.
-	* XXX - This is necessary to avoid small 1ns inconsistnecies caused
-	* by truncating the remainder in vsyscalls. However, it causes
-	* additional work to be done in timekeeping_adjust(). Once
-	* the vsyscall implementations are converted to use xtime_nsec
-	* (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
-	* users are removed, this can be killed.
-	*/
-	remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1);
-	if (remainder != 0) {
-		tk->tkr_mono.xtime_nsec -= remainder;
-		tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift;
-		tk->ntp_error += remainder << tk->ntp_error_shift;
-		tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift;
-	}
-}
-#else
-#define old_vsyscall_fixup(tk)
-#endif
-
 static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
 
 static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
@@ -2163,12 +2124,6 @@ void update_wall_time(void)
 	/* correct the clock when NTP error is too big */
 	timekeeping_adjust(tk, offset);
 
-	/*
-	 * XXX This can be killed once everyone converts
-	 * to the new update_vsyscall.
-	 */
-	old_vsyscall_fixup(tk);
-
 	/*
 	 * Finally, make sure that after the rounding
 	 * xtime_nsec isn't larger than NSEC_PER_SEC
-- 
cgit v1.2.3


From b9eaf18722221ef8b2bd6a67240ebe668622152a Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 16 Oct 2017 13:15:39 -0700
Subject: treewide: init_timer() -> setup_timer()

This mechanically converts all remaining cases of ancient open-coded timer
setup with the old setup_timer() API, which is the first step in timer
conversions. This has no behavioral changes, since it ultimately just
changes the order of assignment to fields of struct timer_list when
finding variations of:

    init_timer(&t);
    f.function = timer_callback;
    t.data = timer_callback_arg;

to be converted into:

    setup_timer(&t, timer_callback, timer_callback_arg);

The conversion is done with the following Coccinelle script, which
is an improved version of scripts/cocci/api/setup_timer.cocci, in the
following ways:
 - assignments-before-init_timer() cases
 - limit the .data case removal to the specific struct timer_list instance
 - handling calls by dereference (timer->field vs timer.field)

spatch --very-quiet --all-includes --include-headers \
	-I ./arch/x86/include -I ./arch/x86/include/generated \
	-I ./include -I ./arch/x86/include/uapi \
	-I ./arch/x86/include/generated/uapi -I ./include/uapi \
	-I ./include/generated/uapi --include ./include/linux/kconfig.h \
	--dir . \
	--cocci-file ~/src/data/setup_timer.cocci

@fix_address_of@
expression e;
@@

 init_timer(
-&(e)
+&e
 , ...)

// Match the common cases first to avoid Coccinelle parsing loops with
// "... when" clauses.

@match_immediate_function_data_after_init_timer@
expression e, func, da;
@@

-init_timer
+setup_timer
 ( \(&e\|e\)
+, func, da
 );
(
-\(e.function\|e->function\) = func;
-\(e.data\|e->data\) = da;
|
-\(e.data\|e->data\) = da;
-\(e.function\|e->function\) = func;
)

@match_immediate_function_data_before_init_timer@
expression e, func, da;
@@

(
-\(e.function\|e->function\) = func;
-\(e.data\|e->data\) = da;
|
-\(e.data\|e->data\) = da;
-\(e.function\|e->function\) = func;
)
-init_timer
+setup_timer
 ( \(&e\|e\)
+, func, da
 );

@match_function_and_data_after_init_timer@
expression e, e2, e3, e4, e5, func, da;
@@

-init_timer
+setup_timer
 ( \(&e\|e\)
+, func, da
 );
 ... when != func = e2
     when != da = e3
(
-e.function = func;
... when != da = e4
-e.data = da;
|
-e->function = func;
... when != da = e4
-e->data = da;
|
-e.data = da;
... when != func = e5
-e.function = func;
|
-e->data = da;
... when != func = e5
-e->function = func;
)

@match_function_and_data_before_init_timer@
expression e, e2, e3, e4, e5, func, da;
@@
(
-e.function = func;
... when != da = e4
-e.data = da;
|
-e->function = func;
... when != da = e4
-e->data = da;
|
-e.data = da;
... when != func = e5
-e.function = func;
|
-e->data = da;
... when != func = e5
-e->function = func;
)
... when != func = e2
    when != da = e3
-init_timer
+setup_timer
 ( \(&e\|e\)
+, func, da
 );

@r1 exists@
expression t;
identifier f;
position p;
@@

f(...) { ... when any
  init_timer@p(\(&t\|t\))
  ... when any
}

@r2 exists@
expression r1.t;
identifier g != r1.f;
expression e8;
@@

g(...) { ... when any
  \(t.data\|t->data\) = e8
  ... when any
}

// It is dangerous to use setup_timer if data field is initialized
// in another function.
@script:python depends on r2@
p << r1.p;
@@

cocci.include_match(False)

@r3@
expression r1.t, func, e7;
position r1.p;
@@

(
-init_timer@p(&t);
+setup_timer(&t, func, 0UL);
... when != func = e7
-t.function = func;
|
-t.function = func;
... when != func = e7
-init_timer@p(&t);
+setup_timer(&t, func, 0UL);
|
-init_timer@p(t);
+setup_timer(t, func, 0UL);
... when != func = e7
-t->function = func;
|
-t->function = func;
... when != func = e7
-init_timer@p(t);
+setup_timer(t, func, 0UL);
)

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/time/clocksource.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 03918a19cf2d..5b51d5ba2a85 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -290,8 +290,7 @@ static inline void clocksource_start_watchdog(void)
 {
 	if (watchdog_running || !watchdog || list_empty(&watchdog_list))
 		return;
-	init_timer(&watchdog_timer);
-	watchdog_timer.function = clocksource_watchdog;
+	setup_timer(&watchdog_timer, clocksource_watchdog, 0UL);
 	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
 	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
 	watchdog_running = 1;
-- 
cgit v1.2.3


From e99e88a9d2b067465adaa9c111ada99a041bef9a Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 16 Oct 2017 14:43:17 -0700
Subject: treewide: setup_timer() -> timer_setup()

This converts all remaining cases of the old setup_timer() API into using
timer_setup(), where the callback argument is the structure already
holding the struct timer_list. These should have no behavioral changes,
since they just change which pointer is passed into the callback with
the same available pointers after conversion. It handles the following
examples, in addition to some other variations.

Casting from unsigned long:

    void my_callback(unsigned long data)
    {
        struct something *ptr = (struct something *)data;
    ...
    }
    ...
    setup_timer(&ptr->my_timer, my_callback, ptr);

and forced object casts:

    void my_callback(struct something *ptr)
    {
    ...
    }
    ...
    setup_timer(&ptr->my_timer, my_callback, (unsigned long)ptr);

become:

    void my_callback(struct timer_list *t)
    {
        struct something *ptr = from_timer(ptr, t, my_timer);
    ...
    }
    ...
    timer_setup(&ptr->my_timer, my_callback, 0);

Direct function assignments:

    void my_callback(unsigned long data)
    {
        struct something *ptr = (struct something *)data;
    ...
    }
    ...
    ptr->my_timer.function = my_callback;

have a temporary cast added, along with converting the args:

    void my_callback(struct timer_list *t)
    {
        struct something *ptr = from_timer(ptr, t, my_timer);
    ...
    }
    ...
    ptr->my_timer.function = (TIMER_FUNC_TYPE)my_callback;

And finally, callbacks without a data assignment:

    void my_callback(unsigned long data)
    {
    ...
    }
    ...
    setup_timer(&ptr->my_timer, my_callback, 0);

have their argument renamed to verify they're unused during conversion:

    void my_callback(struct timer_list *unused)
    {
    ...
    }
    ...
    timer_setup(&ptr->my_timer, my_callback, 0);

The conversion is done with the following Coccinelle script:

spatch --very-quiet --all-includes --include-headers \
	-I ./arch/x86/include -I ./arch/x86/include/generated \
	-I ./include -I ./arch/x86/include/uapi \
	-I ./arch/x86/include/generated/uapi -I ./include/uapi \
	-I ./include/generated/uapi --include ./include/linux/kconfig.h \
	--dir . \
	--cocci-file ~/src/data/timer_setup.cocci

@fix_address_of@
expression e;
@@

 setup_timer(
-&(e)
+&e
 , ...)

// Update any raw setup_timer() usages that have a NULL callback, but
// would otherwise match change_timer_function_usage, since the latter
// will update all function assignments done in the face of a NULL
// function initialization in setup_timer().
@change_timer_function_usage_NULL@
expression _E;
identifier _timer;
type _cast_data;
@@

(
-setup_timer(&_E->_timer, NULL, _E);
+timer_setup(&_E->_timer, NULL, 0);
|
-setup_timer(&_E->_timer, NULL, (_cast_data)_E);
+timer_setup(&_E->_timer, NULL, 0);
|
-setup_timer(&_E._timer, NULL, &_E);
+timer_setup(&_E._timer, NULL, 0);
|
-setup_timer(&_E._timer, NULL, (_cast_data)&_E);
+timer_setup(&_E._timer, NULL, 0);
)

@change_timer_function_usage@
expression _E;
identifier _timer;
struct timer_list _stl;
identifier _callback;
type _cast_func, _cast_data;
@@

(
-setup_timer(&_E->_timer, _callback, _E);
+timer_setup(&_E->_timer, _callback, 0);
|
-setup_timer(&_E->_timer, &_callback, _E);
+timer_setup(&_E->_timer, _callback, 0);
|
-setup_timer(&_E->_timer, _callback, (_cast_data)_E);
+timer_setup(&_E->_timer, _callback, 0);
|
-setup_timer(&_E->_timer, &_callback, (_cast_data)_E);
+timer_setup(&_E->_timer, _callback, 0);
|
-setup_timer(&_E->_timer, (_cast_func)_callback, _E);
+timer_setup(&_E->_timer, _callback, 0);
|
-setup_timer(&_E->_timer, (_cast_func)&_callback, _E);
+timer_setup(&_E->_timer, _callback, 0);
|
-setup_timer(&_E->_timer, (_cast_func)_callback, (_cast_data)_E);
+timer_setup(&_E->_timer, _callback, 0);
|
-setup_timer(&_E->_timer, (_cast_func)&_callback, (_cast_data)_E);
+timer_setup(&_E->_timer, _callback, 0);
|
-setup_timer(&_E._timer, _callback, (_cast_data)_E);
+timer_setup(&_E._timer, _callback, 0);
|
-setup_timer(&_E._timer, _callback, (_cast_data)&_E);
+timer_setup(&_E._timer, _callback, 0);
|
-setup_timer(&_E._timer, &_callback, (_cast_data)_E);
+timer_setup(&_E._timer, _callback, 0);
|
-setup_timer(&_E._timer, &_callback, (_cast_data)&_E);
+timer_setup(&_E._timer, _callback, 0);
|
-setup_timer(&_E._timer, (_cast_func)_callback, (_cast_data)_E);
+timer_setup(&_E._timer, _callback, 0);
|
-setup_timer(&_E._timer, (_cast_func)_callback, (_cast_data)&_E);
+timer_setup(&_E._timer, _callback, 0);
|
-setup_timer(&_E._timer, (_cast_func)&_callback, (_cast_data)_E);
+timer_setup(&_E._timer, _callback, 0);
|
-setup_timer(&_E._timer, (_cast_func)&_callback, (_cast_data)&_E);
+timer_setup(&_E._timer, _callback, 0);
|
 _E->_timer@_stl.function = _callback;
|
 _E->_timer@_stl.function = &_callback;
|
 _E->_timer@_stl.function = (_cast_func)_callback;
|
 _E->_timer@_stl.function = (_cast_func)&_callback;
|
 _E._timer@_stl.function = _callback;
|
 _E._timer@_stl.function = &_callback;
|
 _E._timer@_stl.function = (_cast_func)_callback;
|
 _E._timer@_stl.function = (_cast_func)&_callback;
)

// callback(unsigned long arg)
@change_callback_handle_cast
 depends on change_timer_function_usage@
identifier change_timer_function_usage._callback;
identifier change_timer_function_usage._timer;
type _origtype;
identifier _origarg;
type _handletype;
identifier _handle;
@@

 void _callback(
-_origtype _origarg
+struct timer_list *t
 )
 {
(
	... when != _origarg
	_handletype *_handle =
-(_handletype *)_origarg;
+from_timer(_handle, t, _timer);
	... when != _origarg
|
	... when != _origarg
	_handletype *_handle =
-(void *)_origarg;
+from_timer(_handle, t, _timer);
	... when != _origarg
|
	... when != _origarg
	_handletype *_handle;
	... when != _handle
	_handle =
-(_handletype *)_origarg;
+from_timer(_handle, t, _timer);
	... when != _origarg
|
	... when != _origarg
	_handletype *_handle;
	... when != _handle
	_handle =
-(void *)_origarg;
+from_timer(_handle, t, _timer);
	... when != _origarg
)
 }

// callback(unsigned long arg) without existing variable
@change_callback_handle_cast_no_arg
 depends on change_timer_function_usage &&
                     !change_callback_handle_cast@
identifier change_timer_function_usage._callback;
identifier change_timer_function_usage._timer;
type _origtype;
identifier _origarg;
type _handletype;
@@

 void _callback(
-_origtype _origarg
+struct timer_list *t
 )
 {
+	_handletype *_origarg = from_timer(_origarg, t, _timer);
+
	... when != _origarg
-	(_handletype *)_origarg
+	_origarg
	... when != _origarg
 }

// Avoid already converted callbacks.
@match_callback_converted
 depends on change_timer_function_usage &&
            !change_callback_handle_cast &&
	    !change_callback_handle_cast_no_arg@
identifier change_timer_function_usage._callback;
identifier t;
@@

 void _callback(struct timer_list *t)
 { ... }

// callback(struct something *handle)
@change_callback_handle_arg
 depends on change_timer_function_usage &&
	    !match_callback_converted &&
            !change_callback_handle_cast &&
            !change_callback_handle_cast_no_arg@
identifier change_timer_function_usage._callback;
identifier change_timer_function_usage._timer;
type _handletype;
identifier _handle;
@@

 void _callback(
-_handletype *_handle
+struct timer_list *t
 )
 {
+	_handletype *_handle = from_timer(_handle, t, _timer);
	...
 }

// If change_callback_handle_arg ran on an empty function, remove
// the added handler.
@unchange_callback_handle_arg
 depends on change_timer_function_usage &&
	    change_callback_handle_arg@
identifier change_timer_function_usage._callback;
identifier change_timer_function_usage._timer;
type _handletype;
identifier _handle;
identifier t;
@@

 void _callback(struct timer_list *t)
 {
-	_handletype *_handle = from_timer(_handle, t, _timer);
 }

// We only want to refactor the setup_timer() data argument if we've found
// the matching callback. This undoes changes in change_timer_function_usage.
@unchange_timer_function_usage
 depends on change_timer_function_usage &&
            !change_callback_handle_cast &&
            !change_callback_handle_cast_no_arg &&
	    !change_callback_handle_arg@
expression change_timer_function_usage._E;
identifier change_timer_function_usage._timer;
identifier change_timer_function_usage._callback;
type change_timer_function_usage._cast_data;
@@

(
-timer_setup(&_E->_timer, _callback, 0);
+setup_timer(&_E->_timer, _callback, (_cast_data)_E);
|
-timer_setup(&_E._timer, _callback, 0);
+setup_timer(&_E._timer, _callback, (_cast_data)&_E);
)

// If we fixed a callback from a .function assignment, fix the
// assignment cast now.
@change_timer_function_assignment
 depends on change_timer_function_usage &&
            (change_callback_handle_cast ||
             change_callback_handle_cast_no_arg ||
             change_callback_handle_arg)@
expression change_timer_function_usage._E;
identifier change_timer_function_usage._timer;
identifier change_timer_function_usage._callback;
type _cast_func;
typedef TIMER_FUNC_TYPE;
@@

(
 _E->_timer.function =
-_callback
+(TIMER_FUNC_TYPE)_callback
 ;
|
 _E->_timer.function =
-&_callback
+(TIMER_FUNC_TYPE)_callback
 ;
|
 _E->_timer.function =
-(_cast_func)_callback;
+(TIMER_FUNC_TYPE)_callback
 ;
|
 _E->_timer.function =
-(_cast_func)&_callback
+(TIMER_FUNC_TYPE)_callback
 ;
|
 _E._timer.function =
-_callback
+(TIMER_FUNC_TYPE)_callback
 ;
|
 _E._timer.function =
-&_callback;
+(TIMER_FUNC_TYPE)_callback
 ;
|
 _E._timer.function =
-(_cast_func)_callback
+(TIMER_FUNC_TYPE)_callback
 ;
|
 _E._timer.function =
-(_cast_func)&_callback
+(TIMER_FUNC_TYPE)_callback
 ;
)

// Sometimes timer functions are called directly. Replace matched args.
@change_timer_function_calls
 depends on change_timer_function_usage &&
            (change_callback_handle_cast ||
             change_callback_handle_cast_no_arg ||
             change_callback_handle_arg)@
expression _E;
identifier change_timer_function_usage._timer;
identifier change_timer_function_usage._callback;
type _cast_data;
@@

 _callback(
(
-(_cast_data)_E
+&_E->_timer
|
-(_cast_data)&_E
+&_E._timer
|
-_E
+&_E->_timer
)
 )

// If a timer has been configured without a data argument, it can be
// converted without regard to the callback argument, since it is unused.
@match_timer_function_unused_data@
expression _E;
identifier _timer;
identifier _callback;
@@

(
-setup_timer(&_E->_timer, _callback, 0);
+timer_setup(&_E->_timer, _callback, 0);
|
-setup_timer(&_E->_timer, _callback, 0L);
+timer_setup(&_E->_timer, _callback, 0);
|
-setup_timer(&_E->_timer, _callback, 0UL);
+timer_setup(&_E->_timer, _callback, 0);
|
-setup_timer(&_E._timer, _callback, 0);
+timer_setup(&_E._timer, _callback, 0);
|
-setup_timer(&_E._timer, _callback, 0L);
+timer_setup(&_E._timer, _callback, 0);
|
-setup_timer(&_E._timer, _callback, 0UL);
+timer_setup(&_E._timer, _callback, 0);
|
-setup_timer(&_timer, _callback, 0);
+timer_setup(&_timer, _callback, 0);
|
-setup_timer(&_timer, _callback, 0L);
+timer_setup(&_timer, _callback, 0);
|
-setup_timer(&_timer, _callback, 0UL);
+timer_setup(&_timer, _callback, 0);
|
-setup_timer(_timer, _callback, 0);
+timer_setup(_timer, _callback, 0);
|
-setup_timer(_timer, _callback, 0L);
+timer_setup(_timer, _callback, 0);
|
-setup_timer(_timer, _callback, 0UL);
+timer_setup(_timer, _callback, 0);
)

@change_callback_unused_data
 depends on match_timer_function_unused_data@
identifier match_timer_function_unused_data._callback;
type _origtype;
identifier _origarg;
@@

 void _callback(
-_origtype _origarg
+struct timer_list *unused
 )
 {
	... when != _origarg
 }

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/time/clocksource.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 5b51d5ba2a85..65f9e3f24dde 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -171,7 +171,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
 	spin_unlock_irqrestore(&watchdog_lock, flags);
 }
 
-static void clocksource_watchdog(unsigned long data)
+static void clocksource_watchdog(struct timer_list *unused)
 {
 	struct clocksource *cs;
 	u64 csnow, wdnow, cslast, wdlast, delta;
@@ -290,7 +290,7 @@ static inline void clocksource_start_watchdog(void)
 {
 	if (watchdog_running || !watchdog || list_empty(&watchdog_list))
 		return;
-	setup_timer(&watchdog_timer, clocksource_watchdog, 0UL);
+	timer_setup(&watchdog_timer, clocksource_watchdog, 0);
 	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
 	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
 	watchdog_running = 1;
-- 
cgit v1.2.3


From c1eba5bcb6430868427e0b9d1cd1205a07302f06 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sun, 22 Oct 2017 18:18:19 -0700
Subject: timer: Pass timer_list pointer to callbacks unconditionally

Now that all timer callbacks are already taking their struct timer_list
pointer as the callback argument, just do this unconditionally and remove
the .data field.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/time/timer.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index af0b8bae4502..a07eb124332f 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1107,12 +1107,12 @@ EXPORT_SYMBOL(timer_reduce);
  * add_timer - start a timer
  * @timer: the timer to be added
  *
- * The kernel will do a ->function(->data) callback from the
+ * The kernel will do a ->function(@timer) callback from the
  * timer interrupt at the ->expires point in the future. The
  * current time is 'jiffies'.
  *
- * The timer's ->expires, ->function (and if the handler uses it, ->data)
- * fields must be set prior calling this function.
+ * The timer's ->expires, ->function fields must be set prior calling this
+ * function.
  *
  * Timers with an ->expires field in the past will be executed in the next
  * timer tick.
@@ -1284,8 +1284,7 @@ int del_timer_sync(struct timer_list *timer)
 EXPORT_SYMBOL(del_timer_sync);
 #endif
 
-static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
-			  unsigned long data)
+static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long))
 {
 	int count = preempt_count();
 
@@ -1309,7 +1308,7 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
 	lock_map_acquire(&lockdep_map);
 
 	trace_timer_expire_entry(timer);
-	fn(data);
+	fn((TIMER_DATA_TYPE)timer);
 	trace_timer_expire_exit(timer);
 
 	lock_map_release(&lockdep_map);
@@ -1332,7 +1331,6 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
 	while (!hlist_empty(head)) {
 		struct timer_list *timer;
 		void (*fn)(unsigned long);
-		unsigned long data;
 
 		timer = hlist_entry(head->first, struct timer_list, entry);
 
@@ -1340,15 +1338,14 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
 		detach_timer(timer, true);
 
 		fn = timer->function;
-		data = timer->data;
 
 		if (timer->flags & TIMER_IRQSAFE) {
 			raw_spin_unlock(&base->lock);
-			call_timer_fn(timer, fn, data);
+			call_timer_fn(timer, fn);
 			raw_spin_lock(&base->lock);
 		} else {
 			raw_spin_unlock_irq(&base->lock);
-			call_timer_fn(timer, fn, data);
+			call_timer_fn(timer, fn);
 			raw_spin_lock_irq(&base->lock);
 		}
 	}
-- 
cgit v1.2.3


From 354b46b1a0adda1dd5b7f0bc2a5604cca091be5f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sun, 22 Oct 2017 19:15:40 -0700
Subject: timer: Switch callback prototype to take struct timer_list * argument

Since all callbacks have been converted, we can switch the core
prototype to "struct timer_list *" now too.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/time/timer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index a07eb124332f..0f0d49a02d04 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1284,7 +1284,7 @@ int del_timer_sync(struct timer_list *timer)
 EXPORT_SYMBOL(del_timer_sync);
 #endif
 
-static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long))
+static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *))
 {
 	int count = preempt_count();
 
@@ -1308,7 +1308,7 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long))
 	lock_map_acquire(&lockdep_map);
 
 	trace_timer_expire_entry(timer);
-	fn((TIMER_DATA_TYPE)timer);
+	fn(timer);
 	trace_timer_expire_exit(timer);
 
 	lock_map_release(&lockdep_map);
@@ -1330,7 +1330,7 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
 {
 	while (!hlist_empty(head)) {
 		struct timer_list *timer;
-		void (*fn)(unsigned long);
+		void (*fn)(struct timer_list *);
 
 		timer = hlist_entry(head->first, struct timer_list, entry);
 
-- 
cgit v1.2.3


From 188665b2d67db8953899551d1a9d4481b2a0ac60 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sun, 22 Oct 2017 18:14:46 -0700
Subject: timer: Pass function down to initialization routines

In preparation for removing more macros, pass the function down to the
initialization routines instead of doing it in macros.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/time/timer.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 0f0d49a02d04..ffebcf878fba 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -707,14 +707,18 @@ static inline void debug_timer_assert_init(struct timer_list *timer)
 	debug_object_assert_init(timer, &timer_debug_descr);
 }
 
-static void do_init_timer(struct timer_list *timer, unsigned int flags,
+static void do_init_timer(struct timer_list *timer,
+			  void (*func)(struct timer_list *),
+			  unsigned int flags,
 			  const char *name, struct lock_class_key *key);
 
-void init_timer_on_stack_key(struct timer_list *timer, unsigned int flags,
+void init_timer_on_stack_key(struct timer_list *timer,
+			     void (*func)(struct timer_list *),
+			     unsigned int flags,
 			     const char *name, struct lock_class_key *key)
 {
 	debug_object_init_on_stack(timer, &timer_debug_descr);
-	do_init_timer(timer, flags, name, key);
+	do_init_timer(timer, func, flags, name, key);
 }
 EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
 
@@ -755,10 +759,13 @@ static inline void debug_assert_init(struct timer_list *timer)
 	debug_timer_assert_init(timer);
 }
 
-static void do_init_timer(struct timer_list *timer, unsigned int flags,
+static void do_init_timer(struct timer_list *timer,
+			  void (*func)(struct timer_list *),
+			  unsigned int flags,
 			  const char *name, struct lock_class_key *key)
 {
 	timer->entry.pprev = NULL;
+	timer->function = func;
 	timer->flags = flags | raw_smp_processor_id();
 	lockdep_init_map(&timer->lockdep_map, name, key, 0);
 }
@@ -766,6 +773,7 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags,
 /**
  * init_timer_key - initialize a timer
  * @timer: the timer to be initialized
+ * @func: timer callback function
  * @flags: timer flags
  * @name: name of the timer
  * @key: lockdep class key of the fake lock used for tracking timer
@@ -774,11 +782,12 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags,
  * init_timer_key() must be done to a timer prior calling *any* of the
  * other timer functions.
  */
-void init_timer_key(struct timer_list *timer, unsigned int flags,
+void init_timer_key(struct timer_list *timer,
+		    void (*func)(struct timer_list *), unsigned int flags,
 		    const char *name, struct lock_class_key *key)
 {
 	debug_init(timer);
-	do_init_timer(timer, flags, name, key);
+	do_init_timer(timer, func, flags, name, key);
 }
 EXPORT_SYMBOL(init_timer_key);
 
-- 
cgit v1.2.3