From bd624d75db21ea5402f9ecf4450b311794d80352 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 13 Feb 2015 08:54:56 +0800 Subject: clockevents: Introduce mode specific callbacks It is not possible for the clockevents core to know which modes (other than those with a corresponding feature flag) are supported by a particular implementation. And drivers are expected to handle transition to all modes elegantly, as ->set_mode() would be issued for them unconditionally. Now, adding support for a new mode complicates things a bit if we want to use the legacy ->set_mode() callback. We need to closely review all clockevents drivers to see if they would break on addition of a new mode. And after such reviews, it is found that we have to do non-trivial changes to most of the drivers [1]. Introduce mode-specific set_mode_*() callbacks, some of which the drivers may or may not implement. A missing callback would clearly convey the message that the corresponding mode isn't supported. A driver may still choose to keep supporting the legacy ->set_mode() callback, but ->set_mode() wouldn't be supporting any new modes beyond RESUME. If a driver wants to benefit from using a new mode, it would be required to migrate to the mode specific callbacks. The legacy ->set_mode() callback and the newly introduced mode-specific callbacks are mutually exclusive. Only one of them should be supported by the driver. Sanity check is done at the time of registration to distinguish between optional and required callbacks and to make error recovery and handling simpler. If the legacy ->set_mode() callback is provided, all mode specific ones would be ignored by the core but a warning is thrown if they are present. Call sites calling ->set_mode() directly are also updated to use __clockevents_set_mode() instead, as ->set_mode() may not be available anymore for few drivers. [1] https://lkml.org/lkml/2014/12/9/605 [2] https://lkml.org/lkml/2015/1/23/255 Suggested-by: Thomas Gleixner [2] Signed-off-by: Viresh Kumar Signed-off-by: Peter Zijlstra (Intel) Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: John Stultz Cc: Kevin Hilman Cc: Linus Torvalds Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Link: http://lkml.kernel.org/r/792d59a40423f0acffc9bb0bec9de1341a06fa02.1423788565.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 88 +++++++++++++++++++++++++++++++++++++++++++++-- kernel/time/timer_list.c | 32 +++++++++++++++-- 2 files changed, 115 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 55449909f114..489642b08d64 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -94,6 +94,57 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) } EXPORT_SYMBOL_GPL(clockevent_delta2ns); +static int __clockevents_set_mode(struct clock_event_device *dev, + enum clock_event_mode mode) +{ + /* Transition with legacy set_mode() callback */ + if (dev->set_mode) { + /* Legacy callback doesn't support new modes */ + if (mode > CLOCK_EVT_MODE_RESUME) + return -ENOSYS; + dev->set_mode(mode, dev); + return 0; + } + + if (dev->features & CLOCK_EVT_FEAT_DUMMY) + return 0; + + /* Transition with new mode-specific callbacks */ + switch (mode) { + case CLOCK_EVT_MODE_UNUSED: + /* + * This is an internal state, which is guaranteed to go from + * SHUTDOWN to UNUSED. No driver interaction required. 
+ */ + return 0; + + case CLOCK_EVT_MODE_SHUTDOWN: + return dev->set_mode_shutdown(dev); + + case CLOCK_EVT_MODE_PERIODIC: + /* Core internal bug */ + if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) + return -ENOSYS; + return dev->set_mode_periodic(dev); + + case CLOCK_EVT_MODE_ONESHOT: + /* Core internal bug */ + if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) + return -ENOSYS; + return dev->set_mode_oneshot(dev); + + case CLOCK_EVT_MODE_RESUME: + /* Optional callback */ + if (dev->set_mode_resume) + return dev->set_mode_resume(dev); + else + return 0; + + default: + return -ENOSYS; + } +} + /** * clockevents_set_mode - set the operating mode of a clock event device * @dev: device to modify @@ -105,7 +156,9 @@ void clockevents_set_mode(struct clock_event_device *dev, enum clock_event_mode mode) { if (dev->mode != mode) { - dev->set_mode(mode, dev); + if (__clockevents_set_mode(dev, mode)) + return; + dev->mode = mode; /* @@ -373,6 +426,35 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) } EXPORT_SYMBOL_GPL(clockevents_unbind); +/* Sanity check of mode transition callbacks */ +static int clockevents_sanity_check(struct clock_event_device *dev) +{ + /* Legacy set_mode() callback */ + if (dev->set_mode) { + /* We shouldn't be supporting new modes now */ + WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot || + dev->set_mode_shutdown || dev->set_mode_resume); + return 0; + } + + if (dev->features & CLOCK_EVT_FEAT_DUMMY) + return 0; + + /* New mode-specific callbacks */ + if (!dev->set_mode_shutdown) + return -EINVAL; + + if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && + !dev->set_mode_periodic) + return -EINVAL; + + if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) && + !dev->set_mode_oneshot) + return -EINVAL; + + return 0; +} + /** * clockevents_register_device - register a clock event device * @dev: device to register @@ -382,6 +464,8 @@ void clockevents_register_device(struct clock_event_device *dev) unsigned long flags; BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + BUG_ON(clockevents_sanity_check(dev)); + if (!dev->cpumask) { WARN_ON(num_possible_cpus() > 1); dev->cpumask = cpumask_of(smp_processor_id()); @@ -449,7 +533,7 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq) return clockevents_program_event(dev, dev->next_event, false); if (dev->mode == CLOCK_EVT_MODE_PERIODIC) - dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev); + return __clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); return 0; } diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 61ed862cdd37..2cfd19485824 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) print_name_offset(m, dev->set_next_event); SEQ_printf(m, "\n"); - SEQ_printf(m, " set_mode: "); - print_name_offset(m, dev->set_mode); - SEQ_printf(m, "\n"); + if (dev->set_mode) { + SEQ_printf(m, " set_mode: "); + print_name_offset(m, dev->set_mode); + SEQ_printf(m, "\n"); + } else { + if (dev->set_mode_shutdown) { + SEQ_printf(m, " shutdown: "); + print_name_offset(m, dev->set_mode_shutdown); + SEQ_printf(m, "\n"); + } + + if (dev->set_mode_periodic) { + SEQ_printf(m, " periodic: "); + print_name_offset(m, dev->set_mode_periodic); + SEQ_printf(m, "\n"); + } + + if (dev->set_mode_oneshot) { + SEQ_printf(m, " oneshot: "); + print_name_offset(m, dev->set_mode_oneshot); + SEQ_printf(m, "\n"); + } + + if (dev->set_mode_resume) { + SEQ_printf(m, " resume: "); + print_name_offset(m, 
dev->set_mode_resume);
+			SEQ_printf(m, "\n");
+		}
+	}

 	SEQ_printf(m, " event_handler:  ");
 	print_name_offset(m, dev->event_handler);
--
cgit v1.2.3

From 6086e346fdea1ae64d974c94c1acacc2605567ae Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 11 Mar 2015 21:16:29 -0700
Subject: clocksource: Simplify the clocks_calc_max_nsecs() logic

The previous clocks_calc_max_nsecs() code had some unnecessarily complex
bit logic to find the max interval that could cause multiplication
overflows. Since this is not in the hot path, just do the divide to make
it easier to read.

The previous implementation also had a subtle issue: it avoided overflows
with signed 64-bit values, whereas the intervals are always unsigned.
This resulted in overly conservative intervals, which other safety
margins were then added to, reducing the intended interval length.

Signed-off-by: John Stultz
Cc: Dave Jones
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Prarit Bhargava
Cc: Richard Cochran
Cc: Stephen Boyd
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1426133800-29329-2-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar
---
 kernel/time/clocksource.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 4892352f0e49..2148f413256c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -476,19 +476,10 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)

 	/*
 	 * Calculate the maximum number of cycles that we can pass to the
-	 * cyc2ns function without overflowing a 64-bit signed result. The
-	 * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
-	 * which is equivalent to the below.
-	 * max_cycles < (2^63)/(mult + maxadj)
-	 * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
-	 * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
-	 * max_cycles < 2^(63 - log2(mult + maxadj))
-	 * max_cycles < 1 << (63 - log2(mult + maxadj))
-	 * Please note that we add 1 to the result of the log2 to account for
-	 * any rounding errors, ensure the above inequality is satisfied and
-	 * no overflow will occur.
+	 * cyc2ns() function without overflowing a 64-bit result.
 	 */
-	max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
+	max_cycles = ULLONG_MAX;
+	do_div(max_cycles, mult+maxadj);

 	/*
 	 * The actual maximum number of cycles we can defer the clocksource is
--
cgit v1.2.3

From 362fde0410377e468ca00ad363fdf3e3ec42eb6a Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 11 Mar 2015 21:16:30 -0700
Subject: clocksource: Simplify the logic around clocksource wrapping safety margins

The clocksource logic has a number of places where we try to include a
safety margin. Most of these are 12.5% safety margins, but they are
inconsistently applied and sometimes are applied on top of each other.

Additionally, in the previous patch, we corrected an issue where we
unintentionally in effect created a 50% safety margin, which these 12.5%
margins were then added to.

So to simplify the logic here, this patch removes the various 12.5%
margins, and consolidates adding the margin in one place:
clocks_calc_max_nsecs().

Additionally, Linus prefers a 50% safety margin, as it allows bad clock
values to be more easily caught. This should really have no net effect,
due to the corrected issue earlier which caused greater than 50% margins
to be used without issue.
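
[ Editor's illustration: the consolidated margin math reduces to the
  sketch below -- a simplified userspace rendering, not the in-tree code.
  The function name and casts are made up for illustration, but the steps
  mirror clocks_calc_max_nsecs() after this patch:

	#include <stdint.h>

	/* Largest cycle delta cyc2ns() can take without a 64-bit
	 * overflow, halved so that bad clock values stand out. */
	static uint64_t max_nsecs_sketch(uint32_t mult, uint32_t maxadj,
					 uint32_t shift, uint64_t mask)
	{
		/* widen before adding so mult + maxadj cannot wrap */
		uint64_t max_cycles = UINT64_MAX / ((uint64_t)mult + maxadj);

		if (max_cycles > mask)		/* the counter wraps first */
			max_cycles = mask;
		/* convert with the smallest possible mult, to be safe */
		return ((max_cycles * (mult - maxadj)) >> shift) >> 1;
	}

  The final shift right by one is the new, single 50% margin. ]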
Signed-off-by: John Stultz Acked-by: Stephen Boyd (for the sched_clock.c bit) Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 26 ++++++++++++-------------- kernel/time/sched_clock.c | 4 ++-- 2 files changed, 14 insertions(+), 16 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 2148f413256c..ace95763b3a6 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -469,6 +469,9 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) * @shift: cycle to nanosecond divisor (power of two) * @maxadj: maximum adjustment value to mult (~11%) * @mask: bitmask for two's complement subtraction of non 64 bit counters + * + * NOTE: This function includes a safety margin of 50%, so that bad clock values + * can be detected. */ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) { @@ -490,11 +493,14 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask) max_cycles = min(max_cycles, mask); max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift); + /* Return 50% of the actual maximum, so we can detect bad values */ + max_nsecs >>= 1; + return max_nsecs; } /** - * clocksource_max_deferment - Returns max time the clocksource can be deferred + * clocksource_max_deferment - Returns max time the clocksource should be deferred * @cs: Pointer to clocksource * */ @@ -504,13 +510,7 @@ static u64 clocksource_max_deferment(struct clocksource *cs) max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj, cs->mask); - /* - * To ensure that the clocksource does not wrap whilst we are idle, - * limit the time the clocksource can be deferred by 12.5%. Please - * note a margin of 12.5% is used because this can be computed with - * a shift, versus say 10% which would require division. - */ - return max_nsecs - (max_nsecs >> 3); + return max_nsecs; } #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET @@ -659,10 +659,9 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) * conversion precision. 10 minutes is still a reasonable * amount. That results in a shift value of 24 for a * clocksource with mask >= 40bit and f >= 4GHz. That maps to - * ~ 0.06ppm granularity for NTP. We apply the same 12.5% - * margin as we do in clocksource_max_deferment() + * ~ 0.06ppm granularity for NTP. */ - sec = (cs->mask - (cs->mask >> 3)); + sec = cs->mask; do_div(sec, freq); do_div(sec, scale); if (!sec) @@ -674,9 +673,8 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) NSEC_PER_SEC / scale, sec * scale); /* - * for clocksources that have large mults, to avoid overflow. - * Since mult may be adjusted by ntp, add an safety extra margin - * + * Ensure clocksources that have large 'mult' values don't overflow + * when adjusted. 
*/ cs->maxadj = clocksource_max_adjustment(cs); while ((cs->mult + cs->maxadj < cs->mult) diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 01d2d15aa662..3b8ae45020c1 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -125,9 +125,9 @@ void __init sched_clock_register(u64 (*read)(void), int bits, new_mask = CLOCKSOURCE_MASK(bits); - /* calculate how many ns until we wrap */ + /* calculate how many nanosecs until we risk wrapping */ wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask); - new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3)); + new_wrap_kt = ns_to_ktime(wrap); /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); -- cgit v1.2.3 From fb82fe2fe8588745edd73aa3a6229facac5c1e15 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:31 -0700 Subject: clocksource: Add 'max_cycles' to 'struct clocksource' In order to facilitate clocksource validation, add a 'max_cycles' field to the clocksource structure which will hold the maximum cycle value that can safely be multiplied without potentially causing an overflow. Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 28 ++++++++++++++++------------ kernel/time/sched_clock.c | 2 +- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index ace95763b3a6..fc2a9de43ca1 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -469,11 +469,13 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) * @shift: cycle to nanosecond divisor (power of two) * @maxadj: maximum adjustment value to mult (~11%) * @mask: bitmask for two's complement subtraction of non 64 bit counters + * @max_cyc: maximum cycle value before potential overflow (does not include + * any safety margin) * * NOTE: This function includes a safety margin of 50%, so that bad clock values * can be detected. 
*/
-u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
+u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
 {
 	u64 max_nsecs, max_cycles;
@@ -493,6 +495,10 @@
 	max_cycles = min(max_cycles, mask);
 	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);

+	/* return the max_cycles value as well if requested */
+	if (max_cyc)
+		*max_cyc = max_cycles;
+
 	/* Return 50% of the actual maximum, so we can detect bad values */
 	max_nsecs >>= 1;

@@ -500,17 +506,15 @@
 }

 /**
- * clocksource_max_deferment - Returns max time the clocksource should be deferred
- * @cs: Pointer to clocksource
+ * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
+ * @cs: Pointer to clocksource to be updated
  *
  */
-static u64 clocksource_max_deferment(struct clocksource *cs)
+static inline void clocksource_update_max_deferment(struct clocksource *cs)
 {
-	u64 max_nsecs;
-
-	max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
-					  cs->mask);
-	return max_nsecs;
+	cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
+						cs->maxadj, cs->mask,
+						&cs->max_cycles);
 }

 #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
@@ -684,7 +688,7 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
 		cs->maxadj = clocksource_max_adjustment(cs);
 	}

-	cs->max_idle_ns = clocksource_max_deferment(cs);
+	clocksource_update_max_deferment(cs);
 }
 EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);

@@ -730,8 +734,8 @@ int clocksource_register(struct clocksource *cs)
 		"Clocksource %s might overflow on 11%% adjustment\n",
 		cs->name);

-	/* calculate max idle time permitted for this clocksource */
-	cs->max_idle_ns = clocksource_max_deferment(cs);
+	/* Update max idle time permitted for this clocksource */
+	clocksource_update_max_deferment(cs);

 	mutex_lock(&clocksource_mutex);
 	clocksource_enqueue(cs);

diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 3b8ae45020c1..ca3bc5c7027c 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -126,7 +126,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 	new_mask = CLOCKSOURCE_MASK(bits);

 	/* calculate how many nanosecs until we risk wrapping */
-	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
+	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
 	new_wrap_kt = ns_to_ktime(wrap);

 	/* update epoch for new counter and update epoch_ns from old counter*/
 	new_epoch = read();
--
cgit v1.2.3

From 3c17ad19f0697ffe5ef7438cdafc2d2b7757d8a5 Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 11 Mar 2015 21:16:32 -0700
Subject: timekeeping: Add debugging checks to warn if we see delays

Recently there have been requests for better sanity checking in the
time code, so that it's clearer when something is going wrong, since
timekeeping issues could manifest in a large number of strange ways in
various subsystems.

Thus, this patch adds some extra infrastructure to add a check to
update_wall_time() to print two new warnings:

 1) if we see the call delayed beyond the 'max_cycles' overflow point, or

 2) if we see the call delayed beyond the clocksource's 'max_idle_ns'
    value, which is currently 50% of the overflow point.

This extra infrastructure is conditional on a new
CONFIG_DEBUG_TIMEKEEPING option, also added in this patch - default off.
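
[ Editor's note: the checks are compiled out unless the new option is
  switched on explicitly, e.g. via this illustrative .config fragment:

	CONFIG_DEBUG_TIMEKEEPING=y

  With the option disabled, timekeeping_check_update() collapses to an
  empty inline (see the #else branch in the diff below) and costs
  nothing. ]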
Tested this a bit by halting qemu for specified lengths of time to
trigger the warnings.

Signed-off-by: John Stultz
Cc: Dave Jones
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Prarit Bhargava
Cc: Richard Cochran
Cc: Stephen Boyd
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1426133800-29329-5-git-send-email-john.stultz@linaro.org
[ Improved the changelog and the messages a bit. ]
Signed-off-by: Ingo Molnar
---
 kernel/time/jiffies.c     |  1 +
 kernel/time/timekeeping.c | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'kernel/time')

diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a6a5bf53e86d..7e413902aa6a 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -71,6 +71,7 @@ static struct clocksource clocksource_jiffies = {
 	.mask		= 0xffffffff, /*32bits*/
 	.mult		= NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
 	.shift		= JIFFIES_SHIFT,
+	.max_cycles	= 10,
 };

 __cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 91db94136c10..acf049144cf6 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -118,6 +118,31 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 	tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }

+#ifdef CONFIG_DEBUG_TIMEKEEPING
+static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+{
+
+	cycle_t max_cycles = tk->tkr.clock->max_cycles;
+	const char *name = tk->tkr.clock->name;
+
+	if (offset > max_cycles) {
+		printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow\n",
+				offset, name, max_cycles);
+		printk_deferred("         timekeeping: Your kernel is sick, but tries to cope\n");
+	} else {
+		if (offset > (max_cycles >> 1)) {
+			printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n",
+					offset, name, max_cycles >> 1);
+			printk_deferred("      timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
+		}
+	}
+}
+#else
+static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+{
+}
+#endif
+
 /**
  * tk_setup_internals - Set up internals to use clocksource clock.
  *
@@ -1630,6 +1655,9 @@ void update_wall_time(void)
 	if (offset < real_tk->cycle_interval)
 		goto out;

+	/* Do some additional sanity checking */
+	timekeeping_check_update(real_tk, offset);
+
 	/*
 	 * With NO_HZ we may have to accumulate many cycle_intervals
 	 * (think "ticks") worth of time at once. To do this efficiently,
--
cgit v1.2.3

From a558cd021d83b65c47ee5b9bec1fcfe5298a769f Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 11 Mar 2015 21:16:33 -0700
Subject: timekeeping: Add checks to cap clocksource reads to the 'max_cycles' value

When calculating the current delta since the last tick, we currently
have no hard protections to prevent a multiplication overflow from
occurring.

This patch introduces infrastructure to allow a cap that limits the
clocksource read delta value to the 'max_cycles' value, which is where
an overflow would occur. Since this is in the hotpath, the extra
checking is done only under CONFIG_DEBUG_TIMEKEEPING=y.

There was some concern that capping time like this could cause
problems as we may stop expiring timers, which could go circular if
the timer that triggers time accumulation were mis-scheduled too far
in the future, which would cause time to stop.
However, since the mult overflow would result in a smaller time value, we would effectively have the same problem there. Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-6-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 49 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 14 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index acf049144cf6..657414cf2e46 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -126,9 +126,9 @@ static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) const char *name = tk->tkr.clock->name; if (offset > max_cycles) { - printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow\n", + printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n", offset, name, max_cycles); - printk_deferred(" timekeeping: Your kernel is sick, but tries to cope\n"); + printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n"); } else { if (offset > (max_cycles >> 1)) { printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n", @@ -137,10 +137,39 @@ static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) } } } + +static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) +{ + cycle_t cycle_now, delta; + + /* read clocksource */ + cycle_now = tkr->read(tkr->clock); + + /* calculate the delta since the last update_wall_time */ + delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + + /* Cap delta value to the max_cycles values to avoid mult overflows */ + if (unlikely(delta > tkr->clock->max_cycles)) + delta = tkr->clock->max_cycles; + + return delta; +} #else static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) { } +static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) +{ + cycle_t cycle_now, delta; + + /* read clocksource */ + cycle_now = tkr->read(tkr->clock); + + /* calculate the delta since the last update_wall_time */ + delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + + return delta; +} #endif /** @@ -218,14 +247,10 @@ static inline u32 arch_gettimeoffset(void) { return 0; } static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) { - cycle_t cycle_now, delta; + cycle_t delta; s64 nsec; - /* read clocksource: */ - cycle_now = tkr->read(tkr->clock); - - /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + delta = timekeeping_get_delta(tkr); nsec = delta * tkr->mult + tkr->xtime_nsec; nsec >>= tkr->shift; @@ -237,14 +262,10 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) { struct clocksource *clock = tk->tkr.clock; - cycle_t cycle_now, delta; + cycle_t delta; s64 nsec; - /* read clocksource: */ - cycle_now = tk->tkr.read(clock); - - /* calculate the delta since the last update_wall_time: */ - delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); + delta = timekeeping_get_delta(&tk->tkr); /* 
convert delta to nanoseconds. */
 	nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
--
cgit v1.2.3

From 057b87e3161d1194a095718f9918c01b2c389e74 Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 11 Mar 2015 21:16:34 -0700
Subject: timekeeping: Try to catch clocksource delta underflows

In the case where there is a broken clocksource, where multiple actual
clocks aren't perfectly aligned, we may see small "negative" deltas
when we subtract 'cycle_last' from 'now'.

The values are actually negative with respect to the clocksource mask
value, not necessarily negative if cast to an s64, so we can detect
them by checking whether the delta is a small (relative to the mask)
negative value. If so, we assume we jumped backwards somehow and
instead use zero for our delta.

Signed-off-by: John Stultz
Cc: Dave Jones
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Prarit Bhargava
Cc: Richard Cochran
Cc: Stephen Boyd
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1426133800-29329-7-git-send-email-john.stultz@linaro.org
Signed-off-by: Ingo Molnar
---
 kernel/time/timekeeping.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 657414cf2e46..187149be83ea 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -148,6 +148,13 @@ static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
 	/* calculate the delta since the last update_wall_time */
 	delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);

+	/*
+	 * Try to catch underflows by checking if we are seeing small
+	 * mask-relative negative values.
+	 */
+	if (unlikely((~delta & tkr->mask) < (tkr->mask >> 3)))
+		delta = 0;
+
 	/* Cap delta value to the max_cycles values to avoid mult overflows */
 	if (unlikely(delta > tkr->clock->max_cycles))
 		delta = tkr->clock->max_cycles;
--
cgit v1.2.3

From 4ca22c2648f9c1cec0b242f58d7302136f5a4cbb Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 11 Mar 2015 21:16:35 -0700
Subject: timekeeping: Add warnings when overflows or underflows are observed

It was suggested that the underflow/overflow protection should probably
throw some sort of warning out, rather than just silently fixing the
issue. So this patch adds some warnings here.

The flag variables used are not protected by locks, but since we can't
print from the reading functions, just being able to say we saw an
issue in the update interval is useful enough, and can be slightly
racy without real consequence.

The big complication is that we're only under a read seqlock, so the
data could shift under us during our calculation to see if there was a
problem. This patch avoids this issue by nesting another seqlock which
allows us to snapshot just the required values atomically. So we
shouldn't see false positives.

I also added some basic rate-limiting here, since on one build machine
w/ skewed TSCs it was fairly noisy at bootup.
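
[ Editor's illustration: the nested-snapshot pattern described above, in
  minimal form (the names are taken from the diff below; declarations
  and the checks themselves are elided):

	do {
		seq  = read_seqcount_begin(&tk_core.seq);
		now  = tkr->read(tkr->clock);
		last = tkr->cycle_last;
		mask = tkr->mask;
		max  = tkr->clock->max_cycles;
	} while (read_seqcount_retry(&tk_core.seq, seq));

  A concurrent update restarts the loop, so the sampled values are
  always checked as a consistent set and false positives are avoided. ]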
Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-8-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 64 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 187149be83ea..892f6cbf1e67 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -119,6 +119,20 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) } #ifdef CONFIG_DEBUG_TIMEKEEPING +#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ +/* + * These simple flag variables are managed + * without locks, which is racy, but ok since + * we don't really care about being super + * precise about how many events were seen, + * just that a problem was observed. + */ +static int timekeeping_underflow_seen; +static int timekeeping_overflow_seen; + +/* last_warning is only modified under the timekeeping lock */ +static long timekeeping_last_warning; + static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) { @@ -136,28 +150,64 @@ static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n"); } } + + if (timekeeping_underflow_seen) { + if (jiffies - timekeeping_last_warning > WARNING_FREQ) { + printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name); + printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); + printk_deferred(" Your kernel is probably still fine.\n"); + timekeeping_last_warning = jiffies; + } + timekeeping_underflow_seen = 0; + } + + if (timekeeping_overflow_seen) { + if (jiffies - timekeeping_last_warning > WARNING_FREQ) { + printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name); + printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); + printk_deferred(" Your kernel is probably still fine.\n"); + timekeeping_last_warning = jiffies; + } + timekeeping_overflow_seen = 0; + } } static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr) { - cycle_t cycle_now, delta; + cycle_t now, last, mask, max, delta; + unsigned int seq; - /* read clocksource */ - cycle_now = tkr->read(tkr->clock); + /* + * Since we're called holding a seqlock, the data may shift + * under us while we're doing the calculation. This can cause + * false positives, since we'd note a problem but throw the + * results away. So nest another seqlock here to atomically + * grab the points we are checking with. + */ + do { + seq = read_seqcount_begin(&tk_core.seq); + now = tkr->read(tkr->clock); + last = tkr->cycle_last; + mask = tkr->mask; + max = tkr->clock->max_cycles; + } while (read_seqcount_retry(&tk_core.seq, seq)); - /* calculate the delta since the last update_wall_time */ - delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); + delta = clocksource_delta(now, last, mask); /* * Try to catch underflows by checking if we are seeing small * mask-relative negative values. 
*/
-	if (unlikely((~delta & tkr->mask) < (tkr->mask >> 3)))
+	if (unlikely((~delta & mask) < (mask >> 3))) {
+		timekeeping_underflow_seen = 1;
 		delta = 0;
+	}

 	/* Cap delta value to the max_cycles values to avoid mult overflows */
-	if (unlikely(delta > tkr->clock->max_cycles))
+	if (unlikely(delta > max)) {
+		timekeeping_overflow_seen = 1;
 		delta = tkr->clock->max_cycles;
+	}

 	return delta;
 }
--
cgit v1.2.3

From 0b046b217ad4c64fbbeaaac24d0648cb1fa49ad8 Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Wed, 11 Mar 2015 21:16:36 -0700
Subject: clocksource: Improve clocksource watchdog reporting

The clocksource watchdog reporting has been less helpful than desired,
as it just printed the delta between the two clocksources. This
prevented any useful analysis of why the skew occurred.

Thus this patch tries to improve the output when we mark a clocksource
as unstable, printing out the last and current cycle values for both
the current clocksource and the watchdog clocksource. This will allow
us to see if the result was due to a false positive caused by a
problematic watchdog.

Signed-off-by: John Stultz
Cc: Dave Jones
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Prarit Bhargava
Cc: Richard Cochran
Cc: Stephen Boyd
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1426133800-29329-9-git-send-email-john.stultz@linaro.org
[ Minor cleanups of kernel messages. ]
Signed-off-by: Ingo Molnar
---
 kernel/time/clocksource.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index fc2a9de43ca1..c4cc04bec698 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs)
 	schedule_work(&watchdog_work);
 }

-static void clocksource_unstable(struct clocksource *cs, int64_t delta)
-{
-	printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
-	       cs->name, delta);
-	__clocksource_unstable(cs);
-}
-
 /**
  * clocksource_mark_unstable - mark clocksource unstable via watchdog
  * @cs: clocksource to be marked unstable
@@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
 static void clocksource_watchdog(unsigned long data)
 {
 	struct clocksource *cs;
-	cycle_t csnow, wdnow, delta;
+	cycle_t csnow, wdnow, cslast, wdlast, delta;
 	int64_t wd_nsec, cs_nsec;
 	int next_cpu, reset_pending;

@@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data)
 	delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
 	cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);

+	wdlast = cs->wd_last; /* save these in case we print them */
+	cslast = cs->cs_last;
 	cs->cs_last = csnow;
 	cs->wd_last = wdnow;

@@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data)

 	/* Check the deviation from the watchdog clocksource.
*/ if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { - clocksource_unstable(cs, cs_nsec - wd_nsec); + pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name); + pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", + watchdog->name, wdnow, wdlast, watchdog->mask); + pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n", + cs->name, csnow, cslast, cs->mask); + __clocksource_unstable(cs); continue; } -- cgit v1.2.3 From f8935983f110505daa38e8d36ee406807f83a069 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:37 -0700 Subject: clocksource: Mostly kill clocksource_register() A long running project has been to clean up remaining uses of clocksource_register(), replacing it with the simpler clocksource_register_khz/hz() functions. However, there are a few cases where we need to self-define our mult/shift values, so switch the function to a more obviously internal __clocksource_register() name, and consolidate much of the internal logic so we don't have duplication. Signed-off-by: John Stultz Cc: Dave Jones Cc: David S. Miller Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-10-git-send-email-john.stultz@linaro.org [ Minor cleanups. ] Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 81 ++++++++++++++++++++--------------------------- kernel/time/jiffies.c | 4 +-- 2 files changed, 36 insertions(+), 49 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c4cc04bec698..5cdf17eb4fa6 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -656,38 +656,52 @@ static void clocksource_enqueue(struct clocksource *cs) void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) { u64 sec; + /* - * Calc the maximum number of seconds which we can run before - * wrapping around. For clocksources which have a mask > 32bit - * we need to limit the max sleep time to have a good - * conversion precision. 10 minutes is still a reasonable - * amount. That results in a shift value of 24 for a - * clocksource with mask >= 40bit and f >= 4GHz. That maps to - * ~ 0.06ppm granularity for NTP. + * Default clocksources are *special* and self-define their mult/shift. + * But, you're not special, so you should specify a freq value. */ - sec = cs->mask; - do_div(sec, freq); - do_div(sec, scale); - if (!sec) - sec = 1; - else if (sec > 600 && cs->mask > UINT_MAX) - sec = 600; - - clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, - NSEC_PER_SEC / scale, sec * scale); - + if (freq) { + /* + * Calc the maximum number of seconds which we can run before + * wrapping around. For clocksources which have a mask > 32-bit + * we need to limit the max sleep time to have a good + * conversion precision. 10 minutes is still a reasonable + * amount. That results in a shift value of 24 for a + * clocksource with mask >= 40-bit and f >= 4GHz. That maps to + * ~ 0.06ppm granularity for NTP. + */ + sec = cs->mask; + do_div(sec, freq); + do_div(sec, scale); + if (!sec) + sec = 1; + else if (sec > 600 && cs->mask > UINT_MAX) + sec = 600; + + clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, + NSEC_PER_SEC / scale, sec * scale); + } /* * Ensure clocksources that have large 'mult' values don't overflow * when adjusted. 
*/ cs->maxadj = clocksource_max_adjustment(cs); - while ((cs->mult + cs->maxadj < cs->mult) - || (cs->mult - cs->maxadj > cs->mult)) { + while (freq && ((cs->mult + cs->maxadj < cs->mult) + || (cs->mult - cs->maxadj > cs->mult))) { cs->mult >>= 1; cs->shift--; cs->maxadj = clocksource_max_adjustment(cs); } + /* + * Only warn for *special* clocksources that self-define + * their mult/shift values and don't specify a freq. + */ + WARN_ONCE(cs->mult + cs->maxadj < cs->mult, + "timekeeping: Clocksource %s might overflow on 11%% adjustment\n", + cs->name); + clocksource_update_max_deferment(cs); } EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); @@ -719,33 +733,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq) } EXPORT_SYMBOL_GPL(__clocksource_register_scale); - -/** - * clocksource_register - Used to install new clocksources - * @cs: clocksource to be registered - * - * Returns -EBUSY if registration fails, zero otherwise. - */ -int clocksource_register(struct clocksource *cs) -{ - /* calculate max adjustment for given mult/shift */ - cs->maxadj = clocksource_max_adjustment(cs); - WARN_ONCE(cs->mult + cs->maxadj < cs->mult, - "Clocksource %s might overflow on 11%% adjustment\n", - cs->name); - - /* Update max idle time permitted for this clocksource */ - clocksource_update_max_deferment(cs); - - mutex_lock(&clocksource_mutex); - clocksource_enqueue(cs); - clocksource_enqueue_watchdog(cs); - clocksource_select(); - mutex_unlock(&clocksource_mutex); - return 0; -} -EXPORT_SYMBOL(clocksource_register); - static void __clocksource_change_rating(struct clocksource *cs, int rating) { list_del(&cs->list); diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 7e413902aa6a..c4bb518725b5 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -95,7 +95,7 @@ EXPORT_SYMBOL(jiffies); static int __init init_jiffies_clocksource(void) { - return clocksource_register(&clocksource_jiffies); + return __clocksource_register(&clocksource_jiffies); } core_initcall(init_jiffies_clocksource); @@ -131,6 +131,6 @@ int register_refined_jiffies(long cycles_per_second) refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT; - clocksource_register(&refined_jiffies); + __clocksource_register(&refined_jiffies); return 0; } -- cgit v1.2.3 From 8cc8c525ad4e7b581cacf84119e1a28dcb4044db Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:39 -0700 Subject: clocksource: Add some debug info about clocksources being registered Print the mask, max_cycles, and max_idle_ns values for clocksources being registered. 
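
[ Editor's note: one such line is emitted per registered clocksource.
  The format comes from the pr_info() in the diff below, but the
  clocksource name and the values here are invented for illustration:

	clocksource tsc: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
]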
Signed-off-by: John Stultz Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-12-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 5cdf17eb4fa6..1977ebabd922 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -703,6 +703,9 @@ void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq) cs->name); clocksource_update_max_deferment(cs); + + pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n", + cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns); } EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale); -- cgit v1.2.3 From fba9e07208c0f9d92d9f73761c99c8612039da44 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 11 Mar 2015 21:16:40 -0700 Subject: clocksource: Rename __clocksource_updatefreq_*() to __clocksource_update_freq_*() Ingo requested this function be renamed to improve readability, so I've renamed __clocksource_updatefreq_scale() as well as the __clocksource_updatefreq_hz/khz() functions to avoid squishedtogethernames. This touches some of the sh clocksources, which I've not tested. The arch/arm/plat-omap change is just a comment change for consistency. Signed-off-by: John Stultz Cc: Daniel Lezcano Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1426133800-29329-13-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1977ebabd922..c3be3c71bbad 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -643,7 +643,7 @@ static void clocksource_enqueue(struct clocksource *cs) } /** - * __clocksource_updatefreq_scale - Used update clocksource with new freq + * __clocksource_update_freq_scale - Used update clocksource with new freq * @cs: clocksource to be registered * @scale: Scale factor multiplied against freq to get clocksource hz * @freq: clocksource frequency (cycles per second) divided by scale @@ -651,9 +651,10 @@ static void clocksource_enqueue(struct clocksource *cs) * This should only be called from the clocksource->enable() method. * * This *SHOULD NOT* be called directly! Please use the - * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions. + * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper + * functions. 
*/
-void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
+void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
 	u64 sec;
@@ -707,7 +708,7 @@
 	pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
 			cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
 }
-EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
+EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);

 /**
  * __clocksource_register_scale - Used to install new clocksources
@@ -724,7 +725,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 {

 	/* Initialize mult/shift and max_idle_ns */
-	__clocksource_updatefreq_scale(cs, scale, freq);
+	__clocksource_update_freq_scale(cs, scale, freq);

 	/* Add clocksource to the clocksource list */
 	mutex_lock(&clocksource_mutex);
--
cgit v1.2.3

From 8710e914027e4f64058ebbf0501cc6db3cc8454f Mon Sep 17 00:00:00 2001
From: Daniel Thompson
Date: Thu, 26 Mar 2015 12:23:22 -0700
Subject: timers, sched/clock: Match scope of read and write seqcounts

Currently the scope of the raw_write_seqcount_begin/end() in
sched_clock_register() far exceeds the scope of the read section in
sched_clock(). This gives the impression of safety during cursory
review but achieves little.

Note that this is likely to be a latent issue at present because
sched_clock_register() is typically called before we enable interrupts;
however, the issue does risk bugs being needlessly introduced as the
code evolves.

This patch fixes the problem by increasing the scope of the read
locking performed by sched_clock() to cover all data modified by
sched_clock_register().

We also improve clarity by moving writes to struct clock_data that do
not impact sched_clock() outside of the critical section.
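
[ Editor's illustration: the rule this patch enforces, reduced to a
  minimal sketch (field names are placeholders and the declarations are
  elided; this is not the real clock_data layout):

	/* Writer: publish all related fields in one write section */
	raw_write_seqcount_begin(&cd.seq);
	cd.mult  = new_mult;
	cd.shift = new_shift;
	raw_write_seqcount_end(&cd.seq);

	/* Reader: every field written above must be sampled inside
	 * the retry loop, or a torn read can slip through */
	do {
		seq   = raw_read_seqcount_begin(&cd.seq);
		mult  = cd.mult;
		shift = cd.shift;
	} while (read_seqcount_retry(&cd.seq, seq));
]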
Signed-off-by: Daniel Thompson [ Reworked it slightly to apply to tip/timers/core] Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index ca3bc5c7027c..1751e956add9 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -58,23 +58,21 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) unsigned long long notrace sched_clock(void) { - u64 epoch_ns; - u64 epoch_cyc; - u64 cyc; + u64 cyc, res; unsigned long seq; - if (cd.suspended) - return cd.epoch_ns; - do { seq = raw_read_seqcount_begin(&cd.seq); - epoch_cyc = cd.epoch_cyc; - epoch_ns = cd.epoch_ns; + + res = cd.epoch_ns; + if (!cd.suspended) { + cyc = read_sched_clock(); + cyc = (cyc - cd.epoch_cyc) & sched_clock_mask; + res += cyc_to_ns(cyc, cd.mult, cd.shift); + } } while (read_seqcount_retry(&cd.seq, seq)); - cyc = read_sched_clock(); - cyc = (cyc - epoch_cyc) & sched_clock_mask; - return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift); + return res; } /* @@ -111,7 +109,6 @@ void __init sched_clock_register(u64 (*read)(void), int bits, { u64 res, wrap, new_mask, new_epoch, cyc, ns; u32 new_mult, new_shift; - ktime_t new_wrap_kt; unsigned long r; char r_unit; @@ -124,10 +121,11 @@ void __init sched_clock_register(u64 (*read)(void), int bits, clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); new_mask = CLOCKSOURCE_MASK(bits); + cd.rate = rate; /* calculate how many nanosecs until we risk wrapping */ wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL); - new_wrap_kt = ns_to_ktime(wrap); + cd.wrap_kt = ns_to_ktime(wrap); /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); @@ -138,8 +136,6 @@ void __init sched_clock_register(u64 (*read)(void), int bits, raw_write_seqcount_begin(&cd.seq); read_sched_clock = read; sched_clock_mask = new_mask; - cd.rate = rate; - cd.wrap_kt = new_wrap_kt; cd.mult = new_mult; cd.shift = new_shift; cd.epoch_cyc = new_epoch; -- cgit v1.2.3 From cf7c9c170787d6870af54684822f58acc00a966c Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 26 Mar 2015 12:23:23 -0700 Subject: timers, sched/clock: Optimize cache line usage Currently sched_clock(), a very hot code path, is not optimized to minimise its cache profile. In particular: 1. cd is not ____cacheline_aligned, 2. struct clock_data does not distinguish between hotpath and coldpath data, reducing locality of reference in the hotpath, 3. Some hotpath data is missing from struct clock_data and is marked __read_mostly (which more or less guarantees it will not share a cache line with cd). This patch corrects these problems by extracting all hotpath data into a separate structure and using ____cacheline_aligned to ensure the hotpath uses a single (64 byte) cache line. 
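
[ Editor's note: the 64-byte budget stated above could be pinned down
  at build time with a hypothetical assertion (not part of the patch):

	_Static_assert(sizeof(struct clock_read_data) + sizeof(seqcount_t) <= 64,
		       "sched_clock() hot data must fit in one cache line");

  so that later growth of clock_read_data would fail the build rather
  than silently spill onto a second cache line. ]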
Signed-off-by: Daniel Thompson Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 112 +++++++++++++++++++++++++++++++--------------- 1 file changed, 77 insertions(+), 35 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 1751e956add9..872e0685d1fb 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -18,28 +18,59 @@ #include #include -struct clock_data { - ktime_t wrap_kt; +/** + * struct clock_read_data - data required to read from sched_clock + * + * @epoch_ns: sched_clock value at last update + * @epoch_cyc: Clock cycle value at last update + * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit + * clocks + * @read_sched_clock: Current clock source (or dummy source when suspended) + * @mult: Multipler for scaled math conversion + * @shift: Shift value for scaled math conversion + * @suspended: Flag to indicate if the clock is suspended (stopped) + * + * Care must be taken when updating this structure; it is read by + * some very hot code paths. It occupies <=48 bytes and, when combined + * with the seqcount used to synchronize access, comfortably fits into + * a 64 byte cache line. + */ +struct clock_read_data { u64 epoch_ns; u64 epoch_cyc; - seqcount_t seq; - unsigned long rate; + u64 sched_clock_mask; + u64 (*read_sched_clock)(void); u32 mult; u32 shift; bool suspended; }; +/** + * struct clock_data - all data needed for sched_clock (including + * registration of a new clock source) + * + * @seq: Sequence counter for protecting updates. + * @read_data: Data required to read from sched_clock. + * @wrap_kt: Duration for which clock can run before wrapping + * @rate: Tick rate of the registered clock + * @actual_read_sched_clock: Registered clock read function + * + * The ordering of this structure has been chosen to optimize cache + * performance. In particular seq and read_data (combined) should fit + * into a single 64 byte cache line. 
+ */ +struct clock_data { + seqcount_t seq; + struct clock_read_data read_data; + ktime_t wrap_kt; + unsigned long rate; +}; + static struct hrtimer sched_clock_timer; static int irqtime = -1; core_param(irqtime, irqtime, int, 0400); -static struct clock_data cd = { - .mult = NSEC_PER_SEC / HZ, -}; - -static u64 __read_mostly sched_clock_mask; - static u64 notrace jiffy_sched_clock_read(void) { /* @@ -49,7 +80,10 @@ static u64 notrace jiffy_sched_clock_read(void) return (u64)(jiffies - INITIAL_JIFFIES); } -static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read; +static struct clock_data cd ____cacheline_aligned = { + .read_data = { .mult = NSEC_PER_SEC / HZ, + .read_sched_clock = jiffy_sched_clock_read, }, +}; static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) { @@ -60,15 +94,16 @@ unsigned long long notrace sched_clock(void) { u64 cyc, res; unsigned long seq; + struct clock_read_data *rd = &cd.read_data; do { seq = raw_read_seqcount_begin(&cd.seq); - res = cd.epoch_ns; - if (!cd.suspended) { - cyc = read_sched_clock(); - cyc = (cyc - cd.epoch_cyc) & sched_clock_mask; - res += cyc_to_ns(cyc, cd.mult, cd.shift); + res = rd->epoch_ns; + if (!rd->suspended) { + cyc = rd->read_sched_clock(); + cyc = (cyc - rd->epoch_cyc) & rd->sched_clock_mask; + res += cyc_to_ns(cyc, rd->mult, rd->shift); } } while (read_seqcount_retry(&cd.seq, seq)); @@ -83,16 +118,17 @@ static void notrace update_sched_clock(void) unsigned long flags; u64 cyc; u64 ns; + struct clock_read_data *rd = &cd.read_data; - cyc = read_sched_clock(); - ns = cd.epoch_ns + - cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, - cd.mult, cd.shift); + cyc = rd->read_sched_clock(); + ns = rd->epoch_ns + + cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, + rd->mult, rd->shift); raw_local_irq_save(flags); raw_write_seqcount_begin(&cd.seq); - cd.epoch_ns = ns; - cd.epoch_cyc = cyc; + rd->epoch_ns = ns; + rd->epoch_cyc = cyc; raw_write_seqcount_end(&cd.seq); raw_local_irq_restore(flags); } @@ -111,6 +147,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits, u32 new_mult, new_shift; unsigned long r; char r_unit; + struct clock_read_data *rd = &cd.read_data; if (cd.rate > rate) return; @@ -129,17 +166,18 @@ void __init sched_clock_register(u64 (*read)(void), int bits, /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); - cyc = read_sched_clock(); - ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, - cd.mult, cd.shift); + cyc = rd->read_sched_clock(); + ns = rd->epoch_ns + + cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, + rd->mult, rd->shift); raw_write_seqcount_begin(&cd.seq); - read_sched_clock = read; - sched_clock_mask = new_mask; - cd.mult = new_mult; - cd.shift = new_shift; - cd.epoch_cyc = new_epoch; - cd.epoch_ns = ns; + rd->read_sched_clock = read; + rd->sched_clock_mask = new_mask; + rd->mult = new_mult; + rd->shift = new_shift; + rd->epoch_cyc = new_epoch; + rd->epoch_ns = ns; raw_write_seqcount_end(&cd.seq); r = rate; @@ -171,7 +209,7 @@ void __init sched_clock_postinit(void) * If no sched_clock function has been provided at that point, * make it the final one one. 
*/ - if (read_sched_clock == jiffy_sched_clock_read) + if (cd.read_data.read_sched_clock == jiffy_sched_clock_read) sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ); update_sched_clock(); @@ -187,17 +225,21 @@ void __init sched_clock_postinit(void) static int sched_clock_suspend(void) { + struct clock_read_data *rd = &cd.read_data; + update_sched_clock(); hrtimer_cancel(&sched_clock_timer); - cd.suspended = true; + rd->suspended = true; return 0; } static void sched_clock_resume(void) { - cd.epoch_cyc = read_sched_clock(); + struct clock_read_data *rd = &cd.read_data; + + rd->epoch_cyc = rd->read_sched_clock(); hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); - cd.suspended = false; + rd->suspended = false; } static struct syscore_ops sched_clock_ops = { -- cgit v1.2.3 From 13dbeb384d2d3aa555ea48d511e8cb110bd172e0 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 26 Mar 2015 12:23:24 -0700 Subject: timers, sched/clock: Remove suspend from clock_read_data() Currently cd.read_data.suspended is read by the hotpath function sched_clock(). This variable need not be accessed on the hotpath. In fact, once it is removed, we can remove the conditional branches from sched_clock() and install a dummy read_sched_clock function to suspend the clock. The new master copy of the function pointer (actual_read_sched_clock) is introduced and is used for all reads of the clock hardware except those within sched_clock itself. Suggested-by: Thomas Gleixner Signed-off-by: Daniel Thompson Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 872e0685d1fb..52ea5d976393 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -28,10 +28,9 @@ * @read_sched_clock: Current clock source (or dummy source when suspended) * @mult: Multipler for scaled math conversion * @shift: Shift value for scaled math conversion - * @suspended: Flag to indicate if the clock is suspended (stopped) * * Care must be taken when updating this structure; it is read by - * some very hot code paths. It occupies <=48 bytes and, when combined + * some very hot code paths. It occupies <=40 bytes and, when combined * with the seqcount used to synchronize access, comfortably fits into * a 64 byte cache line. 
*/ @@ -42,7 +41,6 @@ struct clock_read_data { u64 (*read_sched_clock)(void); u32 mult; u32 shift; - bool suspended; }; /** @@ -64,6 +62,7 @@ struct clock_data { struct clock_read_data read_data; ktime_t wrap_kt; unsigned long rate; + u64 (*actual_read_sched_clock)(void); }; static struct hrtimer sched_clock_timer; @@ -83,6 +82,8 @@ static u64 notrace jiffy_sched_clock_read(void) static struct clock_data cd ____cacheline_aligned = { .read_data = { .mult = NSEC_PER_SEC / HZ, .read_sched_clock = jiffy_sched_clock_read, }, + .actual_read_sched_clock = jiffy_sched_clock_read, + }; static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) @@ -99,12 +100,9 @@ unsigned long long notrace sched_clock(void) do { seq = raw_read_seqcount_begin(&cd.seq); - res = rd->epoch_ns; - if (!rd->suspended) { - cyc = rd->read_sched_clock(); - cyc = (cyc - rd->epoch_cyc) & rd->sched_clock_mask; - res += cyc_to_ns(cyc, rd->mult, rd->shift); - } + cyc = (rd->read_sched_clock() - rd->epoch_cyc) & + rd->sched_clock_mask; + res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift); } while (read_seqcount_retry(&cd.seq, seq)); return res; @@ -120,7 +118,7 @@ static void notrace update_sched_clock(void) u64 ns; struct clock_read_data *rd = &cd.read_data; - cyc = rd->read_sched_clock(); + cyc = cd.actual_read_sched_clock(); ns = rd->epoch_ns + cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, rd->mult, rd->shift); @@ -166,10 +164,11 @@ void __init sched_clock_register(u64 (*read)(void), int bits, /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); - cyc = rd->read_sched_clock(); + cyc = cd.actual_read_sched_clock(); ns = rd->epoch_ns + cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, rd->mult, rd->shift); + cd.actual_read_sched_clock = read; raw_write_seqcount_begin(&cd.seq); rd->read_sched_clock = read; @@ -209,7 +208,7 @@ void __init sched_clock_postinit(void) * If no sched_clock function has been provided at that point, * make it the final one one. */ - if (cd.read_data.read_sched_clock == jiffy_sched_clock_read) + if (cd.actual_read_sched_clock == jiffy_sched_clock_read) sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ); update_sched_clock(); @@ -223,13 +222,24 @@ void __init sched_clock_postinit(void) hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); } +/* + * Clock read function for use when the clock is suspended. + * + * This function makes it appear to sched_clock() as if the clock + * stopped counting at its last update. 
+ */ +static u64 notrace suspended_sched_clock_read(void) +{ + return cd.read_data.epoch_cyc; +} + static int sched_clock_suspend(void) { struct clock_read_data *rd = &cd.read_data; update_sched_clock(); hrtimer_cancel(&sched_clock_timer); - rd->suspended = true; + rd->read_sched_clock = suspended_sched_clock_read; return 0; } @@ -237,9 +247,9 @@ static void sched_clock_resume(void) { struct clock_read_data *rd = &cd.read_data; - rd->epoch_cyc = rd->read_sched_clock(); + rd->epoch_cyc = cd.actual_read_sched_clock(); hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); - rd->suspended = false; + rd->read_sched_clock = cd.actual_read_sched_clock; } static struct syscore_ops sched_clock_ops = { -- cgit v1.2.3 From 9fee69a8c8070b38b558161a3f18bd5e2b664682 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 26 Mar 2015 12:23:25 -0700 Subject: timers, sched/clock: Remove redundant notrace from update function Currently update_sched_clock() is marked as notrace but this function is not called by ftrace. This is trivially fixed by removing the mark up. Signed-off-by: Daniel Thompson Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-5-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 52ea5d976393..8adb9d0c969a 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -111,7 +111,7 @@ unsigned long long notrace sched_clock(void) /* * Atomically update the sched_clock epoch. */ -static void notrace update_sched_clock(void) +static void update_sched_clock(void) { unsigned long flags; u64 cyc; -- cgit v1.2.3 From 1809bfa44e1019e397fabaa6f2349bb7237e57a4 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Thu, 26 Mar 2015 12:23:26 -0700 Subject: timers, sched/clock: Avoid deadlock during read from NMI Currently it is possible for an NMI (or FIQ on ARM) to come in and read sched_clock() whilst update_sched_clock() has locked the seqcount for writing. This results in the NMI handler locking up when it calls raw_read_seqcount_begin(). This patch fixes the NMI safety issues by providing banked clock data. This is a similar approach to the one used in Thomas Gleixner's 4396e058c52e("timekeeping: Provide fast and NMI safe access to CLOCK_MONOTONIC"). Suggested-by: Stephen Boyd Signed-off-by: Daniel Thompson Signed-off-by: John Stultz Reviewed-by: Stephen Boyd Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Peter Zijlstra Cc: Russell King Cc: Thomas Gleixner Cc: Will Deacon Link: http://lkml.kernel.org/r/1427397806-20889-6-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 103 ++++++++++++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 35 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 8adb9d0c969a..eeea1e950b72 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -47,19 +47,20 @@ struct clock_read_data { * struct clock_data - all data needed for sched_clock (including * registration of a new clock source) * - * @seq: Sequence counter for protecting updates. + * @seq: Sequence counter for protecting updates. 
The lowest + * bit is the index for @read_data. * @read_data: Data required to read from sched_clock. * @wrap_kt: Duration for which clock can run before wrapping * @rate: Tick rate of the registered clock * @actual_read_sched_clock: Registered clock read function * * The ordering of this structure has been chosen to optimize cache - * performance. In particular seq and read_data (combined) should fit + * performance. In particular seq and read_data[0] (combined) should fit * into a single 64 byte cache line. */ struct clock_data { seqcount_t seq; - struct clock_read_data read_data; + struct clock_read_data read_data[2]; ktime_t wrap_kt; unsigned long rate; u64 (*actual_read_sched_clock)(void); @@ -80,10 +81,9 @@ static u64 notrace jiffy_sched_clock_read(void) } static struct clock_data cd ____cacheline_aligned = { - .read_data = { .mult = NSEC_PER_SEC / HZ, - .read_sched_clock = jiffy_sched_clock_read, }, + .read_data[0] = { .mult = NSEC_PER_SEC / HZ, + .read_sched_clock = jiffy_sched_clock_read, }, .actual_read_sched_clock = jiffy_sched_clock_read, - }; static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) @@ -95,10 +95,11 @@ unsigned long long notrace sched_clock(void) { u64 cyc, res; unsigned long seq; - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data *rd; do { - seq = raw_read_seqcount_begin(&cd.seq); + seq = raw_read_seqcount(&cd.seq); + rd = cd.read_data + (seq & 1); cyc = (rd->read_sched_clock() - rd->epoch_cyc) & rd->sched_clock_mask; @@ -108,27 +109,51 @@ unsigned long long notrace sched_clock(void) return res; } +/* + * Updating the data required to read the clock. + * + * sched_clock will never observe mis-matched data even if called from + * an NMI. We do this by maintaining an odd/even copy of the data and + * steering sched_clock to one or the other using a sequence counter. + * In order to preserve the data cache profile of sched_clock as much + * as possible the system reverts back to the even copy when the update + * completes; the odd copy is used *only* during an update. + */ +static void update_clock_read_data(struct clock_read_data *rd) +{ + /* update the backup (odd) copy with the new data */ + cd.read_data[1] = *rd; + + /* steer readers towards the odd copy */ + raw_write_seqcount_latch(&cd.seq); + + /* now its safe for us to update the normal (even) copy */ + cd.read_data[0] = *rd; + + /* switch readers back to the even copy */ + raw_write_seqcount_latch(&cd.seq); +} + /* * Atomically update the sched_clock epoch. 
*/ static void update_sched_clock(void) { - unsigned long flags; u64 cyc; u64 ns; - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data rd; + + rd = cd.read_data[0]; cyc = cd.actual_read_sched_clock(); - ns = rd->epoch_ns + - cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, - rd->mult, rd->shift); - - raw_local_irq_save(flags); - raw_write_seqcount_begin(&cd.seq); - rd->epoch_ns = ns; - rd->epoch_cyc = cyc; - raw_write_seqcount_end(&cd.seq); - raw_local_irq_restore(flags); + ns = rd.epoch_ns + + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, + rd.mult, rd.shift); + + rd.epoch_ns = ns; + rd.epoch_cyc = cyc; + + update_clock_read_data(&rd); } static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) @@ -145,7 +170,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits, u32 new_mult, new_shift; unsigned long r; char r_unit; - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data rd; if (cd.rate > rate) return; @@ -162,22 +187,23 @@ void __init sched_clock_register(u64 (*read)(void), int bits, wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL); cd.wrap_kt = ns_to_ktime(wrap); + rd = cd.read_data[0]; + /* update epoch for new counter and update epoch_ns from old counter*/ new_epoch = read(); cyc = cd.actual_read_sched_clock(); - ns = rd->epoch_ns + - cyc_to_ns((cyc - rd->epoch_cyc) & rd->sched_clock_mask, - rd->mult, rd->shift); + ns = rd.epoch_ns + + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, + rd.mult, rd.shift); cd.actual_read_sched_clock = read; - raw_write_seqcount_begin(&cd.seq); - rd->read_sched_clock = read; - rd->sched_clock_mask = new_mask; - rd->mult = new_mult; - rd->shift = new_shift; - rd->epoch_cyc = new_epoch; - rd->epoch_ns = ns; - raw_write_seqcount_end(&cd.seq); + rd.read_sched_clock = read; + rd.sched_clock_mask = new_mask; + rd.mult = new_mult; + rd.shift = new_shift; + rd.epoch_cyc = new_epoch; + rd.epoch_ns = ns; + update_clock_read_data(&rd); r = rate; if (r >= 4000000) { @@ -227,15 +253,22 @@ void __init sched_clock_postinit(void) * * This function makes it appear to sched_clock() as if the clock * stopped counting at its last update. + * + * This function must only be called from the critical + * section in sched_clock(). It relies on the read_seqcount_retry() + * at the end of the critical section to be sure we observe the + * correct copy of epoch_cyc. */ static u64 notrace suspended_sched_clock_read(void) { - return cd.read_data.epoch_cyc; + unsigned long seq = raw_read_seqcount(&cd.seq); + + return cd.read_data[seq & 1].epoch_cyc; } static int sched_clock_suspend(void) { - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data *rd = &cd.read_data[0]; update_sched_clock(); hrtimer_cancel(&sched_clock_timer); @@ -245,7 +278,7 @@ static int sched_clock_suspend(void) static void sched_clock_resume(void) { - struct clock_read_data *rd = &cd.read_data; + struct clock_read_data *rd = &cd.read_data[0]; rd->epoch_cyc = cd.actual_read_sched_clock(); hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); -- cgit v1.2.3 From 32fea568aec5b73ae27253125522b5c2a970a1f0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 27 Mar 2015 07:08:06 +0100 Subject: timers, sched/clock: Clean up the code a bit Trivial cleanups, to improve the readability of the generic sched_clock() code: - Improve and standardize comments - Standardize the coding style - Use vertical spacing where appropriate - etc. 
No code changed: md5: 19a053b31e0c54feaeff1492012b019a sched_clock.o.before.asm 19a053b31e0c54feaeff1492012b019a sched_clock.o.after.asm Cc: Catalin Marinas Cc: Daniel Thompson Cc: John Stultz Cc: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Russell King Cc: Stephen Boyd Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Ingo Molnar --- kernel/time/sched_clock.c | 107 ++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 51 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index eeea1e950b72..a26036d37a38 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -1,5 +1,6 @@ /* - * sched_clock.c: support for extending counters to full 64-bit ns counter + * sched_clock.c: Generic sched_clock() support, to extend low level + * hardware time counters to full 64-bit ns values. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,15 +20,15 @@ #include /** - * struct clock_read_data - data required to read from sched_clock + * struct clock_read_data - data required to read from sched_clock() * - * @epoch_ns: sched_clock value at last update - * @epoch_cyc: Clock cycle value at last update + * @epoch_ns: sched_clock() value at last update + * @epoch_cyc: Clock cycle value at last update. * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit - * clocks - * @read_sched_clock: Current clock source (or dummy source when suspended) - * @mult: Multipler for scaled math conversion - * @shift: Shift value for scaled math conversion + * clocks. + * @read_sched_clock: Current clock source (or dummy source when suspended). + * @mult: Multipler for scaled math conversion. + * @shift: Shift value for scaled math conversion. * * Care must be taken when updating this structure; it is read by * some very hot code paths. It occupies <=40 bytes and, when combined @@ -44,25 +45,26 @@ struct clock_read_data { }; /** - * struct clock_data - all data needed for sched_clock (including + * struct clock_data - all data needed for sched_clock() (including * registration of a new clock source) * * @seq: Sequence counter for protecting updates. The lowest * bit is the index for @read_data. * @read_data: Data required to read from sched_clock. - * @wrap_kt: Duration for which clock can run before wrapping - * @rate: Tick rate of the registered clock - * @actual_read_sched_clock: Registered clock read function + * @wrap_kt: Duration for which clock can run before wrapping. + * @rate: Tick rate of the registered clock. + * @actual_read_sched_clock: Registered hardware level clock read function. * * The ordering of this structure has been chosen to optimize cache - * performance. In particular seq and read_data[0] (combined) should fit - * into a single 64 byte cache line. + * performance. In particular 'seq' and 'read_data[0]' (combined) should fit + * into a single 64-byte cache line. */ struct clock_data { - seqcount_t seq; - struct clock_read_data read_data[2]; - ktime_t wrap_kt; - unsigned long rate; + seqcount_t seq; + struct clock_read_data read_data[2]; + ktime_t wrap_kt; + unsigned long rate; + u64 (*actual_read_sched_clock)(void); }; @@ -112,10 +114,10 @@ unsigned long long notrace sched_clock(void) /* * Updating the data required to read the clock. * - * sched_clock will never observe mis-matched data even if called from + * sched_clock() will never observe mis-matched data even if called from * an NMI. 
We do this by maintaining an odd/even copy of the data and - * steering sched_clock to one or the other using a sequence counter. - * In order to preserve the data cache profile of sched_clock as much + * steering sched_clock() to one or the other using a sequence counter. + * In order to preserve the data cache profile of sched_clock() as much * as possible the system reverts back to the even copy when the update * completes; the odd copy is used *only* during an update. */ @@ -135,7 +137,7 @@ static void update_clock_read_data(struct clock_read_data *rd) } /* - * Atomically update the sched_clock epoch. + * Atomically update the sched_clock() epoch. */ static void update_sched_clock(void) { @@ -146,9 +148,7 @@ static void update_sched_clock(void) rd = cd.read_data[0]; cyc = cd.actual_read_sched_clock(); - ns = rd.epoch_ns + - cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, - rd.mult, rd.shift); + ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift); rd.epoch_ns = ns; rd.epoch_cyc = cyc; @@ -160,11 +160,12 @@ static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt) { update_sched_clock(); hrtimer_forward_now(hrt, cd.wrap_kt); + return HRTIMER_RESTART; } -void __init sched_clock_register(u64 (*read)(void), int bits, - unsigned long rate) +void __init +sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) { u64 res, wrap, new_mask, new_epoch, cyc, ns; u32 new_mult, new_shift; @@ -177,51 +178,53 @@ void __init sched_clock_register(u64 (*read)(void), int bits, WARN_ON(!irqs_disabled()); - /* calculate the mult/shift to convert counter ticks to ns. */ + /* Calculate the mult/shift to convert counter ticks to ns. */ clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); new_mask = CLOCKSOURCE_MASK(bits); cd.rate = rate; - /* calculate how many nanosecs until we risk wrapping */ + /* Calculate how many nanosecs until we risk wrapping */ wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL); cd.wrap_kt = ns_to_ktime(wrap); rd = cd.read_data[0]; - /* update epoch for new counter and update epoch_ns from old counter*/ + /* Update epoch for new counter and update 'epoch_ns' from old counter*/ new_epoch = read(); cyc = cd.actual_read_sched_clock(); - ns = rd.epoch_ns + - cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, - rd.mult, rd.shift); + ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift); cd.actual_read_sched_clock = read; - rd.read_sched_clock = read; - rd.sched_clock_mask = new_mask; - rd.mult = new_mult; - rd.shift = new_shift; - rd.epoch_cyc = new_epoch; - rd.epoch_ns = ns; + rd.read_sched_clock = read; + rd.sched_clock_mask = new_mask; + rd.mult = new_mult; + rd.shift = new_shift; + rd.epoch_cyc = new_epoch; + rd.epoch_ns = ns; + update_clock_read_data(&rd); r = rate; if (r >= 4000000) { r /= 1000000; r_unit = 'M'; - } else if (r >= 1000) { - r /= 1000; - r_unit = 'k'; - } else - r_unit = ' '; - - /* calculate the ns resolution of this counter */ + } else { + if (r >= 1000) { + r /= 1000; + r_unit = 'k'; + } else { + r_unit = ' '; + } + } + + /* Calculate the ns resolution of this counter */ res = cyc_to_ns(1ULL, new_mult, new_shift); pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n", bits, r, r_unit, res, wrap); - /* Enable IRQ time accounting if we have a fast enough sched_clock */ + /* Enable IRQ time accounting if we have a fast enough sched_clock() */ if (irqtime > 0 || (irqtime == -1 && rate >= 1000000)) 
enable_sched_clock_irqtime(); @@ -231,7 +234,7 @@ void __init sched_clock_register(u64 (*read)(void), int bits, void __init sched_clock_postinit(void) { /* - * If no sched_clock function has been provided at that point, + * If no sched_clock() function has been provided at that point, * make it the final one one. */ if (cd.actual_read_sched_clock == jiffy_sched_clock_read) @@ -257,7 +260,7 @@ void __init sched_clock_postinit(void) * This function must only be called from the critical * section in sched_clock(). It relies on the read_seqcount_retry() * at the end of the critical section to be sure we observe the - * correct copy of epoch_cyc. + * correct copy of 'epoch_cyc'. */ static u64 notrace suspended_sched_clock_read(void) { @@ -273,6 +276,7 @@ static int sched_clock_suspend(void) update_sched_clock(); hrtimer_cancel(&sched_clock_timer); rd->read_sched_clock = suspended_sched_clock_read; + return 0; } @@ -286,13 +290,14 @@ static void sched_clock_resume(void) } static struct syscore_ops sched_clock_ops = { - .suspend = sched_clock_suspend, - .resume = sched_clock_resume, + .suspend = sched_clock_suspend, + .resume = sched_clock_resume, }; static int __init sched_clock_syscore_init(void) { register_syscore_ops(&sched_clock_ops); + return 0; } device_initcall(sched_clock_syscore_init); -- cgit v1.2.3 From 876e78818def2983be55878b21f7152fbaebbd36 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 10:09:06 +0100 Subject: time: Rename timekeeper::tkr to timekeeper::tkr_mono In preparation of adding another tkr field, rename this one to tkr_mono. Also rename tk_read_base::base_mono to tk_read_base::base, since the structure is not specific to CLOCK_MONOTONIC and the mono name got added to the tk_read_base instance. Lots of trivial churn. 
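For reference, a minimal sketch of the layout after the rename (only the members relevant here are shown; the full definitions live in include/linux/timekeeper_internal.h):

	struct tk_read_base {
		struct clocksource	*clock;
		cycle_t			(*read)(struct clocksource *cs);
		cycle_t			mask;
		cycle_t			cycle_last;
		u32			mult;
		u32			shift;
		u64			xtime_nsec;
		ktime_t			base;		/* was: base_mono */
	};

	struct timekeeper {
		struct tk_read_base	tkr_mono;	/* was: tkr */
		/* ... */
	};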
Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.344679419@infradead.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 150 +++++++++++++++++++++++----------------------- 1 file changed, 75 insertions(+), 75 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 892f6cbf1e67..1405091f3acb 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -68,8 +68,8 @@ bool __read_mostly persistent_clock_exist = false; static inline void tk_normalize_xtime(struct timekeeper *tk) { - while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) { - tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift; + while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) { + tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift; tk->xtime_sec++; } } @@ -79,20 +79,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk) struct timespec64 ts; ts.tv_sec = tk->xtime_sec; - ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift); + ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); return ts; } static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec = ts->tv_sec; - tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift; + tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift; } static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts) { tk->xtime_sec += ts->tv_sec; - tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift; + tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift; tk_normalize_xtime(tk); } @@ -136,8 +136,8 @@ static long timekeeping_last_warning; static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset) { - cycle_t max_cycles = tk->tkr.clock->max_cycles; - const char *name = tk->tkr.clock->name; + cycle_t max_cycles = tk->tkr_mono.clock->max_cycles; + const char *name = tk->tkr_mono.clock->name; if (offset > max_cycles) { printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n", @@ -246,11 +246,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) u64 tmp, ntpinterval; struct clocksource *old_clock; - old_clock = tk->tkr.clock; - tk->tkr.clock = clock; - tk->tkr.read = clock->read; - tk->tkr.mask = clock->mask; - tk->tkr.cycle_last = tk->tkr.read(clock); + old_clock = tk->tkr_mono.clock; + tk->tkr_mono.clock = clock; + tk->tkr_mono.read = clock->read; + tk->tkr_mono.mask = clock->mask; + tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock); /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; @@ -274,11 +274,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) if (old_clock) { int shift_change = clock->shift - old_clock->shift; if (shift_change < 0) - tk->tkr.xtime_nsec >>= -shift_change; + tk->tkr_mono.xtime_nsec >>= -shift_change; else - tk->tkr.xtime_nsec <<= shift_change; + tk->tkr_mono.xtime_nsec <<= shift_change; } - tk->tkr.shift = clock->shift; + tk->tkr_mono.shift = clock->shift; tk->ntp_error = 0; tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; @@ -289,7 +289,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) * active clocksource. These value will be adjusted via NTP * to counteract clock drifting. 
*/ - tk->tkr.mult = clock->mult; + tk->tkr_mono.mult = clock->mult; tk->ntp_err_mult = 0; } @@ -318,11 +318,11 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) { - struct clocksource *clock = tk->tkr.clock; + struct clocksource *clock = tk->tkr_mono.clock; cycle_t delta; s64 nsec; - delta = timekeeping_get_delta(&tk->tkr); + delta = timekeeping_get_delta(&tk->tkr_mono); /* convert delta to nanoseconds. */ nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); @@ -428,7 +428,7 @@ u64 notrace ktime_get_mono_fast_ns(void) do { seq = raw_read_seqcount(&tk_fast_mono.seq); tkr = tk_fast_mono.base + (seq & 0x01); - now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr); + now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr); } while (read_seqcount_retry(&tk_fast_mono.seq, seq)); return now; @@ -456,7 +456,7 @@ static cycle_t dummy_clock_read(struct clocksource *cs) static void halt_fast_timekeeper(struct timekeeper *tk) { static struct tk_read_base tkr_dummy; - struct tk_read_base *tkr = &tk->tkr; + struct tk_read_base *tkr = &tk->tkr_mono; memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); cycles_at_suspend = tkr->read(tkr->clock); @@ -472,8 +472,8 @@ static inline void update_vsyscall(struct timekeeper *tk) xt = timespec64_to_timespec(tk_xtime(tk)); wm = timespec64_to_timespec(tk->wall_to_monotonic); - update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult, - tk->tkr.cycle_last); + update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult, + tk->tkr_mono.cycle_last); } static inline void old_vsyscall_fixup(struct timekeeper *tk) @@ -490,11 +490,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk) * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD * users are removed, this can be killed. */ - remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1); - tk->tkr.xtime_nsec -= remainder; - tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift; + remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1); + tk->tkr_mono.xtime_nsec -= remainder; + tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift; tk->ntp_error += remainder << tk->ntp_error_shift; - tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift; + tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift; } #else #define old_vsyscall_fixup(tk) @@ -559,7 +559,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) */ seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); nsec = (u32) tk->wall_to_monotonic.tv_nsec; - tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); + tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); /* Update the monotonic raw base */ tk->base_raw = timespec64_to_ktime(tk->raw_time); @@ -569,7 +569,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) * wall_to_monotonic can be greater/equal one second. Take * this into account before updating tk->ktime_sec. 
*/ - nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift); + nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); if (nsec >= NSEC_PER_SEC) seconds++; tk->ktime_sec = seconds; @@ -592,7 +592,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) memcpy(&shadow_timekeeper, &tk_core.timekeeper, sizeof(tk_core.timekeeper)); - update_fast_timekeeper(&tk->tkr); + update_fast_timekeeper(&tk->tkr_mono); } /** @@ -604,18 +604,18 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) */ static void timekeeping_forward_now(struct timekeeper *tk) { - struct clocksource *clock = tk->tkr.clock; + struct clocksource *clock = tk->tkr_mono.clock; cycle_t cycle_now, delta; s64 nsec; - cycle_now = tk->tkr.read(clock); - delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask); - tk->tkr.cycle_last = cycle_now; + cycle_now = tk->tkr_mono.read(clock); + delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); + tk->tkr_mono.cycle_last = cycle_now; - tk->tkr.xtime_nsec += delta * tk->tkr.mult; + tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult; /* If arch requires, add in get_arch_timeoffset() */ - tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift; + tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift; tk_normalize_xtime(tk); @@ -640,7 +640,7 @@ int __getnstimeofday64(struct timespec64 *ts) seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; - nsecs = timekeeping_get_ns(&tk->tkr); + nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -680,8 +680,8 @@ ktime_t ktime_get(void) do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->tkr.base_mono; - nsecs = timekeeping_get_ns(&tk->tkr); + base = tk->tkr_mono.base; + nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -706,8 +706,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs) do { seq = read_seqcount_begin(&tk_core.seq); - base = ktime_add(tk->tkr.base_mono, *offset); - nsecs = timekeeping_get_ns(&tk->tkr); + base = ktime_add(tk->tkr_mono.base, *offset); + nsecs = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -777,7 +777,7 @@ void ktime_get_ts64(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); ts->tv_sec = tk->xtime_sec; - nsec = timekeeping_get_ns(&tk->tkr); + nsec = timekeeping_get_ns(&tk->tkr_mono); tomono = tk->wall_to_monotonic; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -863,7 +863,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) ts_real->tv_nsec = 0; nsecs_raw = timekeeping_get_ns_raw(tk); - nsecs_real = timekeeping_get_ns(&tk->tkr); + nsecs_real = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1046,7 +1046,7 @@ static int change_clocksource(void *data) */ if (try_module_get(new->owner)) { if (!new->enable || new->enable(new) == 0) { - old = tk->tkr.clock; + old = tk->tkr_mono.clock; tk_setup_internals(tk, new); if (old->disable) old->disable(old); @@ -1074,11 +1074,11 @@ int timekeeping_notify(struct clocksource *clock) { struct timekeeper *tk = &tk_core.timekeeper; - if (tk->tkr.clock == clock) + if (tk->tkr_mono.clock == clock) return 0; stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); - return tk->tkr.clock == clock ? 0 : -1; + return tk->tkr_mono.clock == clock ? 
0 : -1; } /** @@ -1119,7 +1119,7 @@ int timekeeping_valid_for_hres(void) do { seq = read_seqcount_begin(&tk_core.seq); - ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; + ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1138,7 +1138,7 @@ u64 timekeeping_max_deferment(void) do { seq = read_seqcount_begin(&tk_core.seq); - ret = tk->tkr.clock->max_idle_ns; + ret = tk->tkr_mono.clock->max_idle_ns; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1303,7 +1303,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) void timekeeping_resume(void) { struct timekeeper *tk = &tk_core.timekeeper; - struct clocksource *clock = tk->tkr.clock; + struct clocksource *clock = tk->tkr_mono.clock; unsigned long flags; struct timespec64 ts_new, ts_delta; struct timespec tmp; @@ -1331,16 +1331,16 @@ void timekeeping_resume(void) * The less preferred source will only be tried if there is no better * usable source. The rtc part is handled separately in rtc core code. */ - cycle_now = tk->tkr.read(clock); + cycle_now = tk->tkr_mono.read(clock); if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && - cycle_now > tk->tkr.cycle_last) { + cycle_now > tk->tkr_mono.cycle_last) { u64 num, max = ULLONG_MAX; u32 mult = clock->mult; u32 shift = clock->shift; s64 nsec = 0; - cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, - tk->tkr.mask); + cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, + tk->tkr_mono.mask); /* * "cycle_delta * mutl" may cause 64 bits overflow, if the @@ -1366,7 +1366,7 @@ void timekeeping_resume(void) __timekeeping_inject_sleeptime(tk, &ts_delta); /* Re-base the last cycle value */ - tk->tkr.cycle_last = cycle_now; + tk->tkr_mono.cycle_last = cycle_now; tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); @@ -1519,15 +1519,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, * * XXX - TODO: Doc ntp_error calculation. */ - if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) { + if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) { /* NTP adjustment caused clocksource mult overflow */ WARN_ON_ONCE(1); return; } - tk->tkr.mult += mult_adj; + tk->tkr_mono.mult += mult_adj; tk->xtime_interval += interval; - tk->tkr.xtime_nsec -= offset; + tk->tkr_mono.xtime_nsec -= offset; tk->ntp_error -= (interval - offset) << tk->ntp_error_shift; } @@ -1589,13 +1589,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) tk->ntp_err_mult = 0; } - if (unlikely(tk->tkr.clock->maxadj && - (abs(tk->tkr.mult - tk->tkr.clock->mult) - > tk->tkr.clock->maxadj))) { + if (unlikely(tk->tkr_mono.clock->maxadj && + (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult) + > tk->tkr_mono.clock->maxadj))) { printk_once(KERN_WARNING "Adjusting %s more than 11%% (%ld vs %ld)\n", - tk->tkr.clock->name, (long)tk->tkr.mult, - (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj); + tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult, + (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj); } /* @@ -1612,9 +1612,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) * We'll correct this error next time through this function, when * xtime_nsec is not as small. 
*/ - if (unlikely((s64)tk->tkr.xtime_nsec < 0)) { - s64 neg = -(s64)tk->tkr.xtime_nsec; - tk->tkr.xtime_nsec = 0; + if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) { + s64 neg = -(s64)tk->tkr_mono.xtime_nsec; + tk->tkr_mono.xtime_nsec = 0; tk->ntp_error += neg << tk->ntp_error_shift; } } @@ -1629,13 +1629,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) */ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk) { - u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift; + u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift; unsigned int clock_set = 0; - while (tk->tkr.xtime_nsec >= nsecps) { + while (tk->tkr_mono.xtime_nsec >= nsecps) { int leap; - tk->tkr.xtime_nsec -= nsecps; + tk->tkr_mono.xtime_nsec -= nsecps; tk->xtime_sec++; /* Figure out if its a leap sec and apply if needed */ @@ -1680,9 +1680,9 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, /* Accumulate one shifted interval */ offset -= interval; - tk->tkr.cycle_last += interval; + tk->tkr_mono.cycle_last += interval; - tk->tkr.xtime_nsec += tk->xtime_interval << shift; + tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift; *clock_set |= accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ @@ -1725,8 +1725,8 @@ void update_wall_time(void) #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET offset = real_tk->cycle_interval; #else - offset = clocksource_delta(tk->tkr.read(tk->tkr.clock), - tk->tkr.cycle_last, tk->tkr.mask); + offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock), + tk->tkr_mono.cycle_last, tk->tkr_mono.mask); #endif /* Check if there's really nothing to do */ @@ -1890,8 +1890,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot, do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->tkr.base_mono; - nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift; + base = tk->tkr_mono.base; + nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; @@ -1922,8 +1922,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot, do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->tkr.base_mono; - nsecs = timekeeping_get_ns(&tk->tkr); + base = tk->tkr_mono.base; + nsecs = timekeeping_get_ns(&tk->tkr_mono); *offs_real = tk->offs_real; *offs_boot = tk->offs_boot; -- cgit v1.2.3 From 4a4ad80d32cea69ee93bd4589f24dc478804cd80 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 09:28:44 +0100 Subject: time: Add timerkeeper::tkr_raw Introduce tkr_raw and make use of it. base_raw -> tkr_raw.base clock->{mult,shift} -> tkr_raw.{mult.shift} Kill timekeeping_get_ns_raw() in favour of timekeeping_get_ns(&tkr_raw), this removes all mono_raw special casing. Duplicate the updates to tkr_mono.cycle_last into tkr_raw.cycle_last, both need the same value. 
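Condensed from the timekeeping_forward_now() hunk below, the invariant this patch establishes is that both read bases share one hardware clocksource and therefore must share one cycle epoch:

	cycle_now = tk->tkr_mono.read(clock);
	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
				  tk->tkr_mono.mask);
	tk->tkr_mono.cycle_last = cycle_now;
	tk->tkr_raw.cycle_last = cycle_now;	/* keep raw in lock-step */

If the two epochs ever diverged, timekeeping_get_ns(&tk->tkr_raw) would account some cycles twice (or drop them), so every place that advances tkr_mono.cycle_last now advances tkr_raw.cycle_last as well.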
Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.422589590@infradead.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 1405091f3acb..cbb612ee813f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -252,6 +252,11 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) tk->tkr_mono.mask = clock->mask; tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock); + tk->tkr_raw.clock = clock; + tk->tkr_raw.read = clock->read; + tk->tkr_raw.mask = clock->mask; + tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last; + /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; tmp <<= clock->shift; @@ -278,7 +283,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) else tk->tkr_mono.xtime_nsec <<= shift_change; } + tk->tkr_raw.xtime_nsec = 0; + tk->tkr_mono.shift = clock->shift; + tk->tkr_raw.shift = clock->shift; tk->ntp_error = 0; tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; @@ -290,6 +298,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) * to counteract clock drifting. */ tk->tkr_mono.mult = clock->mult; + tk->tkr_raw.mult = clock->mult; tk->ntp_err_mult = 0; } @@ -316,21 +325,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) return nsec + arch_gettimeoffset(); } -static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) -{ - struct clocksource *clock = tk->tkr_mono.clock; - cycle_t delta; - s64 nsec; - - delta = timekeeping_get_delta(&tk->tkr_mono); - - /* convert delta to nanoseconds. */ - nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); - - /* If arch requires, add in get_arch_timeoffset() */ - return nsec + arch_gettimeoffset(); -} - /** * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. 
* @tkr: Timekeeping readout base from which we take the update @@ -562,7 +556,7 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); /* Update the monotonic raw base */ - tk->base_raw = timespec64_to_ktime(tk->raw_time); + tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time); /* * The sum of the nanoseconds portions of xtime and @@ -611,6 +605,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) cycle_now = tk->tkr_mono.read(clock); delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); tk->tkr_mono.cycle_last = cycle_now; + tk->tkr_raw.cycle_last = cycle_now; tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult; @@ -619,7 +614,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) tk_normalize_xtime(tk); - nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift); + nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift); timespec64_add_ns(&tk->raw_time, nsec); } @@ -748,8 +743,8 @@ ktime_t ktime_get_raw(void) do { seq = read_seqcount_begin(&tk_core.seq); - base = tk->base_raw; - nsecs = timekeeping_get_ns_raw(tk); + base = tk->tkr_raw.base; + nsecs = timekeeping_get_ns(&tk->tkr_raw); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -862,7 +857,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real) ts_real->tv_sec = tk->xtime_sec; ts_real->tv_nsec = 0; - nsecs_raw = timekeeping_get_ns_raw(tk); + nsecs_raw = timekeeping_get_ns(&tk->tkr_raw); nsecs_real = timekeeping_get_ns(&tk->tkr_mono); } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1096,7 +1091,7 @@ void getrawmonotonic64(struct timespec64 *ts) do { seq = read_seqcount_begin(&tk_core.seq); - nsecs = timekeeping_get_ns_raw(tk); + nsecs = timekeeping_get_ns(&tk->tkr_raw); ts64 = tk->raw_time; } while (read_seqcount_retry(&tk_core.seq, seq)); @@ -1217,7 +1212,6 @@ void __init timekeeping_init(void) tk_set_xtime(tk, &now); tk->raw_time.tv_sec = 0; tk->raw_time.tv_nsec = 0; - tk->base_raw.tv64 = 0; if (boot.tv_sec == 0 && boot.tv_nsec == 0) boot = tk_xtime(tk); @@ -1367,6 +1361,8 @@ void timekeeping_resume(void) /* Re-base the last cycle value */ tk->tkr_mono.cycle_last = cycle_now; + tk->tkr_raw.cycle_last = cycle_now; + tk->ntp_error = 0; timekeeping_suspended = 0; timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); @@ -1681,6 +1677,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset, /* Accumulate one shifted interval */ offset -= interval; tk->tkr_mono.cycle_last += interval; + tk->tkr_raw.cycle_last += interval; tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift; *clock_set |= accumulate_nsecs_to_secs(tk); -- cgit v1.2.3 From 4498e7467e9e441c18ca12f1ca08460356e0508a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 09:36:19 +0100 Subject: time: Parametrize all tk_fast_mono users In preparation for more tk_fast instances, remove all hard-coded tk_fast_mono references. 
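To illustrate what the parametrization buys: adding a further NMI-safe clock (a hypothetical tk_fast_foo, not part of this patch) now needs only a new instance plus one extra call in timekeeping_update():

	static struct tk_fast tk_fast_foo ____cacheline_aligned;	/* hypothetical */

	u64 ktime_get_foo_fast_ns(void)
	{
		return __ktime_get_fast_ns(&tk_fast_foo);
	}

	/* in timekeeping_update(): */
	update_fast_timekeeper(&tk->tkr_foo, &tk_fast_foo);	/* hypothetical base */

The next patch in this series does exactly this for the raw clock.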
Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.484279927@infradead.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index cbb612ee813f..278373edb472 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -364,18 +364,18 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) * slightly wrong timestamp (a few nanoseconds). See * @ktime_get_mono_fast_ns. */ -static void update_fast_timekeeper(struct tk_read_base *tkr) +static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf) { - struct tk_read_base *base = tk_fast_mono.base; + struct tk_read_base *base = tkf->base; /* Force readers off to base[1] */ - raw_write_seqcount_latch(&tk_fast_mono.seq); + raw_write_seqcount_latch(&tkf->seq); /* Update base[0] */ memcpy(base, tkr, sizeof(*base)); /* Force readers back to base[0] */ - raw_write_seqcount_latch(&tk_fast_mono.seq); + raw_write_seqcount_latch(&tkf->seq); /* Update base[1] */ memcpy(base + 1, base, sizeof(*base)); @@ -413,20 +413,25 @@ static void update_fast_timekeeper(struct tk_read_base *tkr) * of the following timestamps. Callers need to be aware of that and * deal with it. */ -u64 notrace ktime_get_mono_fast_ns(void) +static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) { struct tk_read_base *tkr; unsigned int seq; u64 now; do { - seq = raw_read_seqcount(&tk_fast_mono.seq); - tkr = tk_fast_mono.base + (seq & 0x01); + seq = raw_read_seqcount(&tkf->seq); + tkr = tkf->base + (seq & 0x01); now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr); + } while (read_seqcount_retry(&tkf->seq, seq)); - } while (read_seqcount_retry(&tk_fast_mono.seq, seq)); return now; } + +u64 ktime_get_mono_fast_ns(void) +{ + return __ktime_get_fast_ns(&tk_fast_mono); +} EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); /* Suspend-time cycles value for halted fast timekeeper. */ @@ -455,7 +460,7 @@ static void halt_fast_timekeeper(struct timekeeper *tk) memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); cycles_at_suspend = tkr->read(tkr->clock); tkr_dummy.read = dummy_clock_read; - update_fast_timekeeper(&tkr_dummy); + update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); } #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD @@ -586,7 +591,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) memcpy(&shadow_timekeeper, &tk_core.timekeeper, sizeof(tk_core.timekeeper)); - update_fast_timekeeper(&tk->tkr_mono); + update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); } /** -- cgit v1.2.3 From f09cb9a1808e35ad7502ea39b6bfb443c7fa0f19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2015 09:39:08 +0100 Subject: time: Introduce tk_fast_raw Add the NMI safe CLOCK_MONOTONIC_RAW accessor.. 
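A sketch of the intended use; the handler and sink below are made-up names, but the accessor is the one added by this patch. Because the latch always leaves one copy of the readout base consistent, the call never spins, even if the NMI landed in the middle of an update:

	/* hypothetical NMI/FIQ-context tracer */
	static void my_nmi_trace_event(void)
	{
		u64 ts = ktime_get_raw_fast_ns();	/* lock-free, monotonic-raw ns */

		record_sample(ts);			/* hypothetical sink */
	}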
Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Stultz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150319093400.562746929@infradead.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 278373edb472..c3fcff06d30a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -59,6 +59,7 @@ struct tk_fast { }; static struct tk_fast tk_fast_mono ____cacheline_aligned; +static struct tk_fast tk_fast_raw ____cacheline_aligned; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -434,6 +435,12 @@ u64 ktime_get_mono_fast_ns(void) } EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); +u64 ktime_get_raw_fast_ns(void) +{ + return __ktime_get_fast_ns(&tk_fast_raw); +} +EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); + /* Suspend-time cycles value for halted fast timekeeper. */ static cycle_t cycles_at_suspend; @@ -461,6 +468,11 @@ static void halt_fast_timekeeper(struct timekeeper *tk) cycles_at_suspend = tkr->read(tkr->clock); tkr_dummy.read = dummy_clock_read; update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); + + tkr = &tk->tkr_raw; + memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); + tkr_dummy.read = dummy_clock_read; + update_fast_timekeeper(&tkr_dummy, &tk_fast_raw); } #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD @@ -592,6 +604,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) sizeof(tk_core.timekeeper)); update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono); + update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw); } /** -- cgit v1.2.3 From 554ef3876c6acdff1331feab10275e9e9e0adb84 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 27 Feb 2015 17:21:32 +0530 Subject: clockevents: Handle tick device's resume separately Upcoming patch will redefine possible states of a clockevent device. The RESUME mode is a special case only for tick's clockevent devices. In future it can be replaced by ->resume() callback already available for clockevent devices. Lets handle it separately so that clockevents_set_mode() only handles states valid across all devices. This also renames set_mode_resume() to tick_resume() to make it more explicit. 
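From a driver's point of view the change looks roughly like this (the foo_* names are illustrative, not a real driver):

	static int foo_tick_resume(struct clock_event_device *evt)
	{
		foo_hw_reinit();	/* hypothetical: reprogram timer after resume */
		return 0;
	}

	static struct clock_event_device foo_clockevent = {
		/* ... */
		.tick_resume	= foo_tick_resume,	/* was: .set_mode_resume */
	};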
Signed-off-by: Viresh Kumar Acked-by: Peter Zijlstra Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: Kevin Hilman Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/c1b0112410870f49e7bf06958e1483eac6c15e20.1425037853.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 30 +++++++++++++++++++++--------- kernel/time/tick-broadcast.c | 2 +- kernel/time/tick-common.c | 2 +- kernel/time/tick-internal.h | 1 + kernel/time/timer_list.c | 4 ++-- 5 files changed, 26 insertions(+), 13 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 489642b08d64..1b0ea63de69c 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -100,7 +100,7 @@ static int __clockevents_set_mode(struct clock_event_device *dev, /* Transition with legacy set_mode() callback */ if (dev->set_mode) { /* Legacy callback doesn't support new modes */ - if (mode > CLOCK_EVT_MODE_RESUME) + if (mode > CLOCK_EVT_MODE_ONESHOT) return -ENOSYS; dev->set_mode(mode, dev); return 0; @@ -133,13 +133,6 @@ static int __clockevents_set_mode(struct clock_event_device *dev, return -ENOSYS; return dev->set_mode_oneshot(dev); - case CLOCK_EVT_MODE_RESUME: - /* Optional callback */ - if (dev->set_mode_resume) - return dev->set_mode_resume(dev); - else - return 0; - default: return -ENOSYS; } @@ -184,6 +177,25 @@ void clockevents_shutdown(struct clock_event_device *dev) dev->next_event.tv64 = KTIME_MAX; } +/** + * clockevents_tick_resume - Resume the tick device before using it again + * @dev: device to resume + */ +int clockevents_tick_resume(struct clock_event_device *dev) +{ + int ret = 0; + + if (dev->set_mode) + dev->set_mode(CLOCK_EVT_MODE_RESUME, dev); + else if (dev->tick_resume) + ret = dev->tick_resume(dev); + + if (likely(!ret)) + dev->mode = CLOCK_EVT_MODE_RESUME; + + return ret; +} + #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST /* Limit min_delta to a jiffie */ @@ -433,7 +445,7 @@ static int clockevents_sanity_check(struct clock_event_device *dev) if (dev->set_mode) { /* We shouldn't be supporting new modes now */ WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot || - dev->set_mode_shutdown || dev->set_mode_resume); + dev->set_mode_shutdown || dev->tick_resume); return 0; } diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 066f0ec05e48..542d5bb5c13d 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -464,7 +464,7 @@ int tick_resume_broadcast(void) bc = tick_broadcast_device.evtdev; if (bc) { - clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME); + clockevents_tick_resume(bc); switch (tick_broadcast_device.mode) { case TICKDEV_MODE_PERIODIC: diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index f7c515595b42..5c50664c21d7 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -384,7 +384,7 @@ void tick_resume(void) struct tick_device *td = this_cpu_ptr(&tick_cpu_device); int broadcast = tick_resume_broadcast(); - clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME); + clockevents_tick_resume(td->evtdev); if (!broadcast) { if (td->mode == TICKDEV_MODE_PERIODIC) diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 366aeb4f2c66..98700e4a2000 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -32,6 +32,7 @@ extern bool tick_check_replacement(struct 
clock_event_device *curdev, extern void tick_install_replacement(struct clock_event_device *dev); extern void clockevents_shutdown(struct clock_event_device *dev); +extern int clockevents_tick_resume(struct clock_event_device *dev); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 2cfd19485824..2b3e9393034d 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -251,9 +251,9 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) SEQ_printf(m, "\n"); } - if (dev->set_mode_resume) { + if (dev->tick_resume) { SEQ_printf(m, " resume: "); - print_name_offset(m, dev->set_mode_resume); + print_name_offset(m, dev->tick_resume); SEQ_printf(m, "\n"); } } -- cgit v1.2.3 From 77e32c89a7117614ab3d66d20c1088de721abfaa Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 27 Feb 2015 17:21:33 +0530 Subject: clockevents: Manage device's state separately for the core 'enum clock_event_mode' is used for two purposes today: - to pass the mode to the clockevent device driver via ::set_mode(). - to manage the state of the device for the clockevents core. For supporting new modes/states we have moved away from the legacy set_mode() callback to new per-mode/state callbacks. New modes/states shouldn't be exposed to the legacy (now OBSOLETE) callbacks, and so we shouldn't add new states to 'enum clock_event_mode'. Let's have separate enums for the two use cases mentioned above. Keep using the earlier enum for the legacy set_mode() callback and mark it OBSOLETE. And add another enum to clearly specify the possible states of a clockevent device. This also renames the newly added per-mode callbacks to reflect state changes. We haven't got rid of the 'mode' member of 'struct clock_event_device' as it is used by some of the clockevent drivers and it will automatically die out once we migrate those drivers to the new interface. It ('mode') is now only updated for the drivers using the legacy interface.
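The resulting split, reconstructed from the state names used below (the clockchips.h change itself is outside the 'kernel/time' slice shown here); the ordering matters, since the two enums keep a 1-to-1 mapping up to *_ONESHOT so the legacy path can cast between them:

	/* legacy, passed to ->set_mode(); now OBSOLETE */
	enum clock_event_mode {
		CLOCK_EVT_MODE_UNUSED,
		CLOCK_EVT_MODE_SHUTDOWN,
		CLOCK_EVT_MODE_PERIODIC,
		CLOCK_EVT_MODE_ONESHOT,
		CLOCK_EVT_MODE_RESUME,
	};

	/* core-internal device state; new states are added here only */
	enum clock_event_state {
		CLOCK_EVT_STATE_DETACHED,	/* replaces MODE_UNUSED */
		CLOCK_EVT_STATE_SHUTDOWN,
		CLOCK_EVT_STATE_PERIODIC,
		CLOCK_EVT_STATE_ONESHOT,
	};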
Suggested-by: Peter Zijlstra Suggested-by: Ingo Molnar Signed-off-by: Viresh Kumar Acked-by: Peter Zijlstra Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: Kevin Hilman Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/b6b0143a8a57bd58352ad35e08c25424c879c0cb.1425037853.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 99 ++++++++++++++++++++++++-------------------- kernel/time/tick-broadcast.c | 20 ++++----- kernel/time/tick-common.c | 7 ++-- kernel/time/tick-oneshot.c | 6 +-- kernel/time/timer_list.c | 12 +++--- 5 files changed, 76 insertions(+), 68 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 1b0ea63de69c..6e53e9a0c2e8 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -94,44 +94,49 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) } EXPORT_SYMBOL_GPL(clockevent_delta2ns); -static int __clockevents_set_mode(struct clock_event_device *dev, - enum clock_event_mode mode) +static int __clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state) { /* Transition with legacy set_mode() callback */ if (dev->set_mode) { /* Legacy callback doesn't support new modes */ - if (mode > CLOCK_EVT_MODE_ONESHOT) + if (state > CLOCK_EVT_STATE_ONESHOT) return -ENOSYS; - dev->set_mode(mode, dev); + /* + * 'clock_event_state' and 'clock_event_mode' have 1-to-1 + * mapping until *_ONESHOT, and so a simple cast will work. + */ + dev->set_mode((enum clock_event_mode)state, dev); + dev->mode = (enum clock_event_mode)state; return 0; } if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; - /* Transition with new mode-specific callbacks */ - switch (mode) { - case CLOCK_EVT_MODE_UNUSED: + /* Transition with new state-specific callbacks */ + switch (state) { + case CLOCK_EVT_STATE_DETACHED: /* * This is an internal state, which is guaranteed to go from - * SHUTDOWN to UNUSED. No driver interaction required. + * SHUTDOWN to DETACHED. No driver interaction required. */ return 0; - case CLOCK_EVT_MODE_SHUTDOWN: - return dev->set_mode_shutdown(dev); + case CLOCK_EVT_STATE_SHUTDOWN: + return dev->set_state_shutdown(dev); - case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_STATE_PERIODIC: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) return -ENOSYS; - return dev->set_mode_periodic(dev); + return dev->set_state_periodic(dev); - case CLOCK_EVT_MODE_ONESHOT: + case CLOCK_EVT_STATE_ONESHOT: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) return -ENOSYS; - return dev->set_mode_oneshot(dev); + return dev->set_state_oneshot(dev); default: return -ENOSYS; @@ -139,26 +144,26 @@ static int __clockevents_set_mode(struct clock_event_device *dev, } /** - * clockevents_set_mode - set the operating mode of a clock event device + * clockevents_set_state - set the operating state of a clock event device * @dev: device to modify - * @mode: new mode + * @state: new state * * Must be called with interrupts disabled ! 
*/ -void clockevents_set_mode(struct clock_event_device *dev, - enum clock_event_mode mode) +void clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state) { - if (dev->mode != mode) { - if (__clockevents_set_mode(dev, mode)) + if (dev->state != state) { + if (__clockevents_set_state(dev, state)) return; - dev->mode = mode; + dev->state = state; /* * A nsec2cyc multiplicator of 0 is invalid and we'd crash * on it, so fix it up and emit a warning: */ - if (mode == CLOCK_EVT_MODE_ONESHOT) { + if (state == CLOCK_EVT_STATE_ONESHOT) { if (unlikely(!dev->mult)) { dev->mult = 1; WARN_ON(1); @@ -173,7 +178,7 @@ void clockevents_set_mode(struct clock_event_device *dev, */ void clockevents_shutdown(struct clock_event_device *dev) { - clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); + clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); dev->next_event.tv64 = KTIME_MAX; } @@ -185,13 +190,12 @@ int clockevents_tick_resume(struct clock_event_device *dev) { int ret = 0; - if (dev->set_mode) + if (dev->set_mode) { dev->set_mode(CLOCK_EVT_MODE_RESUME, dev); - else if (dev->tick_resume) - ret = dev->tick_resume(dev); - - if (likely(!ret)) dev->mode = CLOCK_EVT_MODE_RESUME; + } else if (dev->tick_resume) { + ret = dev->tick_resume(dev); + } return ret; } @@ -248,7 +252,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev) delta = dev->min_delta_ns; dev->next_event = ktime_add_ns(ktime_get(), delta); - if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) return 0; dev->retries++; @@ -285,7 +289,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev) delta = dev->min_delta_ns; dev->next_event = ktime_add_ns(ktime_get(), delta); - if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) return 0; dev->retries++; @@ -317,7 +321,7 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, dev->next_event = expires; - if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN) + if (dev->state == CLOCK_EVT_STATE_SHUTDOWN) return 0; /* Shortcut for clockevent devices that can deal with ktime. */ @@ -362,7 +366,7 @@ static int clockevents_replace(struct clock_event_device *ced) struct clock_event_device *dev, *newdev = NULL; list_for_each_entry(dev, &clockevent_devices, list) { - if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED) + if (dev == ced || dev->state != CLOCK_EVT_STATE_DETACHED) continue; if (!tick_check_replacement(newdev, dev)) @@ -388,7 +392,7 @@ static int clockevents_replace(struct clock_event_device *ced) static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu) { /* Fast track. 
Device is unused */ - if (ced->mode == CLOCK_EVT_MODE_UNUSED) { + if (ced->state == CLOCK_EVT_STATE_DETACHED) { list_del_init(&ced->list); return 0; } @@ -438,30 +442,30 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) } EXPORT_SYMBOL_GPL(clockevents_unbind); -/* Sanity check of mode transition callbacks */ +/* Sanity check of state transition callbacks */ static int clockevents_sanity_check(struct clock_event_device *dev) { /* Legacy set_mode() callback */ if (dev->set_mode) { /* We shouldn't be supporting new modes now */ - WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot || - dev->set_mode_shutdown || dev->tick_resume); + WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || + dev->set_state_shutdown || dev->tick_resume); return 0; } if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; - /* New mode-specific callbacks */ - if (!dev->set_mode_shutdown) + /* New state-specific callbacks */ + if (!dev->set_state_shutdown) return -EINVAL; if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && - !dev->set_mode_periodic) + !dev->set_state_periodic) return -EINVAL; if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) && - !dev->set_mode_oneshot) + !dev->set_state_oneshot) return -EINVAL; return 0; @@ -478,6 +482,9 @@ void clockevents_register_device(struct clock_event_device *dev) BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); BUG_ON(clockevents_sanity_check(dev)); + /* Initialize state to DETACHED */ + dev->state = CLOCK_EVT_STATE_DETACHED; + if (!dev->cpumask) { WARN_ON(num_possible_cpus() > 1); dev->cpumask = cpumask_of(smp_processor_id()); @@ -541,11 +548,11 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq) { clockevents_config(dev, freq); - if (dev->mode == CLOCK_EVT_MODE_ONESHOT) + if (dev->state == CLOCK_EVT_STATE_ONESHOT) return clockevents_program_event(dev, dev->next_event, false); - if (dev->mode == CLOCK_EVT_MODE_PERIODIC) - return __clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); + if (dev->state == CLOCK_EVT_STATE_PERIODIC) + return __clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC); return 0; } @@ -601,13 +608,13 @@ void clockevents_exchange_device(struct clock_event_device *old, */ if (old) { module_put(old->owner); - clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED); + clockevents_set_state(old, CLOCK_EVT_STATE_DETACHED); list_del(&old->list); list_add(&old->list, &clockevents_released); } if (new) { - BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED); + BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED); clockevents_shutdown(new); } local_irq_restore(flags); @@ -693,7 +700,7 @@ int clockevents_notify(unsigned long reason, void *arg) if (cpumask_test_cpu(cpu, dev->cpumask) && cpumask_weight(dev->cpumask) == 1 && !tick_is_broadcast_device(dev)) { - BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED); list_del(&dev->list); } } diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 542d5bb5c13d..f0f8ee9dbc28 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -303,7 +303,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) /* * The device is in periodic mode. 
No reprogramming necessary: */ - if (dev->mode == CLOCK_EVT_MODE_PERIODIC) + if (dev->state == CLOCK_EVT_STATE_PERIODIC) goto unlock; /* @@ -532,8 +532,8 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu, { int ret; - if (bc->mode != CLOCK_EVT_MODE_ONESHOT) - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + if (bc->state != CLOCK_EVT_STATE_ONESHOT) + clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); ret = clockevents_program_event(bc, expires, force); if (!ret) @@ -543,7 +543,7 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu, int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); return 0; } @@ -562,8 +562,8 @@ void tick_check_oneshot_broadcast_this_cpu(void) * switched over, leave the device alone. */ if (td->mode == TICKDEV_MODE_ONESHOT) { - clockevents_set_mode(td->evtdev, - CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(td->evtdev, + CLOCK_EVT_STATE_ONESHOT); } } } @@ -666,7 +666,7 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, if (dev->next_event.tv64 < bc->next_event.tv64) return; } - clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); + clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } static void broadcast_move_bc(int deadcpu) @@ -741,7 +741,7 @@ int tick_broadcast_oneshot_control(unsigned long reason) cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); /* * The cpu which was handling the broadcast * timer marked this cpu in the broadcast @@ -842,7 +842,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) /* Set it up only once ! 
*/ if (bc->event_handler != tick_handle_oneshot_broadcast) { - int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; + int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC; bc->event_handler = tick_handle_oneshot_broadcast; @@ -858,7 +858,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) tick_broadcast_oneshot_mask, tmpmask); if (was_periodic && !cpumask_empty(tmpmask)) { - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); tick_broadcast_init_next_event(tmpmask, tick_next_period); tick_broadcast_set_event(bc, cpu, tick_next_period, 1); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 5c50664c21d7..a5b877130ae9 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -102,7 +102,7 @@ void tick_handle_periodic(struct clock_event_device *dev) tick_periodic(cpu); - if (dev->mode != CLOCK_EVT_MODE_ONESHOT) + if (dev->state != CLOCK_EVT_STATE_ONESHOT) return; for (;;) { /* @@ -140,7 +140,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && !tick_broadcast_oneshot_active()) { - clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC); + clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC); } else { unsigned long seq; ktime_t next; @@ -150,7 +150,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) next = tick_next_period; } while (read_seqretry(&jiffies_lock, seq)); - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); for (;;) { if (!clockevents_program_event(dev, next, false)) @@ -365,6 +365,7 @@ void tick_shutdown(unsigned int *cpup) * Prevent that the clock events layer tries to call * the set mode function! 
*/ + dev->state = CLOCK_EVT_STATE_DETACHED; dev->mode = CLOCK_EVT_MODE_UNUSED; clockevents_exchange_device(dev, NULL); dev->event_handler = clockevents_handle_noop; diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index 7ce740e78e1b..67a64b1670bf 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -38,7 +38,7 @@ void tick_resume_oneshot(void) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); clockevents_program_event(dev, ktime_get(), true); } @@ -50,7 +50,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev, ktime_t next_event) { newdev->event_handler = handler; - clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(newdev, CLOCK_EVT_STATE_ONESHOT); clockevents_program_event(newdev, next_event, true); } @@ -81,7 +81,7 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)) td->mode = TICKDEV_MODE_ONESHOT; dev->event_handler = handler; - clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); + clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT); tick_broadcast_switch_to_oneshot(); return 0; } diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 2b3e9393034d..05aa5590106a 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -233,21 +233,21 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) print_name_offset(m, dev->set_mode); SEQ_printf(m, "\n"); } else { - if (dev->set_mode_shutdown) { + if (dev->set_state_shutdown) { SEQ_printf(m, " shutdown: "); - print_name_offset(m, dev->set_mode_shutdown); + print_name_offset(m, dev->set_state_shutdown); SEQ_printf(m, "\n"); } - if (dev->set_mode_periodic) { + if (dev->set_state_periodic) { SEQ_printf(m, " periodic: "); - print_name_offset(m, dev->set_mode_periodic); + print_name_offset(m, dev->set_state_periodic); SEQ_printf(m, "\n"); } - if (dev->set_mode_oneshot) { + if (dev->set_state_oneshot) { SEQ_printf(m, " oneshot: "); - print_name_offset(m, dev->set_mode_oneshot); + print_name_offset(m, dev->set_state_oneshot); SEQ_printf(m, "\n"); } -- cgit v1.2.3 From de81e64b250d3865a75d221a80b4311e3273670a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 27 Feb 2015 17:21:34 +0530 Subject: clockevents: Don't validate dev->mode against CLOCK_EVT_MODE_UNUSED for new interface It was a requirement of the legacy interface that drivers initialize the ->mode field to 'CLOCK_EVT_MODE_UNUSED'. This field isn't used by the new interface anymore, so it should only be checked for the legacy interface. It could probably be dropped altogether, as the core doesn't rely on it anymore, but let's keep it to support the legacy interface.
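For illustration, the split between the two driver styles looks roughly like this; a minimal sketch with hypothetical 'my_timer' names, not code from this series. Only the legacy path initializes ->mode and has it validated by the core; drivers using the per-state callbacks can ignore the field entirely.

#include <linux/clockchips.h>

/*
 * Legacy style: one multiplexed ->set_mode() callback. The driver is
 * expected to start out with ->mode = CLOCK_EVT_MODE_UNUSED, which the
 * core still validates at registration time.
 */
static void my_timer_set_mode(enum clock_event_mode mode,
			      struct clock_event_device *evt)
{
	switch (mode) {
	case CLOCK_EVT_MODE_PERIODIC:
		/* program the periodic interval */
		break;
	case CLOCK_EVT_MODE_ONESHOT:
		/* switch the hardware to one-shot operation */
		break;
	case CLOCK_EVT_MODE_SHUTDOWN:
	case CLOCK_EVT_MODE_UNUSED:
		/* stop the timer */
		break;
	default:
		break;
	}
}

static struct clock_event_device my_legacy_evt = {
	.name		= "my-timer",
	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.set_mode	= my_timer_set_mode,
	.mode		= CLOCK_EVT_MODE_UNUSED,	/* still checked */
};

/*
 * New style: one callback per state, each returning an error code.
 * ->mode is ignored by the core, so no initialization is needed.
 */
static int my_timer_shutdown(struct clock_event_device *evt)
{
	/* stop the timer */
	return 0;
}

static int my_timer_set_oneshot(struct clock_event_device *evt)
{
	/* switch the hardware to one-shot operation */
	return 0;
}

static struct clock_event_device my_state_evt = {
	.name			= "my-timer",
	.features		= CLOCK_EVT_FEAT_ONESHOT,
	.set_state_shutdown	= my_timer_shutdown,
	.set_state_oneshot	= my_timer_set_oneshot,
};

Either device would then be handed to clockevents_register_device(), where clockevents_sanity_check() applies the matching set of rules.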
Signed-off-by: Viresh Kumar Acked-by: Peter Zijlstra Cc: Daniel Lezcano Cc: Frederic Weisbecker Cc: Kevin Hilman Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: linaro-kernel@lists.linaro.org Cc: linaro-networking@linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/c6604fa1a77fe1fc8dcab87769857228fb1dadd5.1425037853.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 6e53e9a0c2e8..73689df1e4b8 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -450,6 +450,8 @@ static int clockevents_sanity_check(struct clock_event_device *dev) /* We shouldn't be supporting new modes now */ WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || dev->set_state_shutdown || dev->tick_resume); + + BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); return 0; } @@ -479,7 +481,6 @@ void clockevents_register_device(struct clock_event_device *dev) { unsigned long flags; - BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); BUG_ON(clockevents_sanity_check(dev)); /* Initialize state to DETACHED */ -- cgit v1.2.3 From 9f083b74df3a7eaa100b456f2dc195512daf728e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:05:19 +0100 Subject: clockevents: Remove CONFIG_GENERIC_CLOCKEVENTS_BUILD This option was meant to simplify the migration to the clock events code. Most architectures have been converted and the option has been dysfunctional as a standalone option for quite some time. Remove it. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/5021859.jl9OC1medj@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/Kconfig | 6 ------ kernel/time/Makefile | 6 ++---- kernel/time/clockevents.c | 3 --- kernel/time/tick-internal.h | 4 ++-- 4 files changed, 4 insertions(+), 15 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index d626dc98e8df..579ce1b929af 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -33,12 +33,6 @@ config ARCH_USES_GETTIMEOFFSET config GENERIC_CLOCKEVENTS bool -# Migration helper. 
Builds, but does not invoke -config GENERIC_CLOCKEVENTS_BUILD - bool - default y - depends on GENERIC_CLOCKEVENTS - # Architecture can handle broadcast in a driver-agnostic way config ARCH_HAS_TICK_BROADCAST bool diff --git a/kernel/time/Makefile b/kernel/time/Makefile index c09c07817d7a..01f0312419b3 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -2,15 +2,13 @@ obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o obj-y += timeconv.o timecounter.o posix-clock.o alarmtimer.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o +obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o tick-common.o ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y) obj-y += tick-broadcast.o obj-$(CONFIG_TICK_ONESHOT) += tick-broadcast-hrtimer.o endif obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o -obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o -obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o +obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o obj-$(CONFIG_TIMER_STATS) += timer_stats.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 73689df1e4b8..3531beecbe95 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -645,7 +645,6 @@ void clockevents_resume(void) dev->resume(dev); } -#ifdef CONFIG_GENERIC_CLOCKEVENTS /** * clockevents_notify - notification about relevant events * Returns 0 on success, any other value on error @@ -831,5 +830,3 @@ static int __init clockevents_init_sysfs(void) } device_initcall(clockevents_init_sysfs); #endif /* SYSFS */ - -#endif /* GENERIC_CLOCK_EVENTS */ diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 98700e4a2000..c7b75bec27f2 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -10,7 +10,7 @@ extern seqlock_t jiffies_lock; #define CS_NAME_LEN 32 -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD +#ifdef CONFIG_GENERIC_CLOCKEVENTS #define TICK_DO_TIMER_NONE -1 #define TICK_DO_TIMER_BOOT -2 @@ -167,7 +167,7 @@ static inline int tick_device_is_functional(struct clock_event_device *dev) int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); -#endif +#endif /* GENERIC_CLOCKEVENTS */ extern void do_timer(unsigned long ticks); extern void update_wall_time(void); -- cgit v1.2.3 From bfb83b27519aa7ed9510f601a8f825a2c1484bc2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:06:04 +0100 Subject: tick: Move clocksource related stuff to timekeeping.h Move clocksource related stuff to timekeeping.h and remove the pointless include from ntp.c Signed-off-by: Thomas Gleixner [ rjw: Subject ] Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/2714218.nM5AEfAHj0@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 2 +- kernel/time/jiffies.c | 2 +- kernel/time/ntp.c | 1 - kernel/time/tick-internal.h | 6 ------ kernel/time/timekeeping.h | 7 +++++++ 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c3be3c71bbad..8b4010f0b1b4 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -31,7 +31,7 @@ #include #include -#include "tick-internal.h" +#include "timekeeping.h" #include "timekeeping_internal.h" /** diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index c4bb518725b5..347fecf86a3f 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -25,7 +25,7 @@ #include #include -#include "tick-internal.h" +#include "timekeeping.h" /* The Jiffies based clocksource is the lowest common * denominator clock source which should function on diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 0f60b08a4f07..9ad60d028508 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -17,7 +17,6 @@ #include #include -#include "tick-internal.h" #include "ntp_internal.h" /* diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index c7b75bec27f2..cba52140a298 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -6,10 +6,6 @@ #include "timekeeping.h" -extern seqlock_t jiffies_lock; - -#define CS_NAME_LEN 32 - #ifdef CONFIG_GENERIC_CLOCKEVENTS #define TICK_DO_TIMER_NONE -1 @@ -169,5 +165,3 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); #endif /* GENERIC_CLOCKEVENTS */ -extern void do_timer(unsigned long ticks); -extern void update_wall_time(void); diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 1d91416055d5..ead8794b9a4e 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -19,4 +19,11 @@ extern void timekeeping_clocktai(struct timespec *ts); extern int timekeeping_suspend(void); extern void timekeeping_resume(void); +extern void do_timer(unsigned long ticks); +extern void update_wall_time(void); + +extern seqlock_t jiffies_lock; + +#define CS_NAME_LEN 32 + #endif -- cgit v1.2.3 From b7475eb599ddb2e8cab2dc86ff38a9507463ad6b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:06:47 +0100 Subject: tick: Simplify tick-internal.h tick-internal.h is pretty confusing as a lot of the stub inlines are there several times. Disentangle the maze and create clear functional sections. Signed-off-by: Thomas Gleixner [ rjw: Subject ] Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/16068264.vcNp79HLaT@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/tick-internal.h | 145 +++++++++++++++----------------------------- 1 file changed, 49 insertions(+), 96 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index cba52140a298..d86eb8d485e9 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -27,14 +27,19 @@ extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); -extern void clockevents_shutdown(struct clock_event_device *dev); extern int clockevents_tick_resume(struct clock_event_device *dev); +/* Check, if the device is functional or a dummy for broadcast */ +static inline int tick_device_is_functional(struct clock_event_device *dev) +{ + return !(dev->features & CLOCK_EVT_FEAT_DUMMY); +} +extern void clockevents_shutdown(struct clock_event_device *dev); +extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); +#endif /* GENERIC_CLOCKEVENTS */ -/* - * NO_HZ / high resolution timer shared code - */ +/* Oneshot related functions */ #ifdef CONFIG_TICK_ONESHOT extern void tick_setup_oneshot(struct clock_event_device *newdev, void (*handler)(struct clock_event_device *), @@ -43,69 +48,19 @@ extern int tick_program_event(ktime_t expires, int force); extern void tick_oneshot_notify(void); extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); extern void tick_resume_oneshot(void); -# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); -extern int tick_broadcast_oneshot_control(unsigned long reason); -extern void tick_broadcast_switch_to_oneshot(void); -extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); -extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); -extern int tick_broadcast_oneshot_active(void); -extern void tick_check_oneshot_broadcast_this_cpu(void); -bool tick_broadcast_oneshot_available(void); -# else /* BROADCAST */ -static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) -{ - BUG(); -} -static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } -static inline void tick_broadcast_switch_to_oneshot(void) { } -static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } -static inline int tick_broadcast_oneshot_active(void) { return 0; } -static inline void tick_check_oneshot_broadcast_this_cpu(void) { } -static inline bool tick_broadcast_oneshot_available(void) { return true; } -# endif /* !BROADCAST */ - +static inline bool tick_oneshot_possible(void) { return true; } #else /* !ONESHOT */ static inline void tick_setup_oneshot(struct clock_event_device *newdev, void (*handler)(struct clock_event_device *), - ktime_t nextevt) -{ - BUG(); -} -static inline void tick_resume_oneshot(void) -{ - BUG(); -} -static inline int tick_program_event(ktime_t expires, int force) -{ - return 0; -} + ktime_t nextevt) { BUG(); } +static inline void tick_resume_oneshot(void) { BUG(); } +static inline int tick_program_event(ktime_t expires, int force) { return 0; } static inline void tick_oneshot_notify(void) { } -static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) -{ - BUG(); -} -static inline int 
tick_broadcast_oneshot_control(unsigned long reason) { return 0; } -static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } -static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) -{ - return 0; -} -static inline int tick_broadcast_oneshot_active(void) { return 0; } -static inline bool tick_broadcast_oneshot_available(void) { return false; } +static inline bool tick_oneshot_possible(void) { return false; } #endif /* !TICK_ONESHOT */ -/* NO_HZ_FULL internal */ -#ifdef CONFIG_NO_HZ_FULL -extern void tick_nohz_init(void); -# else -static inline void tick_nohz_init(void) { } -#endif - -/* - * Broadcasting support - */ +/* Broadcasting support */ #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); extern void tick_install_broadcast_device(struct clock_event_device *dev); @@ -115,53 +70,51 @@ extern void tick_shutdown_broadcast(unsigned int *cpup); extern void tick_suspend_broadcast(void); extern int tick_resume_broadcast(void); extern void tick_broadcast_init(void); -extern void -tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); -int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); - +extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); +extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); #else /* !BROADCAST */ - -static inline void tick_install_broadcast_device(struct clock_event_device *dev) -{ -} - -static inline int tick_is_broadcast_device(struct clock_event_device *dev) -{ - return 0; -} -static inline int tick_device_uses_broadcast(struct clock_event_device *dev, - int cpu) -{ - return 0; -} +static inline void tick_install_broadcast_device(struct clock_event_device *dev) { } +static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; } +static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; } static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { } static inline void tick_shutdown_broadcast(unsigned int *cpup) { } static inline void tick_suspend_broadcast(void) { } static inline int tick_resume_broadcast(void) { return 0; } static inline void tick_broadcast_init(void) { } -static inline int tick_broadcast_update_freq(struct clock_event_device *dev, - u32 freq) { return -ENODEV; } +static inline int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) { return -ENODEV; } -/* - * Set the periodic handler in non broadcast mode - */ -static inline void tick_set_periodic_handler(struct clock_event_device *dev, - int broadcast) +/* Set the periodic handler in non broadcast mode */ +static inline void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) { dev->event_handler = tick_handle_periodic; } #endif /* !BROADCAST */ -/* - * Check, if the device is functional or a dummy for broadcast - */ -static inline int tick_device_is_functional(struct clock_event_device *dev) -{ - return !(dev->features & CLOCK_EVT_FEAT_DUMMY); -} - -int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); - -#endif /* GENERIC_CLOCKEVENTS */ +/* Functions related to oneshot broadcasting */ +#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); +extern int 
tick_broadcast_oneshot_control(unsigned long reason); +extern void tick_broadcast_switch_to_oneshot(void); +extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); +extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); +extern int tick_broadcast_oneshot_active(void); +extern void tick_check_oneshot_broadcast_this_cpu(void); +bool tick_broadcast_oneshot_available(void); +#else /* BROADCAST && ONESHOT */ +static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } +static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } +static inline void tick_broadcast_switch_to_oneshot(void) { } +static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } +static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { return 0; } +static inline int tick_broadcast_oneshot_active(void) { return 0; } +static inline void tick_check_oneshot_broadcast_this_cpu(void) { } +static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } +#endif /* !BROADCAST && ONESHOT */ +/* NO_HZ_FULL internal */ +#ifdef CONFIG_NO_HZ_FULL +extern void tick_nohz_init(void); +# else +static inline void tick_nohz_init(void) { } +#endif -- cgit v1.2.3 From c1797baf6880174f899ce3960d0598f5bbeeb7ff Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:07:37 +0100 Subject: tick: Move core only declarations and functions to core No point to expose everything to the world. People just believe such functions can be abused for whatever purposes. Sigh. Signed-off-by: Thomas Gleixner [ Rebased on top of 4.0-rc5 ] Signed-off-by: Rafael J. Wysocki Cc: Nicolas Pitre Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/28017337.VbCUc39Gme@vostro.rjw.lan [ Merged to latest timers/core ] Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 2 +- kernel/time/hrtimer.c | 2 +- kernel/time/tick-internal.h | 15 +++++++++++ kernel/time/tick-sched.c | 7 ++++- kernel/time/tick-sched.h | 64 +++++++++++++++++++++++++++++++++++++++++++++ kernel/time/timer_list.c | 2 +- 6 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 kernel/time/tick-sched.h (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 8b4010f0b1b4..c3be3c71bbad 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -31,7 +31,7 @@ #include #include -#include "timekeeping.h" +#include "tick-internal.h" #include "timekeeping_internal.h" /** diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index bee0c1f78091..721d29b99d10 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -54,7 +54,7 @@ #include -#include "timekeeping.h" +#include "tick-internal.h" /* * The timer bases: diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index d86eb8d485e9..dd2c45d057b9 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -5,6 +5,7 @@ #include #include "timekeeping.h" +#include "tick-sched.h" #ifdef CONFIG_GENERIC_CLOCKEVENTS @@ -26,6 +27,7 @@ extern void tick_resume(void); extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); +extern int tick_is_oneshot_available(void); extern int clockevents_tick_resume(struct clock_event_device *dev); /* Check, if the device is functional or a dummy for broadcast */ @@ -35,6 +37,9 @@ static inline int tick_device_is_functional(struct 
clock_event_device *dev) } extern void clockevents_shutdown(struct clock_event_device *dev); +extern void clockevents_exchange_device(struct clock_event_device *old, + struct clock_event_device *new); +extern void clockevents_handle_noop(struct clock_event_device *dev); extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); #endif /* GENERIC_CLOCKEVENTS */ @@ -49,6 +54,10 @@ extern void tick_oneshot_notify(void); extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); extern void tick_resume_oneshot(void); static inline bool tick_oneshot_possible(void) { return true; } +extern int tick_oneshot_mode_active(void); +extern void tick_clock_notify(void); +extern int tick_check_oneshot_change(int allow_nohz); +extern int tick_init_highres(void); #else /* !ONESHOT */ static inline void tick_setup_oneshot(struct clock_event_device *newdev, @@ -58,6 +67,9 @@ static inline void tick_resume_oneshot(void) { BUG(); } static inline int tick_program_event(ktime_t expires, int force) { return 0; } static inline void tick_oneshot_notify(void) { } static inline bool tick_oneshot_possible(void) { return false; } +static inline int tick_oneshot_mode_active(void) { return 0; } +static inline void tick_clock_notify(void) { } +static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } #endif /* !TICK_ONESHOT */ /* Broadcasting support */ @@ -72,6 +84,8 @@ extern int tick_resume_broadcast(void); extern void tick_broadcast_init(void); extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); +extern struct tick_device *tick_get_broadcast_device(void); +extern struct cpumask *tick_get_broadcast_mask(void); #else /* !BROADCAST */ static inline void tick_install_broadcast_device(struct clock_event_device *dev) { } static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; } @@ -101,6 +115,7 @@ extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); +extern struct cpumask *tick_get_broadcast_oneshot_mask(void); #else /* BROADCAST && ONESHOT */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a4c4edac4528..914259128145 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -34,7 +34,7 @@ /* * Per cpu nohz control structure */ -DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); +static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched); /* * The time, when the last jiffy update happened. Protected by jiffies_lock. 
@@ -416,6 +416,11 @@ static int __init setup_tick_nohz(char *str) __setup("nohz=", setup_tick_nohz); +int tick_nohz_tick_stopped(void) +{ + return __this_cpu_read(tick_cpu_sched.tick_stopped); +} + /** * tick_nohz_update_jiffies - update jiffies when idle was interrupted * diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h new file mode 100644 index 000000000000..930743249127 --- /dev/null +++ b/kernel/time/tick-sched.h @@ -0,0 +1,64 @@ +#ifndef _TICK_SCHED_H +#define _TICK_SCHED_H + +#include + +enum tick_nohz_mode { + NOHZ_MODE_INACTIVE, + NOHZ_MODE_LOWRES, + NOHZ_MODE_HIGHRES, +}; + +/** + * struct tick_sched - sched tick emulation and no idle tick control/stats + * @sched_timer: hrtimer to schedule the periodic tick in high + * resolution mode + * @last_tick: Store the last tick expiry time when the tick + * timer is modified for nohz sleeps. This is necessary + * to resume the tick timer operation in the timeline + * when the CPU returns from nohz sleep. + * @tick_stopped: Indicator that the idle tick has been stopped + * @idle_jiffies: jiffies at the entry to idle for idle time accounting + * @idle_calls: Total number of idle calls + * @idle_sleeps: Number of idle calls, where the sched tick was stopped + * @idle_entrytime: Time when the idle call was entered + * @idle_waketime: Time when the idle was interrupted + * @idle_exittime: Time when the idle state was left + * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped + * @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding + * @sleep_length: Duration of the current idle sleep + * @do_timer_lst: CPU was the last one doing do_timer before going idle + */ +struct tick_sched { + struct hrtimer sched_timer; + unsigned long check_clocks; + enum tick_nohz_mode nohz_mode; + ktime_t last_tick; + int inidle; + int tick_stopped; + unsigned long idle_jiffies; + unsigned long idle_calls; + unsigned long idle_sleeps; + int idle_active; + ktime_t idle_entrytime; + ktime_t idle_waketime; + ktime_t idle_exittime; + ktime_t idle_sleeptime; + ktime_t iowait_sleeptime; + ktime_t sleep_length; + unsigned long last_jiffies; + unsigned long next_jiffies; + ktime_t idle_expires; + int do_timer_last; +}; + +extern struct tick_sched *tick_get_tick_sched(int cpu); + +extern void tick_setup_sched_timer(void); +#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS +extern void tick_cancel_sched_timer(int cpu); +#else +static inline void tick_cancel_sched_timer(int cpu) { } +#endif + +#endif diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 05aa5590106a..e878c2e0ba45 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -16,10 +16,10 @@ #include #include #include -#include #include +#include "tick-internal.h" struct timer_list_iter { int cpu; -- cgit v1.2.3 From db6f672ef11d7a3c5aa128a3c3e57c92580a25f7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:08:27 +0100 Subject: clockevents: Remove extra local_irq_save() in clockevents_exchange_device() Called with 'clockevents_lock' held and interrupts disabled already. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/51005827.yXt5tjZMBs@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 3531beecbe95..b73002718536 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -595,14 +595,12 @@ void clockevents_handle_noop(struct clock_event_device *dev) * @old: device to release (can be NULL) * @new: device to request (can be NULL) * - * Called from the notifier chain. clockevents_lock is held already + * Called from various tick functions with clockevents_lock held and + * interrupts disabled. */ void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new) { - unsigned long flags; - - local_irq_save(flags); /* * Caller releases a clock event device. We queue it into the * released list and do a notify add later. @@ -618,7 +616,6 @@ void clockevents_exchange_device(struct clock_event_device *old, BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED); clockevents_shutdown(new); } - local_irq_restore(flags); } /** -- cgit v1.2.3 From 4ffee521f36390c7720d493591b764ca35c8030b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:09:16 +0100 Subject: clockevents: Make suspend/resume calls explicit clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the suspend/resume() calls and invoke them directly from the call sites. No locking required at this point because these calls happen with interrupts disabled and a single cpu online. Signed-off-by: Thomas Gleixner [ Rebased on top of 4.0-rc5. ] Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/713674030.jVm1qaHuPf@vostro.rjw.lan [ Rebased on top of latest timers/core. ] Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 9 --------- kernel/time/tick-common.c | 26 +++++++++++++++++++++++--- kernel/time/tick-internal.h | 3 ++- kernel/time/timekeeping.c | 6 ++---- 4 files changed, 27 insertions(+), 17 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index b73002718536..7af614829da1 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -670,15 +670,6 @@ int clockevents_notify(unsigned long reason, void *arg) tick_handover_do_timer(arg); break; - case CLOCK_EVT_NOTIFY_SUSPEND: - tick_suspend(); - tick_suspend_broadcast(); - break; - - case CLOCK_EVT_NOTIFY_RESUME: - tick_resume(); - break; - case CLOCK_EVT_NOTIFY_CPU_DEAD: tick_shutdown_broadcast_oneshot(arg); tick_shutdown_broadcast(arg); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index a5b877130ae9..1a60c2ae96a8 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -373,18 +373,39 @@ void tick_shutdown(unsigned int *cpup) } } +/** + * tick_suspend - Suspend the tick and the broadcast device + * + * Called from syscore_suspend() via timekeeping_suspend with only one + * CPU online and interrupts disabled or from tick_unfreeze() under + * tick_freeze_lock. + * + * No locks required. Nothing can change the per cpu device. 
+ */ void tick_suspend(void) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); clockevents_shutdown(td->evtdev); + tick_suspend_broadcast(); } +/** + * tick_resume - Resume the tick and the broadcast device + * + * Called from syscore_resume() via timekeeping_resume with only one + * CPU online and interrupts disabled or from tick_unfreeze() under + * tick_freeze_lock. + * + * No locks required. Nothing can change the per cpu device. + */ void tick_resume(void) { - struct tick_device *td = this_cpu_ptr(&tick_cpu_device); - int broadcast = tick_resume_broadcast(); + struct tick_device *td; + int broadcast; + broadcast = tick_resume_broadcast(); + td = this_cpu_ptr(&tick_cpu_device); clockevents_tick_resume(td->evtdev); if (!broadcast) { @@ -416,7 +437,6 @@ void tick_freeze(void) timekeeping_suspend(); } else { tick_suspend(); - tick_suspend_broadcast(); } raw_spin_unlock(&tick_freeze_lock); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index dd2c45d057b9..85a957195bf6 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -23,7 +23,6 @@ extern void tick_check_new_device(struct clock_event_device *dev); extern void tick_handover_do_timer(int *cpup); extern void tick_shutdown(unsigned int *cpup); extern void tick_suspend(void); -extern void tick_resume(void); extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); @@ -42,6 +41,8 @@ extern void clockevents_exchange_device(struct clock_event_device *old, extern void clockevents_handle_noop(struct clock_event_device *dev); extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); +#else +static inline void tick_suspend(void) { } #endif /* GENERIC_CLOCKEVENTS */ /* Oneshot related functions */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index c3fcff06d30a..5b12292b343a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1389,9 +1389,7 @@ void timekeeping_resume(void) touch_softlockup_watchdog(); - clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); - - /* Resume hrtimers */ + tick_resume(); hrtimers_resume(); } @@ -1444,7 +1442,7 @@ int timekeeping_suspend(void) write_seqcount_end(&tk_core.seq); raw_spin_unlock_irqrestore(&timekeeper_lock, flags); - clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); + tick_suspend(); clocksource_suspend(); clockevents_suspend(); -- cgit v1.2.3 From 080873ce2d1abd8c0a2b8c87bfa0762546a6b713 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:09:55 +0100 Subject: tick: Make tick_resume_broadcast_oneshot() static Solely used in tick-broadcast.c and the return value is hardcoded 0. Make it static and void. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1689058.QkHYDJSRKu@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/tick-broadcast.c | 7 ++++--- kernel/time/tick-internal.h | 2 -- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f0f8ee9dbc28..60e6c23ce1c7 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -37,8 +37,10 @@ static int tick_broadcast_force; #ifdef CONFIG_TICK_ONESHOT static void tick_broadcast_clear_oneshot(int cpu); +static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); #else static inline void tick_broadcast_clear_oneshot(int cpu) { } +static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { } #endif /* @@ -475,7 +477,7 @@ int tick_resume_broadcast(void) break; case TICKDEV_MODE_ONESHOT: if (!cpumask_empty(tick_broadcast_mask)) - broadcast = tick_resume_broadcast_oneshot(bc); + tick_resume_broadcast_oneshot(bc); break; } } @@ -541,10 +543,9 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu, return ret; } -int tick_resume_broadcast_oneshot(struct clock_event_device *bc) +static void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT); - return 0; } /* diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 85a957195bf6..5c9f0eec56b2 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -112,7 +112,6 @@ extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); extern int tick_broadcast_oneshot_control(unsigned long reason); extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); -extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); @@ -122,7 +121,6 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } -static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { return 0; } static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline void tick_check_oneshot_broadcast_this_cpu(void) { } static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } -- cgit v1.2.3 From f46481d0a7cb942b84145acb80ad43bdb1ff8eb4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:11:04 +0100 Subject: tick/xen: Provide and use tick_suspend_local() and tick_resume_local() Xen calls on every cpu into tick_resume() which is just wrong. tick_resume() is for the syscore global suspend/resume invocation. What XEN really wants is a per cpu local resume function. Provide a tick_resume_local() function and use it in XEN. Also provide a complementary tick_suspend_local() and modify tick_unfreeze() and tick_freeze(), respectively, to use the new local tick resume/suspend functions. Signed-off-by: Thomas Gleixner [ Combined two patches, rebased, modified subject/changelog. ] Signed-off-by: Rafael J. 
Wysocki Cc: Boris Ostrovsky Cc: David Vrabel Cc: Konrad Rzeszutek Wilk Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1698741.eezk9tnXtG@vostro.rjw.lan [ Merged to latest timers/core. ] Signed-off-by: Ingo Molnar --- kernel/time/tick-broadcast.c | 24 +++++++++++++------ kernel/time/tick-common.c | 55 ++++++++++++++++++++++++++++++-------------- kernel/time/tick-internal.h | 8 +++++-- 3 files changed, 61 insertions(+), 26 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 60e6c23ce1c7..19cfb381faa9 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -455,11 +455,26 @@ void tick_suspend_broadcast(void) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -int tick_resume_broadcast(void) +/* + * This is called from tick_resume_local() on a resuming CPU. That's + * called from the core resume function, tick_unfreeze() and the magic XEN + * resume hackery. + * + * In none of these cases the broadcast device mode can change and the + * bit of the resuming CPU in the broadcast mask is safe as well. + */ +bool tick_resume_check_broadcast(void) +{ + if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) + return false; + else + return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask); +} + +void tick_resume_broadcast(void) { struct clock_event_device *bc; unsigned long flags; - int broadcast = 0; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -472,8 +487,6 @@ int tick_resume_broadcast(void) case TICKDEV_MODE_PERIODIC: if (!cpumask_empty(tick_broadcast_mask)) tick_broadcast_start_periodic(bc); - broadcast = cpumask_test_cpu(smp_processor_id(), - tick_broadcast_mask); break; case TICKDEV_MODE_ONESHOT: if (!cpumask_empty(tick_broadcast_mask)) @@ -482,11 +495,8 @@ int tick_resume_broadcast(void) } } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); - - return broadcast; } - #ifdef CONFIG_TICK_ONESHOT static cpumask_var_t tick_broadcast_oneshot_mask; diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 1a60c2ae96a8..da796d65d1fb 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -374,40 +374,32 @@ void tick_shutdown(unsigned int *cpup) } /** - * tick_suspend - Suspend the tick and the broadcast device + * tick_suspend_local - Suspend the local tick device * - * Called from syscore_suspend() via timekeeping_suspend with only one - * CPU online and interrupts disabled or from tick_unfreeze() under - * tick_freeze_lock. + * Called from the local cpu for freeze with interrupts disabled. * * No locks required. Nothing can change the per cpu device. */ -void tick_suspend(void) +static void tick_suspend_local(void) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); clockevents_shutdown(td->evtdev); - tick_suspend_broadcast(); } /** - * tick_resume - Resume the tick and the broadcast device + * tick_resume_local - Resume the local tick device * - * Called from syscore_resume() via timekeeping_resume with only one - * CPU online and interrupts disabled or from tick_unfreeze() under - * tick_freeze_lock. + * Called from the local CPU for unfreeze or XEN resume magic. * * No locks required. Nothing can change the per cpu device. 
*/ -void tick_resume(void) +void tick_resume_local(void) { - struct tick_device *td; - int broadcast; + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + bool broadcast = tick_resume_check_broadcast(); - broadcast = tick_resume_broadcast(); - td = this_cpu_ptr(&tick_cpu_device); clockevents_tick_resume(td->evtdev); - if (!broadcast) { if (td->mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(td->evtdev, 0); @@ -416,6 +408,35 @@ void tick_resume(void) } } +/** + * tick_suspend - Suspend the tick and the broadcast device + * + * Called from syscore_suspend() via timekeeping_suspend with only one + * CPU online and interrupts disabled or from tick_unfreeze() under + * tick_freeze_lock. + * + * No locks required. Nothing can change the per cpu device. + */ +void tick_suspend(void) +{ + tick_suspend_local(); + tick_suspend_broadcast(); +} + +/** + * tick_resume - Resume the tick and the broadcast device + * + * Called from syscore_resume() via timekeeping_resume with only one + * CPU online and interrupts disabled. + * + * No locks required. Nothing can change the per cpu device. + */ +void tick_resume(void) +{ + tick_resume_broadcast(); + tick_resume_local(); +} + static DEFINE_RAW_SPINLOCK(tick_freeze_lock); static unsigned int tick_freeze_depth; @@ -436,7 +457,7 @@ void tick_freeze(void) if (tick_freeze_depth == num_online_cpus()) { timekeeping_suspend(); } else { - tick_suspend(); + tick_suspend_local(); } raw_spin_unlock(&tick_freeze_lock); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 5c9f0eec56b2..6ba7bce732f2 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -23,6 +23,7 @@ extern void tick_check_new_device(struct clock_event_device *dev); extern void tick_handover_do_timer(int *cpup); extern void tick_shutdown(unsigned int *cpup); extern void tick_suspend(void); +extern void tick_resume(void); extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); @@ -43,6 +44,7 @@ extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); #else static inline void tick_suspend(void) { } +static inline void tick_resume(void) { } #endif /* GENERIC_CLOCKEVENTS */ /* Oneshot related functions */ @@ -81,7 +83,8 @@ extern int tick_is_broadcast_device(struct clock_event_device *dev); extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); extern void tick_shutdown_broadcast(unsigned int *cpup); extern void tick_suspend_broadcast(void); -extern int tick_resume_broadcast(void); +extern void tick_resume_broadcast(void); +extern bool tick_resume_check_broadcast(void); extern void tick_broadcast_init(void); extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); @@ -95,7 +98,8 @@ static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { } static inline void tick_shutdown_broadcast(unsigned int *cpup) { } static inline void tick_suspend_broadcast(void) { } -static inline int tick_resume_broadcast(void) { return 0; } +static inline void tick_resume_broadcast(void) { } +static inline bool tick_resume_check_broadcast(void) { return false; } static inline void tick_broadcast_init(void) { } static inline int 
tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) { return -ENODEV; } -- cgit v1.2.3 From 7270d11c56f594af4d166b2988421cd8ed933dc1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 25 Mar 2015 13:11:52 +0100 Subject: arm/bL_switcher: Kill tick suspend hackery Use the new tick_suspend/resume_local() and get rid of the homebrewed implementation of these in the ARM bL switcher. The check for the cpumask is completely pointless. There is no harm in suspending a per cpu tick device unconditionally. If that's a real issue then we fix it properly at the core level and not with some completely undocumented hacks in some random core code. Move the tick internals to the core code, now that this nuisance is gone. Signed-off-by: Thomas Gleixner [ rjw: Rebase, changelog ] Signed-off-by: Rafael J. Wysocki Cc: Nicolas Pitre Cc: Peter Zijlstra Cc: Russell King Link: http://lkml.kernel.org/r/1655112.Ws17YsMfN7@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/tick-common.c | 2 +- kernel/time/tick-internal.h | 5 +++++ kernel/time/tick-sched.h | 10 ++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index da796d65d1fb..e28ba5c044c5 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -380,7 +380,7 @@ void tick_shutdown(unsigned int *cpup) * * No locks required. Nothing can change the per cpu device. */ -static void tick_suspend_local(void) +void tick_suspend_local(void) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 6ba7bce732f2..5fc2dafabd58 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -28,6 +28,7 @@ extern bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev); extern void tick_install_replacement(struct clock_event_device *dev); extern int tick_is_oneshot_available(void); +extern struct tick_device *tick_get_device(int cpu); extern int clockevents_tick_resume(struct clock_event_device *dev); /* Check, if the device is functional or a dummy for broadcast */ @@ -39,6 +40,10 @@ static inline int tick_device_is_functional(struct clock_event_device *dev) extern void clockevents_shutdown(struct clock_event_device *dev); extern void clockevents_exchange_device(struct clock_event_device *old, struct clock_event_device *new); +extern void clockevents_set_state(struct clock_event_device *dev, + enum clock_event_state state); +extern int clockevents_program_event(struct clock_event_device *dev, + ktime_t expires, bool force); extern void clockevents_handle_noop(struct clock_event_device *dev); extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 930743249127..28b5da3e1a17 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -3,6 +3,16 @@ #include +enum tick_device_mode { + TICKDEV_MODE_PERIODIC, + TICKDEV_MODE_ONESHOT, +}; + +struct tick_device { + struct clock_event_device *evtdev; + enum tick_device_mode mode; +}; + enum tick_nohz_mode { NOHZ_MODE_INACTIVE, NOHZ_MODE_LOWRES, NOHZ_MODE_HIGHRES, }; -- cgit v1.2.3 From 3ae7a939165c6159afb3c09e1d7405b6d1807f2b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Apr 2015 11:26:06 +0200 Subject: tick: Further simplify tick-internal.h Move the broadcasting related section to the 
GENERIC_CLOCKEVENTS=y section - this also solves build failures on architectures that don't use generic clockevents yet. Also standardize include file style to make it easier to read, and use nesting depth aware preprocessor directives to make future merges easier. Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/time/tick-internal.h | 79 +++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 39 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 5fc2dafabd58..b6ba0a44e740 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -9,8 +9,8 @@ #ifdef CONFIG_GENERIC_CLOCKEVENTS -#define TICK_DO_TIMER_NONE -1 -#define TICK_DO_TIMER_BOOT -2 +# define TICK_DO_TIMER_NONE -1 +# define TICK_DO_TIMER_BOOT -2 DECLARE_PER_CPU(struct tick_device, tick_cpu_device); extern ktime_t tick_next_period; @@ -47,41 +47,9 @@ extern int clockevents_program_event(struct clock_event_device *dev, extern void clockevents_handle_noop(struct clock_event_device *dev); extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); -#else -static inline void tick_suspend(void) { } -static inline void tick_resume(void) { } -#endif /* GENERIC_CLOCKEVENTS */ - -/* Oneshot related functions */ -#ifdef CONFIG_TICK_ONESHOT -extern void tick_setup_oneshot(struct clock_event_device *newdev, - void (*handler)(struct clock_event_device *), - ktime_t nextevt); -extern int tick_program_event(ktime_t expires, int force); -extern void tick_oneshot_notify(void); -extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); -extern void tick_resume_oneshot(void); -static inline bool tick_oneshot_possible(void) { return true; } -extern int tick_oneshot_mode_active(void); -extern void tick_clock_notify(void); -extern int tick_check_oneshot_change(int allow_nohz); -extern int tick_init_highres(void); -#else /* !ONESHOT */ -static inline -void tick_setup_oneshot(struct clock_event_device *newdev, - void (*handler)(struct clock_event_device *), - ktime_t nextevt) { BUG(); } -static inline void tick_resume_oneshot(void) { BUG(); } -static inline int tick_program_event(ktime_t expires, int force) { return 0; } -static inline void tick_oneshot_notify(void) { } -static inline bool tick_oneshot_possible(void) { return false; } -static inline int tick_oneshot_mode_active(void) { return 0; } -static inline void tick_clock_notify(void) { } -static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } -#endif /* !TICK_ONESHOT */ /* Broadcasting support */ -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); extern void tick_install_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); @@ -95,7 +63,7 @@ extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadc extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); extern struct tick_device *tick_get_broadcast_device(void); extern struct cpumask *tick_get_broadcast_mask(void); -#else /* !BROADCAST */ +# else /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST: */ static inline void tick_install_broadcast_device(struct clock_event_device *dev) { } static inline int tick_is_broadcast_device(struct 
clock_event_device *dev) { return 0; } static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; } @@ -113,7 +81,40 @@ static inline void tick_set_periodic_handler(struct clock_event_device *dev, int { dev->event_handler = tick_handle_periodic; } -#endif /* !BROADCAST */ +# endif /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */ + +#else /* !GENERIC_CLOCKEVENTS: */ +static inline void tick_suspend(void) { } +static inline void tick_resume(void) { } +#endif /* !GENERIC_CLOCKEVENTS */ + +/* Oneshot related functions */ +#ifdef CONFIG_TICK_ONESHOT +extern void tick_setup_oneshot(struct clock_event_device *newdev, + void (*handler)(struct clock_event_device *), + ktime_t nextevt); +extern int tick_program_event(ktime_t expires, int force); +extern void tick_oneshot_notify(void); +extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); +extern void tick_resume_oneshot(void); +static inline bool tick_oneshot_possible(void) { return true; } +extern int tick_oneshot_mode_active(void); +extern void tick_clock_notify(void); +extern int tick_check_oneshot_change(int allow_nohz); +extern int tick_init_highres(void); +#else /* !CONFIG_TICK_ONESHOT: */ +static inline +void tick_setup_oneshot(struct clock_event_device *newdev, + void (*handler)(struct clock_event_device *), + ktime_t nextevt) { BUG(); } +static inline void tick_resume_oneshot(void) { BUG(); } +static inline int tick_program_event(ktime_t expires, int force) { return 0; } +static inline void tick_oneshot_notify(void) { } +static inline bool tick_oneshot_possible(void) { return false; } +static inline int tick_oneshot_mode_active(void) { return 0; } +static inline void tick_clock_notify(void) { } +static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } +#endif /* !CONFIG_TICK_ONESHOT */ /* Functions related to oneshot broadcasting */ #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) @@ -125,7 +126,7 @@ extern int tick_broadcast_oneshot_active(void); extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); extern struct cpumask *tick_get_broadcast_oneshot_mask(void); -#else /* BROADCAST && ONESHOT */ +#else /* !(BROADCAST && ONESHOT): */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } static inline void tick_broadcast_switch_to_oneshot(void) { } @@ -133,7 +134,7 @@ static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline void tick_check_oneshot_broadcast_this_cpu(void) { } static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); } -#endif /* !(BROADCAST && ONESHOT) */ /* NO_HZ_FULL internal */ #ifdef CONFIG_NO_HZ_FULL -- cgit v1.2.3 From 345527b1edce8df719e0884500c76832a18211c3 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Mon, 30 Mar 2015 14:59:19 +0530 Subject: clockevents: Fix cpu_down() race for hrtimer based broadcasting It was found when doing a hotplug stress test on POWER that the machine either hit softlockups or rcu_sched stall warnings.
The issue was traced to commit: 7cba160ad789 ("powernv/cpuidle: Redesign idle states management") which exposed the cpu_down() race with hrtimer based broadcast mode: 5d1638acb9f6 ("tick: Introduce hrtimer based broadcast") The race is the following: Assume CPU1 is the CPU which holds the hrtimer broadcasting duty before it is taken down. CPU0 CPU1 cpu_down() take_cpu_down() disable_interrupts() cpu_die() while (CPU1 != CPU_DEAD) { msleep(100); switch_to_idle(); stop_cpu_timer(); schedule_broadcast(); } tick_cleanup_cpu_dead() take_over_broadcast() So after CPU1 disabled interrupts it cannot handle the broadcast hrtimer anymore, so CPU0 will be stuck forever. Fix this by explicitly taking over broadcast duty before cpu_die(). This is a temporary workaround. What we really want is a callback in the clockevent device which allows us to do that from the dying CPU by pushing the hrtimer onto a different cpu. That might involve an IPI and is definitely more complex than this immediate fix. Changelog was picked up from: https://lkml.org/lkml/2015/2/16/213 Suggested-by: Thomas Gleixner Tested-by: Nicolas Pitre Signed-off-by: Preeti U. Murthy Cc: linuxppc-dev@lists.ozlabs.org Cc: mpe@ellerman.id.au Cc: nicolas.pitre@linaro.org Cc: peterz@infradead.org Cc: rjw@rjwysocki.net Fixes: http://linuxppc.10917.n7.nabble.com/offlining-cpus-breakage-td88619.html Link: http://lkml.kernel.org/r/20150330092410.24979.59887.stgit@preeti.in.ibm.com [ Merged it to the latest timer tree, renamed the callback, tidied up the changelog. ] Signed-off-by: Ingo Molnar --- kernel/time/tick-broadcast.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 19cfb381faa9..f5e0fd5652dc 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -680,14 +680,19 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } -static void broadcast_move_bc(int deadcpu) +void hotplug_cpu__broadcast_tick_pull(int deadcpu) { - struct clock_event_device *bc = tick_broadcast_device.evtdev; + struct clock_event_device *bc; + unsigned long flags; - if (!bc || !broadcast_needs_cpu(bc, deadcpu)) - return; - /* This moves the broadcast assignment to this cpu */ - clockevents_program_event(bc, bc->next_event, 1); + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); + bc = tick_broadcast_device.evtdev; + + if (bc && broadcast_needs_cpu(bc, deadcpu)) { + /* This moves the broadcast assignment to this CPU: */ + clockevents_program_event(bc, bc->next_event, 1); + } + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } /* @@ -924,8 +929,6 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); cpumask_clear_cpu(cpu, tick_broadcast_force_mask); - broadcast_move_bc(cpu); - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -- cgit v1.2.3 From b337a9380f7effd60d082569dd7e0b97a7549730 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 31 Mar 2015 20:49:00 +0530 Subject: timer: Allocate per-cpu tvec_base's statically Memory for the 'tvec_base' array is allocated separately for the boot CPU (statically) and non-boot CPUs (dynamically). 
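[ Editorial sketch, not part of the patch: the two allocation paths just mentioned amount, in essence, to the following; alloc_tvec_base() is a hypothetical wrapper name, and the rationale for the split is spelled out in the next sentences. ]

	/* Boot CPU: compile-time allocation, usable before any allocator is up */
	struct tvec_base boot_tvec_bases;
	static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;

	/* Non-boot CPUs: allocated dynamically at CPU_UP_PREPARE time */
	static int alloc_tvec_base(int cpu)
	{
		struct tvec_base *base = kzalloc_node(sizeof(*base), GFP_KERNEL,
						      cpu_to_node(cpu));
		if (!base)
			return -ENOMEM;
		per_cpu(tvec_bases, cpu) = base;
		return 0;
	}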
The reason is that __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've made NULL special, hint: lock_timer_base()) and we cannot get a compile-time pointer to per-cpu entries because we don't know where we'll map the section, even for the boot cpu. This can be simplified a bit by statically allocating per-cpu memory. The only disadvantage is that memory for one of the structures will stay unused, i.e. for the boot CPU, which uses boot_tvec_bases. This will also guarantee that tvec_base is cacheline aligned. Even though tvec_base has ____cacheline_aligned stuck on, kzalloc_node() does not actually respect that (but guarantees a minimum u64 alignment). Signed-off-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/17cdf560f2727f687ab159707d0aa591f8a2f82d.1427814611.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 48 +++++++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 29 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 2d3f5c504939..f3cc653f876c 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -90,8 +90,19 @@ struct tvec_base { struct tvec tv5; } ____cacheline_aligned; +/* + * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've + * made NULL special, hint: lock_timer_base()) and we cannot get a compile time + * pointer to per-cpu entries because we don't know where we'll map the section, + * even for the boot cpu. + * + * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the + * rest of them. + */ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); +static DEFINE_PER_CPU(struct tvec_base, __tvec_bases); + static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; /* Functions below help us manage 'deferrable' flag */ @@ -1534,46 +1545,25 @@ EXPORT_SYMBOL(schedule_timeout_uninterruptible); static int init_timers_cpu(int cpu) { - int j; - struct tvec_base *base; + struct tvec_base *base = per_cpu(tvec_bases, cpu); static char tvec_base_done[NR_CPUS]; + int j; if (!tvec_base_done[cpu]) { - static char boot_done; + static char boot_cpu_skipped; - if (boot_done) { - /* - * The APs use this path later in boot - */ - base = kzalloc_node(sizeof(*base), GFP_KERNEL, - cpu_to_node(cpu)); - if (!base) - return -ENOMEM; - - /* Make sure tvec_base has TIMER_FLAG_MASK bits free */ - if (WARN_ON(base != tbase_get_base(base))) { - kfree(base); - return -ENOMEM; - } - per_cpu(tvec_bases, cpu) = base; + if (!boot_cpu_skipped) { + boot_cpu_skipped = 1; /* skip the boot cpu */ } else { - /* - * This is for the boot CPU - we use compile-time - * static initialisation because per-cpu memory isn't - * ready yet and because the memory allocators are not - * initialised either. - */ - boot_done = 1; - base = &boot_tvec_bases; + base = per_cpu_ptr(&__tvec_bases, cpu); + per_cpu(tvec_bases, cpu) = base; } + spin_lock_init(&base->lock); tvec_base_done[cpu] = 1; base->cpu = cpu; - } else { - base = per_cpu(tvec_bases, cpu); } - for (j = 0; j < TVN_SIZE; j++) { INIT_LIST_HEAD(base->tv5.vec + j); INIT_LIST_HEAD(base->tv4.vec + j); -- cgit v1.2.3 From 8def906044c02edcedac79aa3d6310ab4d90c4d8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 31 Mar 2015 20:49:01 +0530 Subject: timer: Don't initialize 'tvec_base' on hotplug There is no need to call init_timers_cpu() on every CPU hotplug event; there is not much we need to reset.
- Timer-lists are already empty at the end of migrate_timers(). - timer_jiffies will be refreshed while adding a new timer, after the CPU is online again. - active_timers and all_timers can be reset from migrate_timers(). Signed-off-by: Viresh Kumar Signed-off-by: Peter Zijlstra (Intel) Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/54a1c30ea7b805af55beb220cadf5a07a21b0a4d.1427814611.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 98 +++++++++++++++++++++++------------------------------ 1 file changed, 43 insertions(+), 55 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index f3cc653f876c..1feb9c7035c0 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1543,43 +1543,6 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) } EXPORT_SYMBOL(schedule_timeout_uninterruptible); -static int init_timers_cpu(int cpu) -{ - struct tvec_base *base = per_cpu(tvec_bases, cpu); - static char tvec_base_done[NR_CPUS]; - int j; - - if (!tvec_base_done[cpu]) { - static char boot_cpu_skipped; - - if (!boot_cpu_skipped) { - boot_cpu_skipped = 1; /* skip the boot cpu */ - } else { - base = per_cpu_ptr(&__tvec_bases, cpu); - per_cpu(tvec_bases, cpu) = base; - } - - spin_lock_init(&base->lock); - tvec_base_done[cpu] = 1; - base->cpu = cpu; - } - - for (j = 0; j < TVN_SIZE; j++) { - INIT_LIST_HEAD(base->tv5.vec + j); - INIT_LIST_HEAD(base->tv4.vec + j); - INIT_LIST_HEAD(base->tv3.vec + j); - INIT_LIST_HEAD(base->tv2.vec + j); - } - for (j = 0; j < TVR_SIZE; j++) - INIT_LIST_HEAD(base->tv1.vec + j); - - base->timer_jiffies = jiffies; - base->next_timer = base->timer_jiffies; - base->active_timers = 0; - base->all_timers = 0; - return 0; -} - #ifdef CONFIG_HOTPLUG_CPU static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head) { @@ -1621,6 +1584,9 @@ static void migrate_timers(int cpu) migrate_timer_list(new_base, old_base->tv5.vec + i); } + old_base->active_timers = 0; + old_base->all_timers = 0; + spin_unlock(&old_base->lock); spin_unlock_irq(&new_base->lock); put_cpu_var(tvec_bases); @@ -1630,25 +1596,16 @@ static void migrate_timers(int cpu) static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - long cpu = (long)hcpu; - int err; - - switch(action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - err = init_timers_cpu(cpu); - if (err < 0) - return notifier_from_errno(err); - break; #ifdef CONFIG_HOTPLUG_CPU + switch (action) { case CPU_DEAD: case CPU_DEAD_FROZEN: - migrate_timers(cpu); + migrate_timers((long)hcpu); break; -#endif default: break; } +#endif return NOTIFY_OK; } @@ -1656,18 +1613,49 @@ static struct notifier_block timers_nb = { .notifier_call = timer_cpu_notify, }; +static void __init init_timer_cpu(struct tvec_base *base, int cpu) +{ + int j; -void __init init_timers(void) + base->cpu = cpu; + per_cpu(tvec_bases, cpu) = base; + spin_lock_init(&base->lock); + + for (j = 0; j < TVN_SIZE; j++) { + INIT_LIST_HEAD(base->tv5.vec + j); + INIT_LIST_HEAD(base->tv4.vec + j); + INIT_LIST_HEAD(base->tv3.vec + j); + INIT_LIST_HEAD(base->tv2.vec + j); + } + for (j = 0; j < TVR_SIZE; j++) + INIT_LIST_HEAD(base->tv1.vec + j); + + base->timer_jiffies = jiffies; + base->next_timer = base->timer_jiffies; +} + +static void __init init_timer_cpus(void) { - int err; + struct tvec_base *base; + int local_cpu = smp_processor_id(); + int cpu; + + for_each_possible_cpu(cpu) { + if (cpu == local_cpu) + base = &boot_tvec_bases; + else + base 
= per_cpu_ptr(&__tvec_bases, cpu); + + init_timer_cpu(base, cpu); + } +} +void __init init_timers(void) +{ /* ensure there are enough low bits for flags in timer->base pointer */ BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK); - err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - BUG_ON(err != NOTIFY_OK); - + init_timer_cpus(); init_timer_stats(); register_cpu_notifier(&timers_nb); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); -- cgit v1.2.3 From 3650b57fdf208bc0e36cbe7b5e0744bd0e0cf34d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 31 Mar 2015 20:49:02 +0530 Subject: timer: Further simplify the SMP and HOTPLUG logic Remove one CONFIG_HOTPLUG_CPU #ifdef in trade for introducing one CONFIG_SMP #ifdef. The CONFIG_SMP ifdef avoids declaring the per-CPU __tvec_bases storage on UP systems since they already have boot_tvec_bases. Also (re)add a runtime check on the base alignment -- for the paranoid amongst us :-) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/fdd2d35e169bdc554ffa3fe77f77716298c75ada.1427814611.git.viresh.kumar@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timer.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 1feb9c7035c0..2ece3aa5069c 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -101,7 +101,6 @@ struct tvec_base { */ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); -static DEFINE_PER_CPU(struct tvec_base, __tvec_bases); static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; @@ -1038,6 +1037,8 @@ int try_to_del_timer_sync(struct timer_list *timer) EXPORT_SYMBOL(try_to_del_timer_sync); #ifdef CONFIG_SMP +static DEFINE_PER_CPU(struct tvec_base, __tvec_bases); + /** * del_timer_sync - deactivate a timer and wait for the handler to finish. 
* @timer: the timer to be deactivated @@ -1591,12 +1592,10 @@ static void migrate_timers(int cpu) spin_unlock_irq(&new_base->lock); put_cpu_var(tvec_bases); } -#endif /* CONFIG_HOTPLUG_CPU */ static int timer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { -#ifdef CONFIG_HOTPLUG_CPU switch (action) { case CPU_DEAD: case CPU_DEAD_FROZEN: @@ -1605,18 +1604,24 @@ static int timer_cpu_notify(struct notifier_block *self, default: break; } -#endif + return NOTIFY_OK; } -static struct notifier_block timers_nb = { - .notifier_call = timer_cpu_notify, -}; +static inline void timer_register_cpu_notifier(void) +{ + cpu_notifier(timer_cpu_notify, 0); +} +#else +static inline void timer_register_cpu_notifier(void) { } +#endif /* CONFIG_HOTPLUG_CPU */ static void __init init_timer_cpu(struct tvec_base *base, int cpu) { int j; + BUG_ON(base != tbase_get_base(base)); + base->cpu = cpu; per_cpu(tvec_bases, cpu) = base; spin_lock_init(&base->lock); @@ -1643,8 +1648,10 @@ static void __init init_timer_cpus(void) for_each_possible_cpu(cpu) { if (cpu == local_cpu) base = &boot_tvec_bases; +#ifdef CONFIG_SMP else base = per_cpu_ptr(&__tvec_bases, cpu); +#endif init_timer_cpu(base, cpu); } @@ -1657,7 +1664,7 @@ void __init init_timers(void) init_timer_cpus(); init_timer_stats(); - register_cpu_notifier(&timers_nb); + timer_register_cpu_notifier(); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } -- cgit v1.2.3 From 9a806ddbb9a18c510e4acdcc828b9a87f5fd3aef Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:21 -0700 Subject: time: Add y2038 safe read_boot_clock64() As part of addressing in-kernel y2038 issues, this patch adds read_boot_clock64() and replaces all the call sites of read_boot_clock() with this function. This is a __weak implementation, which simply calls the existing y2038 unsafe read_boot_clock(). This allows architecture specific implementations to be converted independently, and eventually the y2038 unsafe read_boot_clock() can be removed after all its architecture specific implementations have been converted to read_boot_clock64(). 
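[ Editorial sketch, not part of the patch: an architecture converts by supplying a non-weak 64-bit implementation, which overrides the __weak default at link time with no Kconfig plumbing needed; arch_read_boot_seconds() is a hypothetical stand-in for the platform's real boot-time source. ]

	void read_boot_clock64(struct timespec64 *ts)
	{
		/* Hypothetical helper returning 64-bit (y2038 safe) seconds */
		ts->tv_sec = arch_read_boot_seconds();
		ts->tv_nsec = 0;
	}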
Suggested-by: Arnd Bergmann Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-2-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 5b12292b343a..652e50a9c6ed 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1188,6 +1188,14 @@ void __weak read_boot_clock(struct timespec *ts) ts->tv_nsec = 0; } +void __weak read_boot_clock64(struct timespec64 *ts64) +{ + struct timespec ts; + + read_boot_clock(&ts); + *ts64 = timespec_to_timespec64(ts); +} + /* * timekeeping_init - Initializes the clocksource and common timekeeping values */ @@ -1209,8 +1217,7 @@ void __init timekeeping_init(void) } else if (now.tv_sec || now.tv_nsec) persistent_clock_exist = true; - read_boot_clock(&ts); - boot = timespec_to_timespec64(ts); + read_boot_clock64(&boot); if (!timespec64_valid_strict(&boot)) { pr_warn("WARNING: Boot clock returned invalid value!\n" " Check your CMOS/BIOS settings.\n"); -- cgit v1.2.3 From 2ee966320028ac846654eba5344540eeb4dc228d Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:22 -0700 Subject: time: Add y2038 safe read_persistent_clock64() As part of addressing in-kernel y2038 issues, this patch adds read_persistent_clock64() and replaces all the call sites of read_persistent_clock() with this function. This is a __weak implementation, which simply calls the existing y2038 unsafe read_persistent_clock(). This allows architecture specific implementations to be converted independently, and eventually the y2038 unsafe read_persistent_clock() can be removed after all its architecture specific implementations have been converted to read_persistent_clock64(). Suggested-by: Arnd Bergmann Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-3-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 652e50a9c6ed..b1dbfa573dce 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1173,6 +1173,14 @@ void __weak read_persistent_clock(struct timespec *ts) ts->tv_nsec = 0; } +void __weak read_persistent_clock64(struct timespec64 *ts64) +{ + struct timespec ts; + + read_persistent_clock(&ts); + *ts64 = timespec_to_timespec64(ts); +} + /** * read_boot_clock - Return time of the system start. 
* @@ -1205,10 +1213,8 @@ void __init timekeeping_init(void) struct clocksource *clock; unsigned long flags; struct timespec64 now, boot, tmp; - struct timespec ts; - read_persistent_clock(&ts); - now = timespec_to_timespec64(ts); + read_persistent_clock64(&now); if (!timespec64_valid_strict(&now)) { pr_warn("WARNING: Persistent clock returned invalid value!\n" " Check your CMOS/BIOS settings.\n"); @@ -1278,7 +1284,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values * @delta: pointer to a timespec64 delta value * - * This hook is for architectures that cannot support read_persistent_clock + * This hook is for architectures that cannot support read_persistent_clock64 * because their RTC/persistent clock is only accessible when irqs are enabled. * * This function should only be called by rtc_resume(), and allows @@ -1325,12 +1331,10 @@ void timekeeping_resume(void) struct clocksource *clock = tk->tkr_mono.clock; unsigned long flags; struct timespec64 ts_new, ts_delta; - struct timespec tmp; cycle_t cycle_now, cycle_delta; bool suspendtime_found = false; - read_persistent_clock(&tmp); - ts_new = timespec_to_timespec64(tmp); + read_persistent_clock64(&ts_new); clockevents_resume(); clocksource_resume(); @@ -1406,10 +1410,8 @@ int timekeeping_suspend(void) unsigned long flags; struct timespec64 delta, delta_delta; static struct timespec64 old_delta; - struct timespec tmp; - read_persistent_clock(&tmp); - timekeeping_suspend_time = timespec_to_timespec64(tmp); + read_persistent_clock64(&timekeeping_suspend_time); /* * On some systems the persistent_clock can not be detected at -- cgit v1.2.3 From 3c00a1fe8496ff29ab62764bb3f4ce4b48089004 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:23 -0700 Subject: time: Add y2038 safe update_persistent_clock64() As part of addressing in-kernel y2038 issues, this patch adds update_persistent_clock64() and replaces all the call sites of update_persistent_clock() with this function. This is a __weak implementation, which simply calls the existing y2038 unsafe update_persistent_clock(). This allows architecture specific implementations to be converted independently, and eventually y2038-unsafe update_persistent_clock() can be removed after all its architecture specific implementations have been converted to update_persistent_clock64(). 
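[ Editorial sketch, not part of the patch: the arch-side conversion mirrors the read side, keeping the error return of the 32-bit API; arch_has_writable_rtc() and arch_rtc_write_time64() are hypothetical helpers. ]

	int update_persistent_clock64(struct timespec64 now)
	{
		if (!arch_has_writable_rtc())
			return -ENODEV;
		/* Write 64-bit seconds to the hardware RTC */
		return arch_rtc_write_time64(now.tv_sec);
	}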
Suggested-by: Arnd Bergmann Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-4-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/ntp.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'kernel/time') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 9ad60d028508..7a681003001c 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -458,6 +458,16 @@ out: return leap; } +#ifdef CONFIG_GENERIC_CMOS_UPDATE +int __weak update_persistent_clock64(struct timespec64 now64) +{ + struct timespec now; + + now = timespec64_to_timespec(now64); + return update_persistent_clock(now); +} +#endif + #if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) static void sync_cmos_clock(struct work_struct *work); @@ -493,8 +503,9 @@ static void sync_cmos_clock(struct work_struct *work) if (persistent_clock_is_local) adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); #ifdef CONFIG_GENERIC_CMOS_UPDATE - fail = update_persistent_clock(timespec64_to_timespec(adjust)); + fail = update_persistent_clock64(adjust); #endif + #ifdef CONFIG_RTC_SYSTOHC if (fail == -ENODEV) fail = rtc_set_ntp_time(adjust); -- cgit v1.2.3 From 7f2981393af31a854879f2496cab4c978e886902 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:35 -0700 Subject: time: Don't build timekeeping_inject_sleeptime64() if no one uses it timekeeping_inject_sleeptime64() is only used by RTC suspend/resume, so add build dependencies on the necessary RTC related macros. Signed-off-by: Xunlei Pang [ Improve commit message clarity. ] Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-16-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b1dbfa573dce..3be559b6fd0a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1280,6 +1280,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, tk_debug_account_sleep_time(delta); } +#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) /** * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values * @delta: pointer to a timespec64 delta value @@ -1317,6 +1318,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) /* signal hrtimers about time change */ clock_was_set(); } +#endif /** * timekeeping_resume - Resumes the generic timekeeping subsystem. -- cgit v1.2.3 From 264bb3f79f2a465477cdcd2f0554e21aedc443a3 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:37 -0700 Subject: time: Fix a bug in timekeeping_suspend() with no persistent clock When there's no persistent clock, normally timekeeping_suspend_time should always be zero, but this can break in timekeeping_suspend(). At T1, there was a system suspend, so old_delta was assigned T1. 
After some time, one time adjustment happened, and xtime got the value of T1-dt(0s Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-18-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3be559b6fd0a..b7db4916415b 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1255,7 +1255,7 @@ void __init timekeeping_init(void) raw_spin_unlock_irqrestore(&timekeeper_lock, flags); } -/* time in seconds when suspend began */ +/* time in seconds when suspend began for persistent clock */ static struct timespec64 timekeeping_suspend_time; /** @@ -1428,24 +1428,26 @@ int timekeeping_suspend(void) timekeeping_forward_now(tk); timekeeping_suspended = 1; - /* - * To avoid drift caused by repeated suspend/resumes, - * which each can add ~1 second drift error, - * try to compensate so the difference in system time - * and persistent_clock time stays close to constant. - */ - delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time); - delta_delta = timespec64_sub(delta, old_delta); - if (abs(delta_delta.tv_sec) >= 2) { + if (has_persistent_clock()) { /* - * if delta_delta is too large, assume time correction - * has occured and set old_delta to the current delta. + * To avoid drift caused by repeated suspend/resumes, + * which each can add ~1 second drift error, + * try to compensate so the difference in system time + * and persistent_clock time stays close to constant. */ - old_delta = delta; - } else { - /* Otherwise try to adjust old_system to compensate */ - timekeeping_suspend_time = - timespec64_add(timekeeping_suspend_time, delta_delta); + delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time); + delta_delta = timespec64_sub(delta, old_delta); + if (abs(delta_delta.tv_sec) >= 2) { + /* + * if delta_delta is too large, assume time correction + * has occurred and set old_delta to the current delta. + */ + old_delta = delta; + } else { + /* Otherwise try to adjust old_system to compensate */ + timekeeping_suspend_time = + timespec64_add(timekeeping_suspend_time, delta_delta); + } } timekeeping_update(tk, TK_MIRROR); -- cgit v1.2.3 From 0fa88cb4b82b5cf7429bc1cef9db006ca035754e Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 1 Apr 2015 20:34:38 -0700 Subject: time, drivers/rtc: Don't bother with rtc_resume() for the nonstop clocksource If a system does not provide a persistent_clock(), the time will be updated on resume by rtc_resume(). With the addition of the non-stop clocksources for suspend timing, those systems set the time on resume in timekeeping_resume(), but may not provide a valid persistent_clock(). This results in the rtc_resume() logic thinking that no one has set the time, and it will then overwrite the suspend time again, which is not necessary and only increases clock error. So, fix this for rtc_resume(). This patch also improves the name of persistent_clock_exist to make it more grammatical.
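[ Editorial sketch, not part of the patch: on the RTC side the coordination reduces to an early-out in the resume callback; the real rtc_resume() in drivers/rtc/class.c does more than this. ]

	static int rtc_resume(struct device *dev)
	{
		/* Timekeeping already injected sleep time via a better source */
		if (timekeeping_rtc_skipresume())
			return 0;

		/* ... otherwise read the RTC and inject the suspend offset ... */
		return 0;
	}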
Signed-off-by: Xunlei Pang Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-19-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/timekeeping.c | 66 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 17 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b7db4916415b..79b9bc6e7876 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -64,9 +64,6 @@ static struct tk_fast tk_fast_raw ____cacheline_aligned; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; -/* Flag for if there is a persistent clock on this platform */ -bool __read_mostly persistent_clock_exist = false; - static inline void tk_normalize_xtime(struct timekeeper *tk) { while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) { @@ -1204,6 +1201,12 @@ void __weak read_boot_clock64(struct timespec64 *ts64) *ts64 = timespec_to_timespec64(ts); } +/* Flag for if timekeeping_resume() has injected sleeptime */ +static bool sleeptime_injected; + +/* Flag for if there is a persistent clock on this platform */ +static bool persistent_clock_exists; + /* * timekeeping_init - Initializes the clocksource and common timekeeping values */ @@ -1221,7 +1224,7 @@ void __init timekeeping_init(void) now.tv_sec = 0; now.tv_nsec = 0; } else if (now.tv_sec || now.tv_nsec) - persistent_clock_exist = true; + persistent_clock_exists = true; read_boot_clock64(&boot); if (!timespec64_valid_strict(&boot)) { @@ -1281,12 +1284,48 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, } #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE) +/** + * We have three kinds of time sources to use for sleep time + * injection, the preference order is: + * 1) non-stop clocksource + * 2) persistent clock (ie: RTC accessible when irqs are off) + * 3) RTC + * + * 1) and 2) are used by timekeeping, 3) by RTC subsystem. + * If system has neither 1) nor 2), 3) will be used finally. + * + * + * If timekeeping has injected sleeptime via either 1) or 2), + * 3) becomes needless, so in this case we don't need to call + * rtc_resume(), and this is what timekeeping_rtc_skipresume() + * means. + */ +bool timekeeping_rtc_skipresume(void) +{ + return sleeptime_injected; +} + +/** + * 1) can be determined whether to use or not only when doing + * timekeeping_resume() which is invoked after rtc_suspend(), + * so we can't skip rtc_suspend() surely if system has 1). + * + * But if system has 2), 2) will definitely be used, so in this + * case we don't need to call rtc_suspend(), and this is what + * timekeeping_rtc_skipsuspend() means. + */ +bool timekeeping_rtc_skipsuspend(void) +{ + return persistent_clock_exists; +} + /** * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values * @delta: pointer to a timespec64 delta value * * This hook is for architectures that cannot support read_persistent_clock64 * because their RTC/persistent clock is only accessible when irqs are enabled. + * and also don't have an effective nonstop clocksource. * * This function should only be called by rtc_resume(), and allows * a suspend offset to be injected into the timekeeping values. 
@@ -1296,13 +1335,6 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta) struct timekeeper *tk = &tk_core.timekeeper; unsigned long flags; - /* - * Make sure we don't set the clock twice, as timekeeping_resume() - * already did it - */ - if (has_persistent_clock()) - return; - raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); @@ -1334,8 +1366,8 @@ void timekeeping_resume(void) unsigned long flags; struct timespec64 ts_new, ts_delta; cycle_t cycle_now, cycle_delta; - bool suspendtime_found = false; + sleeptime_injected = false; read_persistent_clock64(&ts_new); clockevents_resume(); @@ -1381,13 +1413,13 @@ void timekeeping_resume(void) nsec += ((u64) cycle_delta * mult) >> shift; ts_delta = ns_to_timespec64(nsec); - suspendtime_found = true; + sleeptime_injected = true; } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) { ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time); - suspendtime_found = true; + sleeptime_injected = true; } - if (suspendtime_found) + if (sleeptime_injected) __timekeeping_inject_sleeptime(tk, &ts_delta); /* Re-base the last cycle value */ @@ -1421,14 +1453,14 @@ int timekeeping_suspend(void) * value returned, update the persistent_clock_exists flag. */ if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec) - persistent_clock_exist = true; + persistent_clock_exists = true; raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); timekeeping_forward_now(tk); timekeeping_suspended = 1; - if (has_persistent_clock()) { + if (persistent_clock_exists) { /* * To avoid drift caused by repeated suspend/resumes, * which each can add ~1 second drift error, -- cgit v1.2.3 From 8e56f33f8439b2f8e7f4ae7f3d0bfe683ecc3b09 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 1 Apr 2015 20:34:39 -0700 Subject: clocksource: Improve comment explaining clocks_calc_max_nsecs()'s 50% safety margin Ingo noted that the description of clocks_calc_max_nsecs()'s 50% safety margin was somewhat circular. So this patch tries to improve the comment to better explain what we mean by the 50% safety margin and why we need it. Signed-off-by: John Stultz Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Richard Cochran Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1427945681-29972-20-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar --- kernel/time/clocksource.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c3be3c71bbad..15facb1b9c60 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -472,8 +472,11 @@ static u32 clocksource_max_adjustment(struct clocksource *cs) * @max_cyc: maximum cycle value before potential overflow (does not include * any safety margin) * - * NOTE: This function includes a safety margin of 50%, so that bad clock values - * can be detected. + * NOTE: This function includes a safety margin of 50%, in other words, we + * return half the number of nanoseconds the hardware counter can technically + * cover. This is done so that we can potentially detect problems caused by + * delayed timers or bad hardware, which might result in time intervals that + * are larger then what the math used can handle without overflows. 
*/ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc) { -- cgit v1.2.3 From 592a438ff3fea61d303c5784c209b3f1fd3e16df Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:01:10 +0200 Subject: clockevents: Provide explicit broadcast control functions clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the broadcast control into a separate function and provide inline helpers. Switch clockevents_notify() over. This will go away once all callers are converted. This also gets rid of the nested locking of clockevents_lock and broadcast_lock. The broadcast control functions do not require clockevents_lock. Only the managing functions (setup/shutdown/suspend/resume) of the broadcast device require clockevents_lock. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Daniel Lezcano Cc: Len Brown Cc: Peter Zijlstra Cc: Tony Lindgren Link: http://lkml.kernel.org/r/8086559.ttsuS0n1Xr@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 6 ++++- kernel/time/tick-broadcast.c | 62 +++++++++++++++++++------------------- kernel/time/tick-internal.h | 2 -- 3 files changed, 32 insertions(+), 38 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 7af614829da1..599ff8d3fda5 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -656,9 +656,13 @@ int clockevents_notify(unsigned long reason, void *arg) switch (reason) { case CLOCK_EVT_NOTIFY_BROADCAST_ON: + tick_broadcast_enable(); + break; case CLOCK_EVT_NOTIFY_BROADCAST_OFF: + tick_broadcast_disable(); + break; case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: - tick_broadcast_on_off(reason, arg); + tick_broadcast_force(); break; case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f5e0fd5652dc..1a0bee04ef8c 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -33,7 +33,7 @@ static cpumask_var_t tick_broadcast_mask; static cpumask_var_t tick_broadcast_on; static cpumask_var_t tmpmask; static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); -static int tick_broadcast_force; +static int tick_broadcast_forced; #ifdef CONFIG_TICK_ONESHOT static void tick_broadcast_clear_oneshot(int cpu); @@ -326,49 +326,54 @@ unlock: raw_spin_unlock(&tick_broadcast_lock); } -/* - * Powerstate information: The system enters/leaves a state, where - * affected devices might stop +/** + * tick_broadcast_control - Enable/disable or force broadcast mode + * @mode: The selected broadcast mode + * + * Called when the system enters a state where affected tick devices + * might stop. Note: TICK_BROADCAST_FORCE cannot be undone. + * + * Called with interrupts disabled, so clockevents_lock is not + * required here because the local clock event device cannot go away + * under us. */ -static void tick_do_broadcast_on_off(unsigned long *reason) +void tick_broadcast_control(enum tick_broadcast_mode mode) { struct clock_event_device *bc, *dev; struct tick_device *td; - unsigned long flags; int cpu, bc_stopped; - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - - cpu = smp_processor_id(); - td = &per_cpu(tick_cpu_device, cpu); + td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; - bc = tick_broadcast_device.evtdev; /* * Is the device not affected by the powerstate ?
*/ if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) - goto out; + return; if (!tick_device_is_functional(dev)) - goto out; + return; + raw_spin_lock(&tick_broadcast_lock); + cpu = smp_processor_id(); + bc = tick_broadcast_device.evtdev; bc_stopped = cpumask_empty(tick_broadcast_mask); - switch (*reason) { - case CLOCK_EVT_NOTIFY_BROADCAST_ON: - case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: + switch (mode) { + case TICK_BROADCAST_FORCE: + tick_broadcast_forced = 1; + case TICK_BROADCAST_ON: cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } - if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE) - tick_broadcast_force = 1; break; - case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - if (tick_broadcast_force) + + case TICK_BROADCAST_OFF: + if (tick_broadcast_forced) break; cpumask_clear_cpu(cpu, tick_broadcast_on); if (!tick_device_is_functional(dev)) @@ -390,22 +395,9 @@ static void tick_do_broadcast_on_off(unsigned long *reason) else tick_broadcast_setup_oneshot(bc); } -out: - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); -} - -/* - * Powerstate information: The system enters/leaves a state, where - * affected devices might stop. - */ -void tick_broadcast_on_off(unsigned long reason, int *oncpu) -{ - if (!cpumask_test_cpu(*oncpu, cpu_online_mask)) - printk(KERN_ERR "tick-broadcast: ignoring broadcast for " - "offline CPU #%d\n", *oncpu); - else - tick_do_broadcast_on_off(&reason); + raw_spin_unlock(&tick_broadcast_lock); } +EXPORT_SYMBOL_GPL(tick_broadcast_control); /* * Set the periodic handler depending on broadcast on/off diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index b6ba0a44e740..62e331d1bc76 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -53,7 +53,6 @@ extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt); extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu); extern void tick_install_broadcast_device(struct clock_event_device *dev); extern int tick_is_broadcast_device(struct clock_event_device *dev); -extern void tick_broadcast_on_off(unsigned long reason, int *oncpu); extern void tick_shutdown_broadcast(unsigned int *cpup); extern void tick_suspend_broadcast(void); extern void tick_resume_broadcast(void); @@ -68,7 +67,6 @@ static inline void tick_install_broadcast_device(struct clock_event_device *dev) static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; } static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; } static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { } -static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { } static inline void tick_shutdown_broadcast(unsigned int *cpup) { } static inline void tick_suspend_broadcast(void) { } static inline void tick_resume_broadcast(void) { } -- cgit v1.2.3 From 89feddbfe7023ccfb4a6d7f5e3f5161d91b28b18 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:03:42 +0200 Subject: clockevents: Remove the broadcast control leftovers All users converted. Remove the notify leftovers. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/2076318.76XJZ8QYP3@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 599ff8d3fda5..dba0b83708b3 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -655,16 +655,6 @@ int clockevents_notify(unsigned long reason, void *arg) raw_spin_lock_irqsave(&clockevents_lock, flags); switch (reason) { - case CLOCK_EVT_NOTIFY_BROADCAST_ON: - tick_broadcast_enable(); - break; - case CLOCK_EVT_NOTIFY_BROADCAST_OFF: - tick_broadcast_disable(); - break; - case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: - tick_broadcast_force(); - break; - case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: ret = tick_broadcast_oneshot_control(reason); -- cgit v1.2.3 From 1fe5d5c3c9ba0c4ade18e3325cba0ffe35127941 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:05:15 +0200 Subject: clockevents: Provide explicit broadcast oneshot control functions clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the broadcast oneshot control into a separate function and provide inline helpers. Switch clockevents_notify() over. This will go away once all callers are converted. This also gets rid of the nested locking of clockevents_lock and broadcast_lock. The broadcast oneshot control functions do not require clockevents_lock. Only the managing functions (setup/shutdown/suspend/resume) of the broadcast device require clockevents_lock. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki Cc: Alexandre Courbot Cc: Daniel Lezcano Cc: Len Brown Cc: Peter Zijlstra Cc: Stephen Warren Cc: Thierry Reding Cc: Tony Lindgren Link: http://lkml.kernel.org/r/13000649.8qZuEDV0OA@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 4 +++- kernel/time/tick-broadcast.c | 28 +++++++++++++++++----------- kernel/time/tick-internal.h | 2 -- 3 files changed, 20 insertions(+), 14 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index dba0b83708b3..7791b1c94ef2 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -656,8 +656,10 @@ int clockevents_notify(unsigned long reason, void *arg) switch (reason) { case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: + tick_broadcast_enter(); + break; case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: - ret = tick_broadcast_oneshot_control(reason); + tick_broadcast_exit(); break; case CLOCK_EVT_NOTIFY_CPU_DYING: diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 1a0bee04ef8c..55e43f20987a 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -687,18 +687,23 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -/* - * Powerstate information: The system enters/leaves a state, where - * affected devices might stop +/** + * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode + * @state: The target state (enter/exit) + * + * The system enters/leaves a state, where affected devices might stop * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
+ * + * Called with interrupts disabled, so clockevents_lock is not + * required here because the local clock event device cannot go away + * under us. */ -int tick_broadcast_oneshot_control(unsigned long reason) +int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc, *dev; struct tick_device *td; - unsigned long flags; - ktime_t now; int cpu, ret = 0; + ktime_t now; /* * Periodic mode does not care about the enter/exit of power @@ -711,17 +716,17 @@ int tick_broadcast_oneshot_control(unsigned long reason) * We are called with preemtion disabled from the depth of the * idle code, so we can't be moved away. */ - cpu = smp_processor_id(); - td = &per_cpu(tick_cpu_device, cpu); + td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) return 0; + raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; + cpu = smp_processor_id(); - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { + if (state == TICK_BROADCAST_ENTER) { if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); broadcast_shutdown_local(bc, dev); @@ -813,9 +818,10 @@ int tick_broadcast_oneshot_control(unsigned long reason) } } out: - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); + raw_spin_unlock(&tick_broadcast_lock); return ret; } +EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control); /* * Reset the one shot broadcast for a cpu diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 62e331d1bc76..0266f9dbd114 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -117,7 +117,6 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } /* Functions related to oneshot broadcasting */ #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); -extern int tick_broadcast_oneshot_control(unsigned long reason); extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); extern int tick_broadcast_oneshot_active(void); @@ -126,7 +125,6 @@ bool tick_broadcast_oneshot_available(void); extern struct cpumask *tick_get_broadcast_oneshot_mask(void); #else /* !(BROADCAST && ONESHOT): */ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } -static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } -- cgit v1.2.3 From ffa48c0d76803057ee89bf220305466d74256d7b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Apr 2015 02:36:10 +0200 Subject: clockevents: Remove broadcast oneshot control leftovers Now that all users are converted over to explicit calls into the clockevents state machine, remove the notification chain leftovers. Original-from: Thomas Gleixner Signed-off-by: Rafael J. 
Wysocki Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: John Stultz Link: http://lkml.kernel.org/r/14018863.NQUzkFuafr@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 7791b1c94ef2..be9abf32c0b9 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -655,13 +655,6 @@ int clockevents_notify(unsigned long reason, void *arg) raw_spin_lock_irqsave(&clockevents_lock, flags); switch (reason) { - case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: - tick_broadcast_enter(); - break; - case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: - tick_broadcast_exit(); - break; - case CLOCK_EVT_NOTIFY_CPU_DYING: tick_handover_do_timer(arg); break; -- cgit v1.2.3 From 52c063d1adbc16c76e70fffa20727fcd4e9343b3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:37:24 +0200 Subject: clockevents: Make tick handover explicit clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the tick_handover call and invoke it explicitly from the hotplug code. This temporary solution will be cleaned up in later patches. Signed-off-by: Thomas Gleixner [ Rebase ] Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Cc: John Stultz Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1658173.RkEEILFiQZ@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 4 ---- kernel/time/hrtimer.c | 4 ---- kernel/time/tick-common.c | 9 ++++++--- kernel/time/tick-internal.h | 1 - 4 files changed, 6 insertions(+), 12 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index be9abf32c0b9..88fb3b96c7cc 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -655,10 +655,6 @@ int clockevents_notify(unsigned long reason, void *arg) raw_spin_lock_irqsave(&clockevents_lock, flags); switch (reason) { - case CLOCK_EVT_NOTIFY_CPU_DYING: - tick_handover_do_timer(arg); - break; - case CLOCK_EVT_NOTIFY_CPU_DEAD: tick_shutdown_broadcast_oneshot(arg); tick_shutdown_broadcast(arg); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 721d29b99d10..6a7a64ec7d1b 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1707,10 +1707,6 @@ static int hrtimer_cpu_notify(struct notifier_block *self, break; #ifdef CONFIG_HOTPLUG_CPU - case CPU_DYING: - case CPU_DYING_FROZEN: - clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu); - break; case CPU_DEAD: case CPU_DEAD_FROZEN: { diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index e28ba5c044c5..055c868f3ec9 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -332,20 +332,23 @@ out_bc: tick_install_broadcast_device(newdev); } +#ifdef CONFIG_HOTPLUG_CPU /* * Transfer the do_timer job away from a dying cpu. * - * Called with interrupts disabled. + * Called with interrupts disabled. Not locking required. If + * tick_do_timer_cpu is owned by this cpu, nothing can change it. */ -void tick_handover_do_timer(int *cpup) +void tick_handover_do_timer(void) { - if (*cpup == tick_do_timer_cpu) { + if (tick_do_timer_cpu == smp_processor_id()) { int cpu = cpumask_first(cpu_online_mask); tick_do_timer_cpu = (cpu < nr_cpu_ids) ?
cpu : TICK_DO_TIMER_NONE; } } +#endif /* * Shutdown an event device on a given cpu: diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 0266f9dbd114..aabcb5d00cf2 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -20,7 +20,6 @@ extern int tick_do_timer_cpu __read_mostly; extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast); extern void tick_handle_periodic(struct clock_event_device *dev); extern void tick_check_new_device(struct clock_event_device *dev); -extern void tick_handover_do_timer(int *cpup); extern void tick_shutdown(unsigned int *cpup); extern void tick_suspend(void); extern void tick_resume(void); -- cgit v1.2.3 From a49b116dcb1265f238f3169507424257b0519069 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 3 Apr 2015 02:38:05 +0200 Subject: clockevents: Cleanup dead cpu explicitly clockevents_notify() is a leftover from the early design of the clockevents facility. It's really not a notification mechanism, it's a multiplex call. We are way better off to have explicit calls instead of this monstrosity. Split out the cleanup function for a dead cpu and invoke it directly from the cpu down code. Make it conditional on CONFIG_HOTPLUG_CPU as well. This is a temporary change that will be refined in the future. Signed-off-by: Thomas Gleixner [ Rebased, added clockevents_notify() removal ] Signed-off-by: Rafael J. Wysocki Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1735025.raBZdQHM3m@vostro.rjw.lan Signed-off-by: Ingo Molnar --- kernel/time/clockevents.c | 51 ++++++++++++++++++-------------------- kernel/time/hrtimer.c | 3 --- kernel/time/tick-broadcast.c | 39 ++++++++++++++++----------------- kernel/time/tick-common.c | 6 +++--- kernel/time/tick-internal.h | 10 ++++----- 5 files changed, 49 insertions(+), 60 deletions(-) (limited to 'kernel/time') diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 88fb3b96c7cc..25d942d1da27 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -642,49 +642,40 @@ void clockevents_resume(void) dev->resume(dev); } +#ifdef CONFIG_HOTPLUG_CPU /** - * clockevents_notify - notification about relevant events - * Returns 0 on success, any other value on error + * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu */ -int clockevents_notify(unsigned long reason, void *arg) +void tick_cleanup_dead_cpu(int cpu) { struct clock_event_device *dev, *tmp; unsigned long flags; - int cpu, ret = 0; raw_spin_lock_irqsave(&clockevents_lock, flags); - switch (reason) { - case CLOCK_EVT_NOTIFY_CPU_DEAD: - tick_shutdown_broadcast_oneshot(arg); - tick_shutdown_broadcast(arg); - tick_shutdown(arg); - /* - * Unregister the clock event devices which were - * released from the users in the notify chain. - */ - list_for_each_entry_safe(dev, tmp, &clockevents_released, list) + tick_shutdown_broadcast_oneshot(cpu); + tick_shutdown_broadcast(cpu); + tick_shutdown(cpu); + /* + * Unregister the clock event devices which were + * released from the users in the notify chain.
+ */ + list_for_each_entry_safe(dev, tmp, &clockevents_released, list) + list_del(&dev->list); + /* + * Now check whether the CPU has left unused per cpu devices + */ + list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { + if (cpumask_test_cpu(cpu, dev->cpumask) && + cpumask_weight(dev->cpumask) == 1 && + !tick_is_broadcast_device(dev)) { + BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED); list_del(&dev->list); - /* - * Now check whether the CPU has left unused per cpu devices - */ - cpu = *((int *)arg); - list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) { - if (cpumask_test_cpu(cpu, dev->cpumask) && - cpumask_weight(dev->cpumask) == 1 && - !tick_is_broadcast_device(dev)) { - BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED); - list_del(&dev->list); - } } - break; - default: - break; } raw_spin_unlock_irqrestore(&clockevents_lock, flags); - return ret; } -EXPORT_SYMBOL_GPL(clockevents_notify); +#endif #ifdef CONFIG_SYSFS struct bus_type clockevents_subsys = { diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 6a7a64ec7d1b..76d4bd962b19 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1709,11 +1709,8 @@ static int hrtimer_cpu_notify(struct notifier_block *self, #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: case CPU_DEAD_FROZEN: - { - clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu); migrate_hrtimers(scpu); break; - } #endif default: diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 55e43f20987a..7e8ca4f448a8 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -410,14 +410,14 @@ void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast) dev->event_handler = tick_handle_periodic_broadcast; } +#ifdef CONFIG_HOTPLUG_CPU /* * Remove a CPU from broadcasting */ -void tick_shutdown_broadcast(unsigned int *cpup) +void tick_shutdown_broadcast(unsigned int cpu) { struct clock_event_device *bc; unsigned long flags; - unsigned int cpu = *cpup; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -432,6 +432,7 @@ void tick_shutdown_broadcast(unsigned int *cpup) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } +#endif void tick_suspend_broadcast(void) { @@ -672,21 +673,6 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } -void hotplug_cpu__broadcast_tick_pull(int deadcpu) -{ - struct clock_event_device *bc; - unsigned long flags; - - raw_spin_lock_irqsave(&tick_broadcast_lock, flags); - bc = tick_broadcast_device.evtdev; - - if (bc && broadcast_needs_cpu(bc, deadcpu)) { - /* This moves the broadcast assignment to this CPU: */ - clockevents_program_event(bc, bc->next_event, 1); - } - raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); -} - /** * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode * @state: The target state (enter/exit) @@ -908,14 +894,28 @@ void tick_broadcast_switch_to_oneshot(void) raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } +#ifdef CONFIG_HOTPLUG_CPU +void hotplug_cpu__broadcast_tick_pull(int deadcpu) +{ + struct clock_event_device *bc; + unsigned long flags; + + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); + bc = tick_broadcast_device.evtdev; + + if (bc && broadcast_needs_cpu(bc, deadcpu)) { + /* This moves the broadcast assignment to this CPU: */ + clockevents_program_event(bc, bc->next_event, 1); + } + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); +} /* * Remove a dead CPU from broadcasting */ -void 
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 55e43f20987a..7e8ca4f448a8 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -410,14 +410,14 @@ void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
 		dev->event_handler = tick_handle_periodic_broadcast;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
  * Remove a CPU from broadcasting
  */
-void tick_shutdown_broadcast(unsigned int *cpup)
+void tick_shutdown_broadcast(unsigned int cpu)
 {
 	struct clock_event_device *bc;
 	unsigned long flags;
-	unsigned int cpu = *cpup;
 
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
@@ -432,6 +432,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
 
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
+#endif
 
 void tick_suspend_broadcast(void)
 {
@@ -672,21 +673,6 @@ static void broadcast_shutdown_local(struct clock_event_device *bc,
 	clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
 }
 
-void hotplug_cpu__broadcast_tick_pull(int deadcpu)
-{
-	struct clock_event_device *bc;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
-	bc = tick_broadcast_device.evtdev;
-
-	if (bc && broadcast_needs_cpu(bc, deadcpu)) {
-		/* This moves the broadcast assignment to this CPU: */
-		clockevents_program_event(bc, bc->next_event, 1);
-	}
-	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
-}
-
 /**
  * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
  * @state:	The target state (enter/exit)
@@ -908,14 +894,28 @@ void tick_broadcast_switch_to_oneshot(void)
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+void hotplug_cpu__broadcast_tick_pull(int deadcpu)
+{
+	struct clock_event_device *bc;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
+	bc = tick_broadcast_device.evtdev;
+
+	if (bc && broadcast_needs_cpu(bc, deadcpu)) {
+		/* This moves the broadcast assignment to this CPU: */
+		clockevents_program_event(bc, bc->next_event, 1);
+	}
+	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+}
+
 /*
  * Remove a dead CPU from broadcasting
  */
-void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
+void tick_shutdown_broadcast_oneshot(unsigned int cpu)
 {
 	unsigned long flags;
-	unsigned int cpu = *cpup;
 
 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
@@ -929,6 +929,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
 
 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
+#endif
 
 /*
  * Check, whether the broadcast device is in one shot mode
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 055c868f3ec9..fac3e98fec49 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -348,7 +348,6 @@ void tick_handover_do_timer(void)
 			TICK_DO_TIMER_NONE;
 	}
 }
-#endif
 
 /*
  * Shutdown an event device on a given cpu:
@@ -357,9 +356,9 @@ void tick_handover_do_timer(void)
  * access the hardware device itself.
  * We just set the mode and remove it from the lists.
  */
-void tick_shutdown(unsigned int *cpup)
+void tick_shutdown(unsigned int cpu)
 {
-	struct tick_device *td = &per_cpu(tick_cpu_device, *cpup);
+	struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
 	struct clock_event_device *dev = td->evtdev;
 
 	td->mode = TICKDEV_MODE_PERIODIC;
@@ -375,6 +374,7 @@ void tick_shutdown(unsigned int *cpup)
 		td->evtdev = NULL;
 	}
 }
+#endif
 
 /**
  * tick_suspend_local - Suspend the local tick device
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index aabcb5d00cf2..b64fdd8054c5 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -20,7 +20,7 @@ extern int tick_do_timer_cpu __read_mostly;
 extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
 extern void tick_handle_periodic(struct clock_event_device *dev);
 extern void tick_check_new_device(struct clock_event_device *dev);
-extern void tick_shutdown(unsigned int *cpup);
+extern void tick_shutdown(unsigned int cpu);
 extern void tick_suspend(void);
 extern void tick_resume(void);
 extern bool tick_check_replacement(struct clock_event_device *curdev,
@@ -52,7 +52,7 @@ extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
 extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
 extern void tick_install_broadcast_device(struct clock_event_device *dev);
 extern int tick_is_broadcast_device(struct clock_event_device *dev);
-extern void tick_shutdown_broadcast(unsigned int *cpup);
+extern void tick_shutdown_broadcast(unsigned int cpu);
 extern void tick_suspend_broadcast(void);
 extern void tick_resume_broadcast(void);
 extern bool tick_resume_check_broadcast(void);
@@ -66,7 +66,7 @@ static inline void tick_install_broadcast_device(struct clock_event_device *dev
 static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; }
 static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; }
 static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
-static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
+static inline void tick_shutdown_broadcast(unsigned int cpu) { }
 static inline void tick_suspend_broadcast(void) { }
 static inline void tick_resume_broadcast(void) { }
 static inline bool tick_resume_check_broadcast(void) { return false; }
@@ -117,7 +117,7 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
 extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 extern void tick_broadcast_switch_to_oneshot(void);
-extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
+extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
 extern int tick_broadcast_oneshot_active(void);
 extern void tick_check_oneshot_broadcast_this_cpu(void);
 bool tick_broadcast_oneshot_available(void);
@@ -125,7 +125,7 @@ extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
 #else /* !(BROADCAST && ONESHOT): */
 static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
-static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
+static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
 static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
 static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); }
--
cgit v1.2.3
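Worth noting in the tick-internal.h hunks above is the stub convention this commit leans on: a function compiled out by a config option gets an empty static inline replacement in the header, so call sites never need their own #ifdefs. The pattern in miniature, with a made-up function name rather than an actual kernel declaration:

```c
/* some_header.h -- illustrative stub pattern, hypothetical function name */
#ifdef CONFIG_HOTPLUG_CPU
extern void takedown_dead_cpu(unsigned int cpu);
#else
static inline void takedown_dead_cpu(unsigned int cpu) { }	/* compiles away */
#endif
```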
From 347c6f6dda1098318088feb8e60188f0161e743d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Fri, 3 Apr 2015 02:39:05 +0200
Subject: timekeeping: Get rid of stale comment

Arch-specific management of xtime/jiffies/wall_to_monotonic has been
gone for quite a while. Zap the stale comment.

Signed-off-by: Thomas Gleixner
Signed-off-by: Rafael J. Wysocki
Acked-by: John Stultz
Cc: Peter Zijlstra
Cc: John Stultz
Link: http://lkml.kernel.org/r/2422730.dmO29q661S@vostro.rjw.lan
Signed-off-by: Ingo Molnar
---
 kernel/time/timekeeping.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 79b9bc6e7876..946acb72179f 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1354,10 +1354,6 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
 
 /**
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
- *
- * This is for the generic clocksource timekeeping.
- * xtime/wall_to_monotonic/jiffies/etc are
- * still managed by arch specific suspend/resume code.
  */
 void timekeeping_resume(void)
 {
--
cgit v1.2.3


From 422fe7502e3f16dc1c680f22d31f59f022edc10d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Fri, 3 Apr 2015 15:21:51 +0200
Subject: timers/PM: Fix up tick_unfreeze()

A recent conflict resolution left tick_resume() in tick_unfreeze(),
which leads to an unbalanced execution of tick_resume_broadcast()
every time that function runs.

Fix that by replacing the tick_resume() in tick_unfreeze() with
tick_resume_local(), as appropriate.

Signed-off-by: Rafael J. Wysocki
Cc: boris.ostrovsky@oracle.com
Cc: david.vrabel@citrix.com
Cc: konrad.wilk@oracle.com
Cc: peterz@infradead.org
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/8099075.V0LvN3pQAV@vostro.rjw.lan
Signed-off-by: Ingo Molnar
---
 kernel/time/tick-common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index fac3e98fec49..ad66a51ca4fa 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -482,7 +482,7 @@ void tick_unfreeze(void)
 	if (tick_freeze_depth == num_online_cpus())
 		timekeeping_resume();
 	else
-		tick_resume();
+		tick_resume_local();
 
 	tick_freeze_depth--;
--
cgit v1.2.3
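The bug and the fix both follow from the symmetry of the freeze path: tick_freeze() and tick_unfreeze() share a depth counter, the last CPU to freeze suspends timekeeping, the first CPU to unfreeze resumes it, and every other CPU suspends or resumes only its local tick device. tick_suspend_local() must therefore be paired with tick_resume_local(); the full tick_resume() also resumes the broadcast device, which is what made the tick_resume_broadcast() executions unbalanced. A user-space model of the counter logic (a sketch: locking omitted, all routines reduced to stubs):

```c
#include <assert.h>
#include <stdio.h>

#define NR_CPUS 4

static int freeze_depth;

/* Stand-ins for the real routines; they only log here. */
static void timekeeping_suspend(void) { puts("suspend timekeeping"); }
static void timekeeping_resume(void)  { puts("resume timekeeping"); }
static void tick_suspend_local(void)  { puts("suspend local tick"); }
static void tick_resume_local(void)   { puts("resume local tick"); }

static void tick_freeze(void)
{
	if (++freeze_depth == NR_CPUS)
		timekeeping_suspend();	/* last CPU in */
	else
		tick_suspend_local();
}

static void tick_unfreeze(void)
{
	if (freeze_depth == NR_CPUS)
		timekeeping_resume();	/* first CPU out */
	else
		tick_resume_local();	/* must mirror tick_suspend_local() */
	freeze_depth--;
}

int main(void)
{
	for (int i = 0; i < NR_CPUS; i++)
		tick_freeze();
	for (int i = 0; i < NR_CPUS; i++)
		tick_unfreeze();
	assert(freeze_depth == 0);	/* suspend/resume calls stay balanced */
	return 0;
}
```

Running this prints one timekeeping suspend/resume pair and NR_CPUS - 1 local pairs; with the full tick_resume() in the unfreeze path, the broadcast device would be resumed on every non-first CPU without a matching suspend.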
From def747087e83aa5f6a71582cfa71e18341988688 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Fri, 3 Apr 2015 15:31:32 +0200
Subject: timers/PM: Drop unnecessary braces from tick_freeze()

Some braces in tick_freeze() are not necessary, so drop them.

Signed-off-by: Rafael J. Wysocki
Cc: peterz@infradead.org
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1534128.H5hN3KBFB4@vostro.rjw.lan
Signed-off-by: Ingo Molnar
---
 kernel/time/tick-common.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index ad66a51ca4fa..3ae6afa1eb98 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -457,11 +457,10 @@ void tick_freeze(void)
 	raw_spin_lock(&tick_freeze_lock);
 
 	tick_freeze_depth++;
-	if (tick_freeze_depth == num_online_cpus()) {
+	if (tick_freeze_depth == num_online_cpus())
 		timekeeping_suspend();
-	} else {
+	else
 		tick_suspend_local();
-	}
 
 	raw_spin_unlock(&tick_freeze_lock);
 }
--
cgit v1.2.3