From e1e41b6ce5f9c1a80bf4f2404ec5ab11c6c5a2ad Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Mon, 18 Mar 2019 20:55:56 +0100
Subject: timekeeping: Consistently use unsigned int for seqcount snapshot

The timekeeping code uses a random mix of "unsigned long" and "unsigned
int" for the seqcount snapshots (ratio 14:12). Since the seqlock.h API is
entirely based on unsigned int, use that throughout.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Stephen Boyd <sboyd@kernel.org>
Link: https://lkml.kernel.org/r/20190318195557.20773-1-linux@rasmusvillemoes.dk
---
 kernel/time/jiffies.c     |  2 +-
 kernel/time/sched_clock.c |  4 ++--
 kernel/time/tick-common.c |  2 +-
 kernel/time/tick-sched.c  |  3 ++-
 kernel/time/timekeeping.c | 18 +++++++++---------
 5 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index dc1b6f1929f9..95f8f3304c19 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -63,7 +63,7 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void)
 {
-	unsigned long seq;
+	unsigned int seq;
 	u64 ret;
 
 	do {
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 094b82ca95e5..16b80c2b4fe8 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -94,7 +94,7 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 unsigned long long notrace sched_clock(void)
 {
 	u64 cyc, res;
-	unsigned long seq;
+	unsigned int seq;
 	struct clock_read_data *rd;
 
 	do {
@@ -267,7 +267,7 @@ void __init generic_sched_clock_init(void)
  */
 static u64 notrace suspended_sched_clock_read(void)
 {
-	unsigned long seq = raw_read_seqcount(&cd.seq);
+	unsigned int seq = raw_read_seqcount(&cd.seq);
 
 	return cd.read_data[seq & 1].epoch_cyc;
 }
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 529143b4c8d2..561641b2153f 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -149,7 +149,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
 	    !tick_broadcast_oneshot_active()) {
 		clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC);
 	} else {
-		unsigned long seq;
+		unsigned int seq;
 		ktime_t next;
 
 		do {
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6fa52cd6df0b..b50f6f22c88e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -645,7 +645,8 @@ static inline bool local_timer_softirq_pending(void)
 static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
 	u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
-	unsigned long seq, basejiff;
+	unsigned long basejiff;
+	unsigned int seq;
 
 	/* Read jiffies and the time when jiffies were updated last */
 	do {
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f986e1918d12..540145da33da 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -720,7 +720,7 @@ static void timekeeping_forward_now(struct timekeeper *tk)
 void ktime_get_real_ts64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	unsigned long seq;
+	unsigned int seq;
 	u64 nsecs;
 
 	WARN_ON(timekeeping_suspended);
@@ -829,7 +829,7 @@ EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
 ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
 {
 	ktime_t *offset = offsets[offs];
-	unsigned long seq;
+	unsigned int seq;
 	ktime_t tconv;
 
 	do {
@@ -960,7 +960,7 @@ time64_t __ktime_get_real_seconds(void)
 void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	unsigned long seq;
+	unsigned int seq;
 	ktime_t base_raw;
 	ktime_t base_real;
 	u64 nsec_raw;
@@ -1122,7 +1122,7 @@ int get_device_system_crosststamp(int (*get_time_fn)
 	ktime_t base_real, base_raw;
 	u64 nsec_real, nsec_raw;
 	u8 cs_was_changed_seq;
-	unsigned long seq;
+	unsigned int seq;
 	bool do_interp;
 	int ret;
 
@@ -1409,7 +1409,7 @@ int timekeeping_notify(struct clocksource *clock)
 void ktime_get_raw_ts64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	unsigned long seq;
+	unsigned int seq;
 	u64 nsecs;
 
 	do {
@@ -1431,7 +1431,7 @@ EXPORT_SYMBOL(ktime_get_raw_ts64);
 int timekeeping_valid_for_hres(void)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	unsigned long seq;
+	unsigned int seq;
 	int ret;
 
 	do {
@@ -1450,7 +1450,7 @@ int timekeeping_valid_for_hres(void)
 u64 timekeeping_max_deferment(void)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	unsigned long seq;
+	unsigned int seq;
 	u64 ret;
 
 	do {
@@ -2150,7 +2150,7 @@ EXPORT_SYMBOL_GPL(getboottime64);
 void ktime_get_coarse_real_ts64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-	unsigned long seq;
+	unsigned int seq;
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
@@ -2164,7 +2164,7 @@ void ktime_get_coarse_ts64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	struct timespec64 now, mono;
-	unsigned long seq;
+	unsigned int seq;
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-- 
cgit v1.2.3


From 1b72d43237980eab9b6ae6bb8181e51c840377e6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 21 Mar 2019 16:39:20 +0100
Subject: tick: Remove outgoing CPU from broadcast masks

Valentin reported that unplugging a CPU occasionally results in a warning
in the tick broadcast code which is triggered when an offline CPU is in the
broadcast mask.

This happens because the outgoing CPU is not removing itself from the
broadcast masks, especially not from the broadcast_force_mask. The removal
happens on the control CPU after the outgoing CPU is dead. It's a long
standing issue, but the warning is harmless.

Rework the hotplug mechanism so that the outgoing CPU removes itself from
the broadcast masks after disabling interrupts and removing itself from the
online mask.

Reported-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1903211540180.1784@nanos.tec.linutronix.de
---
 kernel/time/clockevents.c    | 18 ++++++++++++++++--
 kernel/time/tick-broadcast.c | 40 +++++++++++++++++++---------------------
 kernel/time/tick-internal.h  | 10 ++++++----
 3 files changed, 41 insertions(+), 27 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 5e77662dd2d9..f5490222e134 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -611,6 +611,22 @@ void clockevents_resume(void)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
+
+# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+/**
+ * tick_offline_cpu - Take CPU out of the broadcast mechanism
+ * @cpu:	The outgoing CPU
+ *
+ * Called on the outgoing CPU after it took itself offline.
+ */
+void tick_offline_cpu(unsigned int cpu)
+{
+	raw_spin_lock(&clockevents_lock);
+	tick_broadcast_offline(cpu);
+	raw_spin_unlock(&clockevents_lock);
+}
+# endif
+
 /**
  * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu
  */
@@ -621,8 +637,6 @@ void tick_cleanup_dead_cpu(int cpu)
 
 	raw_spin_lock_irqsave(&clockevents_lock, flags);
 
-	tick_shutdown_broadcast_oneshot(cpu);
-	tick_shutdown_broadcast(cpu);
 	tick_shutdown(cpu);
 	/*
 	 * Unregister the clock event devices which were
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index ee834d4fb814..0283523de045 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -36,10 +36,12 @@ static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
 static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 static void tick_broadcast_clear_oneshot(int cpu);
 static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
+static void tick_broadcast_oneshot_offline(unsigned int cpu);
 #else
 static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_clear_oneshot(int cpu) { }
 static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
+static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
 #endif
 
 /*
@@ -433,27 +435,29 @@ void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-/*
- * Remove a CPU from broadcasting
- */
-void tick_shutdown_broadcast(unsigned int cpu)
+static void tick_shutdown_broadcast(void)
 {
-	struct clock_event_device *bc;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
-
-	bc = tick_broadcast_device.evtdev;
-	cpumask_clear_cpu(cpu, tick_broadcast_mask);
-	cpumask_clear_cpu(cpu, tick_broadcast_on);
+	struct clock_event_device *bc = tick_broadcast_device.evtdev;
 
 	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
 		if (bc && cpumask_empty(tick_broadcast_mask))
 			clockevents_shutdown(bc);
 	}
+}
 
-	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+/*
+ * Remove a CPU from broadcasting
+ */
+void tick_broadcast_offline(unsigned int cpu)
+{
+	raw_spin_lock(&tick_broadcast_lock);
+	cpumask_clear_cpu(cpu, tick_broadcast_mask);
+	cpumask_clear_cpu(cpu, tick_broadcast_on);
+	tick_broadcast_oneshot_offline(cpu);
+	tick_shutdown_broadcast();
+	raw_spin_unlock(&tick_broadcast_lock);
 }
+
 #endif
 
 void tick_suspend_broadcast(void)
@@ -950,14 +954,10 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu)
 }
 
 /*
- * Remove a dead CPU from broadcasting
+ * Remove a dying CPU from broadcasting
  */
-void tick_shutdown_broadcast_oneshot(unsigned int cpu)
+static void tick_broadcast_oneshot_offline(unsigned int cpu)
 {
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
-
 	/*
 	 * Clear the broadcast masks for the dead cpu, but do not stop
 	 * the broadcast device!
@@ -965,8 +965,6 @@ void tick_shutdown_broadcast_oneshot(unsigned int cpu)
 	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
 	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
 	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
-
-	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 #endif
 
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index e277284c2831..7b2496136729 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -64,7 +64,6 @@ extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
 extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
 extern void tick_install_broadcast_device(struct clock_event_device *dev);
 extern int tick_is_broadcast_device(struct clock_event_device *dev);
-extern void tick_shutdown_broadcast(unsigned int cpu);
 extern void tick_suspend_broadcast(void);
 extern void tick_resume_broadcast(void);
 extern bool tick_resume_check_broadcast(void);
@@ -78,7 +77,6 @@ static inline void tick_install_broadcast_device(struct clock_event_device *dev)
 static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; }
 static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; }
 static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
-static inline void tick_shutdown_broadcast(unsigned int cpu) { }
 static inline void tick_suspend_broadcast(void) { }
 static inline void tick_resume_broadcast(void) { }
 static inline bool tick_resume_check_broadcast(void) { return false; }
@@ -128,19 +126,23 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 /* Functions related to oneshot broadcasting */
 #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
 extern void tick_broadcast_switch_to_oneshot(void);
-extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
 extern int tick_broadcast_oneshot_active(void);
 extern void tick_check_oneshot_broadcast_this_cpu(void);
 bool tick_broadcast_oneshot_available(void);
 extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
 #else /* !(BROADCAST && ONESHOT): */
 static inline void tick_broadcast_switch_to_oneshot(void) { }
-static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
 static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
 static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); }
 #endif /* !(BROADCAST && ONESHOT) */
 
+#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU)
+extern void tick_broadcast_offline(unsigned int cpu);
+#else
+static inline void tick_broadcast_offline(unsigned int cpu) { }
+#endif
+
 /* NO_HZ_FULL internal */
 #ifdef CONFIG_NO_HZ_FULL
 extern void tick_nohz_init(void);
-- 
cgit v1.2.3


From d6b87eaf10bd061914f6d277d7428b3285d8850e Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Thu, 21 Mar 2019 13:09:18 +0100
Subject: tick/sched: Update tick_sched struct documentation

Adapt the documentation order of struct members to the effective order of
struct members and add missing descriptions.

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: fweisbec@gmail.com
Cc: peterz@infradead.org
Link: https://lkml.kernel.org/r/20190321120921.16463-2-anna-maria@linutronix.de
---
 kernel/time/tick-sched.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 6de959a854b2..4fb06527cf64 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -24,12 +24,19 @@ enum tick_nohz_mode {
  * struct tick_sched - sched tick emulation and no idle tick control/stats
  * @sched_timer:	hrtimer to schedule the periodic tick in high
  *			resolution mode
+ * @check_clocks:	Notification mechanism about clocksource changes
+ * @nohz_mode:		Mode - one state of tick_nohz_mode
+ * @inidle:		Indicator that the CPU is in the tick idle mode
+ * @tick_stopped:	Indicator that the idle tick has been stopped
+ * @idle_active:	Indicator that the CPU is actively in the tick idle mode;
+ *			it is resetted during irq handling phases.
+ * @do_timer_lst:	CPU was the last one doing do_timer before going idle
+ * @got_idle_tick:	Tick timer function has run with @inidle set
  * @last_tick:		Store the last tick expiry time when the tick
  *			timer is modified for nohz sleeps. This is necessary
  *			to resume the tick timer operation in the timeline
  *			when the CPU returns from nohz sleep.
  * @next_tick:		Next tick to be fired when in dynticks mode.
- * @tick_stopped:	Indicator that the idle tick has been stopped
  * @idle_jiffies:	jiffies at the entry to idle for idle time accounting
  * @idle_calls:		Total number of idle calls
  * @idle_sleeps:	Number of idle calls, where the sched tick was stopped
@@ -40,8 +47,8 @@ enum tick_nohz_mode {
  * @iowait_sleeptime:	Sum of the time slept in idle with sched tick stopped, with IO outstanding
  * @timer_expires:	Anticipated timer expiration time (in case sched tick is stopped)
  * @timer_expires_base:	Base time clock monotonic for @timer_expires
- * @do_timer_lst:	CPU was the last one doing do_timer before going idle
- * @got_idle_tick:	Tick timer function has run with @inidle set
+ * @next_timer:		Expiry time of next expiring timer for debugging purpose only
+ * @tick_dep_mask:	Tick dependency mask - is set, if someone needs the tick
  */
 struct tick_sched {
 	struct hrtimer			sched_timer;
-- 
cgit v1.2.3


From dc1e7dc5ac6254ba0502323381a7ec847e408f1d Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Thu, 21 Mar 2019 13:09:19 +0100
Subject: timer: Move trace point to get proper index

When placing the timer_start trace point before the timer wheel bucket
index is calculated, the index information in the trace point is useless.

It is not possible to simply move the debug_activate() call after the index
calculation, because debug_object_activate() needs to be called before
touching the object.

Therefore split debug_activate() and move the trace point into
enqueue_timer() after the new index has been calculated. The
debug_object_activate() call remains at the original place.

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: fweisbec@gmail.com
Cc: peterz@infradead.org
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: https://lkml.kernel.org/r/20190321120921.16463-3-anna-maria@linutronix.de
---
 kernel/time/timer.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2fce056f8a49..8d7918ae4d0c 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -536,6 +536,8 @@ static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
 	hlist_add_head(&timer->entry, base->vectors + idx);
 	__set_bit(idx, base->pending_map);
 	timer_set_idx(timer, idx);
+
+	trace_timer_start(timer, timer->expires, timer->flags);
 }
 
 static void
@@ -757,13 +759,6 @@ static inline void debug_init(struct timer_list *timer)
 	trace_timer_init(timer);
 }
 
-static inline void
-debug_activate(struct timer_list *timer, unsigned long expires)
-{
-	debug_timer_activate(timer);
-	trace_timer_start(timer, expires, timer->flags);
-}
-
 static inline void debug_deactivate(struct timer_list *timer)
 {
 	debug_timer_deactivate(timer);
@@ -1037,7 +1032,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
 		}
 	}
 
-	debug_activate(timer, expires);
+	debug_timer_activate(timer);
 
 	timer->expires = expires;
 	/*
@@ -1171,7 +1166,7 @@ void add_timer_on(struct timer_list *timer, int cpu)
 	}
 	forward_timer_base(base);
 
-	debug_activate(timer, timer->expires);
+	debug_timer_activate(timer);
 	internal_add_timer(base, timer);
 	raw_spin_unlock_irqrestore(&base->lock, flags);
 }
-- 
cgit v1.2.3


From f28d3d5346e97e60c81f933ac89ccf015430e5cf Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Thu, 21 Mar 2019 13:09:21 +0100
Subject: timer/trace: Improve timer tracing

Timers are added to the timer wheel off by one. This is required in
case a timer is queued directly before incrementing jiffies to prevent
early timer expiry.

When reading a timer trace and relying only on the expiry time of the timer
in the timer_start trace point and on the now in the timer_expiry_entry
trace point, it seems that the timer fires late. With the current
timer_expiry_entry trace point information only now=jiffies is printed but
not the value of base->clk. This makes it impossible to draw a conclusion
to the index of base->clk and makes it impossible to examine timer problems
without additional trace points.

Therefore add the base->clk value to the timer_expire_entry trace
point, to be able to calculate the index the timer base is located at
during collecting expired timers.

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: fweisbec@gmail.com
Cc: peterz@infradead.org
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: https://lkml.kernel.org/r/20190321120921.16463-5-anna-maria@linutronix.de
---
 kernel/time/timer.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 8d7918ae4d0c..a9b1bbc2d88d 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1293,7 +1293,9 @@ int del_timer_sync(struct timer_list *timer)
 EXPORT_SYMBOL(del_timer_sync);
 #endif
 
-static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *))
+static void call_timer_fn(struct timer_list *timer,
+			  void (*fn)(struct timer_list *),
+			  unsigned long baseclk)
 {
 	int count = preempt_count();
 
@@ -1316,7 +1318,7 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list
 	 */
 	lock_map_acquire(&lockdep_map);
 
-	trace_timer_expire_entry(timer);
+	trace_timer_expire_entry(timer, baseclk);
 	fn(timer);
 	trace_timer_expire_exit(timer);
 
@@ -1337,6 +1339,13 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list
 
 static void expire_timers(struct timer_base *base, struct hlist_head *head)
 {
+	/*
+	 * This value is required only for tracing. base->clk was
+	 * incremented directly before expire_timers was called. But expiry
+	 * is related to the old base->clk value.
+	 */
+	unsigned long baseclk = base->clk - 1;
+
 	while (!hlist_empty(head)) {
 		struct timer_list *timer;
 		void (*fn)(struct timer_list *);
@@ -1350,11 +1359,11 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
 
 		if (timer->flags & TIMER_IRQSAFE) {
 			raw_spin_unlock(&base->lock);
-			call_timer_fn(timer, fn);
+			call_timer_fn(timer, fn, baseclk);
 			raw_spin_lock(&base->lock);
 		} else {
 			raw_spin_unlock_irq(&base->lock);
-			call_timer_fn(timer, fn);
+			call_timer_fn(timer, fn, baseclk);
 			raw_spin_lock_irq(&base->lock);
 		}
 	}
-- 
cgit v1.2.3


From 7a8e61f8478639072d402a26789055a4a4de8f77 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 23 Mar 2019 11:36:19 +0100
Subject: timekeeping: Force upper bound for setting CLOCK_REALTIME

Several people reported testing failures after setting CLOCK_REALTIME close
to the limits of the kernel internal representation in nanoseconds,
i.e. year 2262.

The failures are exposed in subsequent operations, i.e. when arming timers
or when the advancing CLOCK_MONOTONIC makes the calculation of
CLOCK_REALTIME overflow into negative space.

Now people start to paper over the underlying problem by clamping
calculations to the valid range, but that's just wrong because such
workarounds will prevent detection of real issues as well.

It is reasonable to force an upper bound for the various methods of setting
CLOCK_REALTIME. Year 2262 is the absolute upper bound. Assume a maximum
uptime of 30 years which is plenty enough even for esoteric embedded
systems. That results in an upper bound of year 2232 for setting the time.

Once that limit is reached in reality this limit is only a small part of
the problem space. But until then this stops people from trying to paper
over the problem at the wrong places.

Reported-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Reported-by: Hongbo Yao <yaohongbo@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Miroslav Lichvar <mlichvar@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1903231125480.2157@nanos.tec.linutronix.de
---
 kernel/time/time.c        | 2 +-
 kernel/time/timekeeping.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/time.c b/kernel/time/time.c
index c3f756f8534b..86656bbac232 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -171,7 +171,7 @@ int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz
 	static int firsttime = 1;
 	int error = 0;
 
-	if (tv && !timespec64_valid(tv))
+	if (tv && !timespec64_valid_settod(tv))
 		return -EINVAL;
 
 	error = security_settime64(tv, tz);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 540145da33da..5716e28bfa3c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1221,7 +1221,7 @@ int do_settimeofday64(const struct timespec64 *ts)
 	unsigned long flags;
 	int ret = 0;
 
-	if (!timespec64_valid_strict(ts))
+	if (!timespec64_valid_settod(ts))
 		return -EINVAL;
 
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
@@ -1278,7 +1278,7 @@ static int timekeeping_inject_offset(const struct timespec64 *ts)
 	/* Make sure the proposed value is valid */
 	tmp = timespec64_add(tk_xtime(tk), *ts);
 	if (timespec64_compare(&tk->wall_to_monotonic, ts) > 0 ||
-	    !timespec64_valid_strict(&tmp)) {
+	    !timespec64_valid_settod(&tmp)) {
 		ret = -EINVAL;
 		goto error;
 	}
@@ -1527,7 +1527,7 @@ void __init timekeeping_init(void)
 	unsigned long flags;
 
 	read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
-	if (timespec64_valid_strict(&wall_time) &&
+	if (timespec64_valid_settod(&wall_time) &&
 	    timespec64_to_ns(&wall_time) > 0) {
 		persistent_clock_exists = true;
 	} else if (timespec64_to_ns(&wall_time) != 0) {
-- 
cgit v1.2.3


From aba0954327c831f593702e3a81ef3ad4bec7a838 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Fri, 29 Mar 2019 11:28:52 +0100
Subject: tick/broadcast: Fix warning about undefined
 tick_broadcast_oneshot_offline()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Randconfig builds with

  CONFIG_TICK_ONESHOT=y
  CONFIG_HOTPLUG_CPU=n

trigger

  kernel/time/tick-broadcast.c:39:13: warning: ‘tick_broadcast_oneshot_offline’ \
	  declared ‘static’ but never defined [-Wunused-function]

due to that function's definition missing.

Move the CONFIG_HOTPLUG_CPU ifdeffery around its declaration too.

Fixes: 1b72d4323798 ("tick: Remove outgoing CPU from broadcast masks")
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mukesh Ojha <mojha@codeaurora.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: x86@kernel.org
Link: https://lkml.kernel.org/r/20190329110508.6621-1-bp@alien8.de
---
 kernel/time/tick-broadcast.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 0283523de045..7541cbca695e 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -36,12 +36,16 @@ static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
 static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 static void tick_broadcast_clear_oneshot(int cpu);
 static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
+# ifdef CONFIG_HOTPLUG_CPU
 static void tick_broadcast_oneshot_offline(unsigned int cpu);
+# endif
 #else
 static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_clear_oneshot(int cpu) { }
 static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
+# ifdef CONFIG_HOTPLUG_CPU
 static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
+# endif
 #endif
 
 /*
-- 
cgit v1.2.3


From 3b15d09f7e6db44065aaba5fd16dc7420035c5ad Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Thu, 28 Feb 2019 13:13:26 +0800
Subject: time: Introduce jiffies64_to_msecs()

there is a similar helper in net/netfilter/nf_tables_api.c,
this maybe become a common request someday, so move it to
time.c

Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Acked-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 kernel/time/time.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'kernel/time')

diff --git a/kernel/time/time.c b/kernel/time/time.c
index c3f756f8534b..9e3f79d4f5a8 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -783,6 +783,16 @@ u64 jiffies64_to_nsecs(u64 j)
 }
 EXPORT_SYMBOL(jiffies64_to_nsecs);
 
+u64 jiffies64_to_msecs(const u64 j)
+{
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+	return (MSEC_PER_SEC / HZ) * j;
+#else
+	return div_u64(j * HZ_TO_MSEC_NUM, HZ_TO_MSEC_DEN);
+#endif
+}
+EXPORT_SYMBOL(jiffies64_to_msecs);
+
 /**
  * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
  *
-- 
cgit v1.2.3


From d75f773c86a2b8b7278e2c33343b46a4024bc002 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Mon, 25 Mar 2019 21:32:28 +0200
Subject: treewide: Switch printk users from %pf and %pF to %ps and %pS,
 respectively

%pF and %pf are functionally equivalent to %pS and %ps conversion
specifiers. The former are deprecated, therefore switch the current users
to use the preferred variant.

The changes have been produced by the following command:

	git grep -l '%p[fF]' | grep -v '^\(tools\|Documentation\)/' | \
	while read i; do perl -i -pe 's/%pf/%ps/g; s/%pF/%pS/g;' $i; done

And verifying the result.

Link: http://lkml.kernel.org/r/20190325193229.23390-1-sakari.ailus@linux.intel.com
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: sparclinux@vger.kernel.org
Cc: linux-um@lists.infradead.org
Cc: xen-devel@lists.xenproject.org
Cc: linux-acpi@vger.kernel.org
Cc: linux-pm@vger.kernel.org
Cc: drbd-dev@lists.linbit.com
Cc: linux-block@vger.kernel.org
Cc: linux-mmc@vger.kernel.org
Cc: linux-nvdimm@lists.01.org
Cc: linux-pci@vger.kernel.org
Cc: linux-scsi@vger.kernel.org
Cc: linux-btrfs@vger.kernel.org
Cc: linux-f2fs-devel@lists.sourceforge.net
Cc: linux-mm@kvack.org
Cc: ceph-devel@vger.kernel.org
Cc: netdev@vger.kernel.org
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: David Sterba <dsterba@suse.com> (for btrfs)
Acked-by: Mike Rapoport <rppt@linux.ibm.com> (for mm/memblock.c)
Acked-by: Bjorn Helgaas <bhelgaas@google.com> (for drivers/pci)
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 kernel/time/sched_clock.c | 2 +-
 kernel/time/timer.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 094b82ca95e5..1002cf61700a 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -231,7 +231,7 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
 	if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
 		enable_sched_clock_irqtime();
 
-	pr_debug("Registered %pF as sched_clock source\n", read);
+	pr_debug("Registered %pS as sched_clock source\n", read);
 }
 
 void __init generic_sched_clock_init(void)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2fce056f8a49..6502c3ed317e 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1328,7 +1328,7 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list
 	lock_map_release(&lockdep_map);
 
 	if (count != preempt_count()) {
-		WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
+		WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n",
 			  fn, count, preempt_count());
 		/*
 		 * Restore the preempt count. That gives us a decent
-- 
cgit v1.2.3


From 6f9b83ac877fb5558d76b9f78590f3afd1bdf421 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 27 Mar 2019 15:35:47 +0100
Subject: cpuidle: Export the next timer expiration for CPUs

To be able to predict the sleep duration for a CPU entering idle, it
is essential to know the expiration time of the next timer.  Both the
teo and the menu cpuidle governors already use this information for
CPU idle state selection.

Moving forward, a similar prediction needs to be made for a group of
idle CPUs rather than for a single one and the following changes
implement a new genpd governor for that purpose.

In order to support that feature, add a new function called
tick_nohz_get_next_hrtimer() that will return the next hrtimer
expiration time of a given CPU to be invoked after deciding
whether or not to stop the scheduler tick on that CPU.

Make the cpuidle core call tick_nohz_get_next_hrtimer() right
before invoking the ->enter() callback provided by the cpuidle
driver for the given state and store its return value in the
per-CPU struct cpuidle_device, so as to make it available to code
outside of cpuidle.

Note that at the point when cpuidle calls tick_nohz_get_next_hrtimer(),
the governor's ->select() callback has already returned and indicated
whether or not the tick should be stopped, so in fact the value
returned by tick_nohz_get_next_hrtimer() always is the next hrtimer
expiration time for the given CPU, possibly including the tick (if
it hasn't been stopped).

Co-developed-by: Lina Iyer <lina.iyer@linaro.org>
Co-developed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
[ rjw: Subject & changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/time/tick-sched.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6fa52cd6df0b..8d18e03124ff 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1022,6 +1022,18 @@ bool tick_nohz_idle_got_tick(void)
 	return false;
 }
 
+/**
+ * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
+ * or the tick, whatever that expires first. Note that, if the tick has been
+ * stopped, it returns the next hrtimer.
+ *
+ * Called from power state control code with interrupts disabled
+ */
+ktime_t tick_nohz_get_next_hrtimer(void)
+{
+	return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
+}
+
 /**
  * tick_nohz_get_sleep_length - return the expected length of the current sleep
  * @delta_next: duration until the next event if the tick cannot be stopped
-- 
cgit v1.2.3


From 2d87a0674bd60d855e4008e2d84f5b23d7cb9b7d Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Wed, 10 Apr 2019 11:14:19 +0200
Subject: timekeeping: Audit clock adjustments

Emit an audit record whenever the system clock is changed (i.e. shifted
by a non-zero offset) by a syscall from userspace. The syscalls than can
(at the time of writing) trigger such record are:
  - settimeofday(2), stime(2), clock_settime(2) -- via
    do_settimeofday64()
  - adjtimex(2), clock_adjtime(2) -- via do_adjtimex()

The new records have type AUDIT_TIME_INJOFFSET and contain the following
fields:
  - sec -- the 'seconds' part of the offset
  - nsec -- the 'nanoseconds' part of the offset

Example record (time was shifted backwards by ~15.875 seconds):

type=TIME_INJOFFSET msg=audit(1530616049.652:13): sec=-16 nsec=124887145

The records of this type will be associated with the corresponding
syscall records.

Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
[PM: fixed a line width problem in __audit_tk_injoffset()]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 kernel/time/timekeeping.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f986e1918d12..3d24be4cd607 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -21,6 +21,7 @@
 #include <linux/stop_machine.h>
 #include <linux/pvclock_gtod.h>
 #include <linux/compiler.h>
+#include <linux/audit.h>
 
 #include "tick-internal.h"
 #include "ntp_internal.h"
@@ -1250,6 +1251,9 @@ out:
 	/* signal hrtimers about time change */
 	clock_was_set();
 
+	if (!ret)
+		audit_tk_injoffset(ts_delta);
+
 	return ret;
 }
 EXPORT_SYMBOL(do_settimeofday64);
@@ -2322,6 +2326,8 @@ int do_adjtimex(struct __kernel_timex *txc)
 		ret = timekeeping_inject_offset(&delta);
 		if (ret)
 			return ret;
+
+		audit_tk_injoffset(delta);
 	}
 
 	ktime_get_real_ts64(&ts);
-- 
cgit v1.2.3


From 7e8eda734d30de81d06a949c9bf9853c445ede4e Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Wed, 10 Apr 2019 11:14:20 +0200
Subject: ntp: Audit NTP parameters adjustment

Emit an audit record every time selected NTP parameters are modified
from userspace (via adjtimex(2) or clock_adjtime(2)). These parameters
may be used to indirectly change system clock, and thus their
modifications should be audited.

Such events will now generate records of type AUDIT_TIME_ADJNTPVAL
containing the following fields:
  - op -- which value was adjusted:
    - offset -- corresponding to the time_offset variable
    - freq   -- corresponding to the time_freq variable
    - status -- corresponding to the time_status variable
    - adjust -- corresponding to the time_adjust variable
    - tick   -- corresponding to the tick_usec variable
    - tai    -- corresponding to the timekeeping's TAI offset
  - old -- the old value
  - new -- the new value

Example records:

type=TIME_ADJNTPVAL msg=audit(1530616044.507:7): op=status old=64 new=8256
type=TIME_ADJNTPVAL msg=audit(1530616044.511:11): op=freq old=0 new=49180377088000

The records of this type will be associated with the corresponding
syscall records.

An overview of parameter changes that can be done via do_adjtimex()
(based on information from Miroslav Lichvar) and whether they are
audited:
  __timekeeping_set_tai_offset() -- sets the offset from the
                                    International Atomic Time
                                    (AUDITED)
  NTP variables:
    time_offset -- can adjust the clock by up to 0.5 seconds per call
                   and also speed it up or slow down by up to about
                   0.05% (43 seconds per day) (AUDITED)
    time_freq -- can speed up or slow down by up to about 0.05%
                 (AUDITED)
    time_status -- can insert/delete leap seconds and it also enables/
                   disables synchronization of the hardware real-time
                   clock (AUDITED)
    time_maxerror, time_esterror -- change error estimates used to
                                    inform userspace applications
                                    (NOT AUDITED)
    time_constant -- controls the speed of the clock adjustments that
                     are made when time_offset is set (NOT AUDITED)
    time_adjust -- can temporarily speed up or slow down the clock by up
                   to 0.05% (AUDITED)
    tick_usec -- a more extreme version of time_freq; can speed up or
                 slow down the clock by up to 10% (AUDITED)

Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 kernel/time/ntp.c          | 22 +++++++++++++++++++---
 kernel/time/ntp_internal.h |  4 +++-
 kernel/time/timekeeping.c  |  7 ++++++-
 3 files changed, 28 insertions(+), 5 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 92a90014a925..ac5555e25733 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -17,6 +17,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/rtc.h>
+#include <linux/audit.h>
 
 #include "ntp_internal.h"
 #include "timekeeping_internal.h"
@@ -709,7 +710,7 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc,
  * kernel time-keeping variables. used by xntpd.
  */
 int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts,
-		  s32 *time_tai)
+		  s32 *time_tai, struct audit_ntp_data *ad)
 {
 	int result;
 
@@ -720,14 +721,29 @@ int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts,
 			/* adjtime() is independent from ntp_adjtime() */
 			time_adjust = txc->offset;
 			ntp_update_frequency();
+
+			audit_ntp_set_old(ad, AUDIT_NTP_ADJUST,	save_adjust);
+			audit_ntp_set_new(ad, AUDIT_NTP_ADJUST,	time_adjust);
 		}
 		txc->offset = save_adjust;
 	} else {
-
 		/* If there are input parameters, then process them: */
-		if (txc->modes)
+		if (txc->modes) {
+			audit_ntp_set_old(ad, AUDIT_NTP_OFFSET,	time_offset);
+			audit_ntp_set_old(ad, AUDIT_NTP_FREQ,	time_freq);
+			audit_ntp_set_old(ad, AUDIT_NTP_STATUS,	time_status);
+			audit_ntp_set_old(ad, AUDIT_NTP_TAI,	*time_tai);
+			audit_ntp_set_old(ad, AUDIT_NTP_TICK,	tick_usec);
+
 			process_adjtimex_modes(txc, time_tai);
 
+			audit_ntp_set_new(ad, AUDIT_NTP_OFFSET,	time_offset);
+			audit_ntp_set_new(ad, AUDIT_NTP_FREQ,	time_freq);
+			audit_ntp_set_new(ad, AUDIT_NTP_STATUS,	time_status);
+			audit_ntp_set_new(ad, AUDIT_NTP_TAI,	*time_tai);
+			audit_ntp_set_new(ad, AUDIT_NTP_TICK,	tick_usec);
+		}
+
 		txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
 				  NTP_SCALE_SHIFT);
 		if (!(time_status & STA_NANO))
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 40e6122e634e..908ecaa65fc3 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -8,6 +8,8 @@ extern void ntp_clear(void);
 extern u64 ntp_tick_length(void);
 extern ktime_t ntp_get_next_leap(void);
 extern int second_overflow(time64_t secs);
-extern int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, s32 *time_tai);
+extern int __do_adjtimex(struct __kernel_timex *txc,
+			 const struct timespec64 *ts,
+			 s32 *time_tai, struct audit_ntp_data *ad);
 extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts);
 #endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3d24be4cd607..f366f2fdf1b0 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2307,6 +2307,7 @@ static int timekeeping_validate_timex(const struct __kernel_timex *txc)
 int do_adjtimex(struct __kernel_timex *txc)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
+	struct audit_ntp_data ad;
 	unsigned long flags;
 	struct timespec64 ts;
 	s32 orig_tai, tai;
@@ -2330,13 +2331,15 @@ int do_adjtimex(struct __kernel_timex *txc)
 		audit_tk_injoffset(delta);
 	}
 
+	audit_ntp_init(&ad);
+
 	ktime_get_real_ts64(&ts);
 
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	write_seqcount_begin(&tk_core.seq);
 
 	orig_tai = tai = tk->tai_offset;
-	ret = __do_adjtimex(txc, &ts, &tai);
+	ret = __do_adjtimex(txc, &ts, &tai, &ad);
 
 	if (tai != orig_tai) {
 		__timekeeping_set_tai_offset(tk, tai);
@@ -2347,6 +2350,8 @@ int do_adjtimex(struct __kernel_timex *txc)
 	write_seqcount_end(&tk_core.seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
+	audit_ntp_log(&ad);
+
 	/* Update the multiplier immediately if frequency was set directly */
 	if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK))
 		timekeeping_advance(TK_ADV_FREQ);
-- 
cgit v1.2.3


From 13e792a19d4e3a1c64e94197ba357685fd584ded Mon Sep 17 00:00:00 2001
From: Laurent Gauthier <laurent.gauthier@soccasys.com>
Date: Sat, 5 Jan 2019 00:07:45 +0100
Subject: tick: Fix typos in comments

Signed-off-by: Laurent Gauthier <laurent.gauthier@soccasys.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/tick-broadcast.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 7541cbca695e..e51778c312f1 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -809,13 +809,13 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 			 * either the CPU handling the broadcast
 			 * interrupt or we got woken by something else.
 			 *
-			 * We are not longer in the broadcast mask, so
+			 * We are no longer in the broadcast mask, so
 			 * if the cpu local expiry time is already
 			 * reached, we would reprogram the cpu local
 			 * timer with an already expired event.
 			 *
 			 * This can lead to a ping-pong when we return
-			 * to idle and therefor rearm the broadcast
+			 * to idle and therefore rearm the broadcast
 			 * timer before the cpu local timer was able
 			 * to fire. This happens because the forced
 			 * reprogramming makes sure that the event
-- 
cgit v1.2.3


From 08ae95f4fd3b38b257f5dc7e6507e071c27ba0d5 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 11 Apr 2019 13:34:48 +1000
Subject: nohz_full: Allow the boot CPU to be nohz_full

Allow the boot CPU/CPU0 to be nohz_full. Have the boot CPU take the
do_timer duty during boot until a housekeeping CPU can take over.

This is supported when CONFIG_PM_SLEEP_SMP is not configured, or when
it is configured and the arch allows suspend on non-zero CPUs.

nohz_full has been trialed at a large supercomputer site and found to
significantly reduce jitter. In order to deploy it in production, they
need CPU0 to be nohz_full because their job control system requires
the application CPUs to start from 0, and the housekeeping CPUs are
placed higher. An equivalent job scheduling that uses CPU0 for
housekeeping could be achieved by modifying their system, but it is
preferable if nohz_full can support their environment without
modification.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J . Wysocki <rafael.j.wysocki@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linuxppc-dev@lists.ozlabs.org
Link: https://lkml.kernel.org/r/20190411033448.20842-6-npiggin@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/tick-common.c | 50 +++++++++++++++++++++++++++++++++++++++++++----
 kernel/time/tick-sched.c  | 34 ++++++++++++++++++++++----------
 2 files changed, 70 insertions(+), 14 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index df401463a191..e49e8091f9ac 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -46,6 +46,14 @@ ktime_t tick_period;
  *    procedure also covers cpu hotplug.
  */
 int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
+#ifdef CONFIG_NO_HZ_FULL
+/*
+ * tick_do_timer_boot_cpu indicates the boot CPU temporarily owns
+ * tick_do_timer_cpu and it should be taken over by an eligible secondary
+ * when one comes online.
+ */
+static int tick_do_timer_boot_cpu __read_mostly = -1;
+#endif
 
 /*
  * Debugging: see timer_list.c
@@ -167,6 +175,26 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
 	}
 }
 
+#ifdef CONFIG_NO_HZ_FULL
+static void giveup_do_timer(void *info)
+{
+	int cpu = *(unsigned int *)info;
+
+	WARN_ON(tick_do_timer_cpu != smp_processor_id());
+
+	tick_do_timer_cpu = cpu;
+}
+
+static void tick_take_do_timer_from_boot(void)
+{
+	int cpu = smp_processor_id();
+	int from = tick_do_timer_boot_cpu;
+
+	if (from >= 0 && from != cpu)
+		smp_call_function_single(from, giveup_do_timer, &cpu, 1);
+}
+#endif
+
 /*
  * Setup the tick device
  */
@@ -186,12 +214,26 @@ static void tick_setup_device(struct tick_device *td,
 		 * this cpu:
 		 */
 		if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
-			if (!tick_nohz_full_cpu(cpu))
-				tick_do_timer_cpu = cpu;
-			else
-				tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+			tick_do_timer_cpu = cpu;
+
 			tick_next_period = ktime_get();
 			tick_period = NSEC_PER_SEC / HZ;
+#ifdef CONFIG_NO_HZ_FULL
+			/*
+			 * The boot CPU may be nohz_full, in which case set
+			 * tick_do_timer_boot_cpu so the first housekeeping
+			 * secondary that comes up will take do_timer from
+			 * us.
+			 */
+			if (tick_nohz_full_cpu(cpu))
+				tick_do_timer_boot_cpu = cpu;
+
+		} else if (tick_do_timer_boot_cpu != -1 &&
+						!tick_nohz_full_cpu(cpu)) {
+			tick_take_do_timer_from_boot();
+			tick_do_timer_boot_cpu = -1;
+			WARN_ON(tick_do_timer_cpu != cpu);
+#endif
 		}
 
 		/*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6fa52cd6df0b..4aa917acbe1c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -121,10 +121,16 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 	 * into a long sleep. If two CPUs happen to assign themselves to
 	 * this duty, then the jiffies update is still serialized by
 	 * jiffies_lock.
+	 *
+	 * If nohz_full is enabled, this should not happen because the
+	 * tick_do_timer_cpu never relinquishes.
 	 */
-	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
-	    && !tick_nohz_full_cpu(cpu))
+	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+#ifdef CONFIG_NO_HZ_FULL
+		WARN_ON(tick_nohz_full_running);
+#endif
 		tick_do_timer_cpu = cpu;
+	}
 #endif
 
 	/* Check, if the jiffies need an update */
@@ -395,8 +401,8 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask)
 static int tick_nohz_cpu_down(unsigned int cpu)
 {
 	/*
-	 * The boot CPU handles housekeeping duty (unbound timers,
-	 * workqueues, timekeeping, ...) on behalf of full dynticks
+	 * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
+	 * timers, workqueues, timekeeping, ...) on behalf of full dynticks
 	 * CPUs. It must remain online when nohz full is enabled.
 	 */
 	if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
@@ -423,12 +429,15 @@ void __init tick_nohz_init(void)
 		return;
 	}
 
-	cpu = smp_processor_id();
+	if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
+			!IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
+		cpu = smp_processor_id();
 
-	if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
-		pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n",
-			cpu);
-		cpumask_clear_cpu(cpu, tick_nohz_full_mask);
+		if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
+			pr_warn("NO_HZ: Clearing %d from nohz_full range "
+				"for timekeeping\n", cpu);
+			cpumask_clear_cpu(cpu, tick_nohz_full_mask);
+		}
 	}
 
 	for_each_cpu(cpu, tick_nohz_full_mask)
@@ -904,8 +913,13 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
 		/*
 		 * Boot safety: make sure the timekeeping duty has been
 		 * assigned before entering dyntick-idle mode,
+		 * tick_do_timer_cpu is TICK_DO_TIMER_BOOT
 		 */
-		if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+		if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_BOOT))
+			return false;
+
+		/* Should not happen for nohz-full */
+		if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
 			return false;
 	}
 
-- 
cgit v1.2.3


From fdc6bae940ee9eb869e493990540098b8c0fd6ab Mon Sep 17 00:00:00 2001
From: Miroslav Lichvar <mlichvar@redhat.com>
Date: Wed, 17 Apr 2019 10:48:33 +0200
Subject: ntp: Allow TAI-UTC offset to be set to zero

The ADJ_TAI adjtimex mode sets the TAI-UTC offset of the system clock.
It is typically set by NTP/PTP implementations and it is automatically
updated by the kernel on leap seconds. The initial value is zero (which
applications may interpret as unknown), but this value cannot be set by
adjtimex. This limitation seems to go back to the original "nanokernel"
implementation by David Mills.

Change the ADJ_TAI check to accept zero as a valid TAI-UTC offset in
order to allow setting it back to the initial value.

Fixes: 153b5d054ac2 ("ntp: support for TAI")
Suggested-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Link: https://lkml.kernel.org/r/20190417084833.7401-1-mlichvar@redhat.com
---
 kernel/time/ntp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 92a90014a925..f43d47c8c3b6 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -690,7 +690,7 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc,
 		time_constant = max(time_constant, 0l);
 	}
 
-	if (txc->modes & ADJ_TAI && txc->constant > 0)
+	if (txc->modes & ADJ_TAI && txc->constant >= 0)
 		*time_tai = txc->constant;
 
 	if (txc->modes & ADJ_OFFSET)
-- 
cgit v1.2.3


From ec8f24b7faaf3d4799a7c3f4c1b87f6b02778ad1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 19 May 2019 13:07:45 +0100
Subject: treewide: Add SPDX license identifier - Makefile/Kconfig

Add SPDX license identifiers to all Make/Kconfig files which:

 - Have no license information of any form

These files fall under the project license, GPL v2 only. The resulting SPDX
license identifier is:

  GPL-2.0-only

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/time/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel/time')

diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index e2c038d6c13c..fcc42353f125 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Timer subsystem related configuration options
 #
-- 
cgit v1.2.3


From e3ff9c3678b4d80e22d2557b68726174578eaf52 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 13 Jun 2019 21:40:45 +0200
Subject: timekeeping: Repair ktime_get_coarse*() granularity

Jason reported that the coarse ktime based time getters advance only once
per second and not once per tick as advertised.

The code reads only the monotonic base time, which advances once per
second. The nanoseconds are accumulated on every tick in xtime_nsec up to
a second and the regular time getters take this nanoseconds offset into
account, but the ktime_get_coarse*() implementation fails to do so.

Add the accumulated xtime_nsec value to the monotonic base time to get the
proper per tick advancing coarse tinme.

Fixes: b9ff604cff11 ("timekeeping: Add ktime_get_coarse_with_offset")
Reported-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: Sultan Alsawaf <sultan@kerneltoast.com>
Cc: Waiman Long <longman@redhat.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1906132136280.1791@nanos.tec.linutronix.de
---
 kernel/time/timekeeping.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 85f5912d8f70..44b726bab4bd 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -808,17 +808,18 @@ ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned int seq;
 	ktime_t base, *offset = offsets[offs];
+	u64 nsecs;
 
 	WARN_ON(timekeeping_suspended);
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 		base = ktime_add(tk->tkr_mono.base, *offset);
+		nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
-	return base;
-
+	return base + nsecs;
 }
 EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
 
-- 
cgit v1.2.3