19 files changed, 148 insertions, 74 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 2845623fb582..6ac894244d39 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1101,7 +1101,7 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 	cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
 	if (cpu_limit != RLIM_INFINITY) {
 		sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
-		sig->cputimer.running = 1;
+		sig->cputimer.running = true;
 	}
 
 	/* The timer lists. */
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 95354bb07a14..6b0c0b74a2a1 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -228,11 +228,7 @@ static void msi_domain_update_chip_ops(struct msi_domain_info *info)
 {
 	struct irq_chip *chip = info->chip;
 
-	BUG_ON(!chip);
-	if (!chip->irq_mask)
-		chip->irq_mask = pci_msi_mask_irq;
-	if (!chip->irq_unmask)
-		chip->irq_unmask = pci_msi_unmask_irq;
+	BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask);
 	if (!chip->irq_set_affinity)
 		chip->irq_set_affinity = msi_domain_set_affinity;
 }
diff --git a/kernel/kmod.c b/kernel/kmod.c
index da98d0593de2..0277d1216f80 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -327,9 +327,13 @@ static void call_usermodehelper_exec_work(struct work_struct *work)
 		call_usermodehelper_exec_sync(sub_info);
 	} else {
 		pid_t pid;
-
+		/*
+		 * Use CLONE_PARENT to reparent it to kthreadd; we do not
+		 * want to pollute current->children, and we need a parent
+		 * that always ignores SIGCHLD to ensure auto-reaping.
+		 */
 		pid = kernel_thread(call_usermodehelper_exec_async, sub_info,
-				    SIGCHLD);
+				    CLONE_PARENT | SIGCHLD);
 		if (pid < 0) {
 			sub_info->retval = pid;
 			umh_complete(sub_info);
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 72b0c66628b6..9d6b55587eaa 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -24,6 +24,16 @@ __weak void __iomem *ioremap_cache(resource_size_t offset, unsigned long size)
 }
 #endif
 
+static void *try_ram_remap(resource_size_t offset, size_t size)
+{
+	struct page *page = pfn_to_page(offset >> PAGE_SHIFT);
+
+	/* In the simple case just return the existing linear address */
+	if (!PageHighMem(page))
+		return __va(offset);
+	return NULL; /* fallback to ioremap_cache */
+}
+
 /**
  * memremap() - remap an iomem_resource as cacheable memory
  * @offset: iomem resource start address
@@ -66,8 +76,8 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags)
 		 * the requested range is potentially in "System RAM"
 		 */
 		if (is_ram == REGION_INTERSECTS)
-			addr = __va(offset);
-		else
+			addr = try_ram_remap(offset, size);
+		if (!addr)
 			addr = ioremap_cache(offset, size);
 	}
 
diff --git a/kernel/module.c b/kernel/module.c
index b86b7bf1be38..8f051a106676 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1063,11 +1063,15 @@ void symbol_put_addr(void *addr)
 	if (core_kernel_text(a))
 		return;
 
-	/* module_text_address is safe here: we're supposed to have reference
-	 * to module from symbol_get, so it can't go away. */
+	/*
+	 * Even though we hold a reference on the module; we still need to
+	 * disable preemption in order to safely traverse the data structure.
+	 */
+	preempt_disable();
 	modaddr = __module_text_address(a);
 	BUG_ON(!modaddr);
 	module_put(modaddr);
+	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(symbol_put_addr);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 10a8faa1b0d4..bcd214e4b4d6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2366,8 +2366,15 @@ void wake_up_new_task(struct task_struct *p)
 	trace_sched_wakeup_new(p);
 	check_preempt_curr(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
-	if (p->sched_class->task_woken)
+	if (p->sched_class->task_woken) {
+		/*
+		 * Nothing relies on rq->lock after this, so its fine to
+		 * drop it.
+		 */
+		lockdep_unpin_lock(&rq->lock);
 		p->sched_class->task_woken(rq, p);
+		lockdep_pin_lock(&rq->lock);
+	}
 #endif
 	task_rq_unlock(rq, p, &flags);
 }
@@ -7238,9 +7245,6 @@ void __init sched_init_smp(void)
 	alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
 	alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
 
-	/* nohz_full won't take effect without isolating the cpus. */
-	tick_nohz_full_add_cpus_to(cpu_isolated_map);
-
 	sched_init_numa();
 
 	/*
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index fc8f01083527..8b0a15e285f9 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -668,8 +668,15 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 	 * Queueing this task back might have overloaded rq, check if we need
 	 * to kick someone away.
 	 */
-	if (has_pushable_dl_tasks(rq))
+	if (has_pushable_dl_tasks(rq)) {
+		/*
+		 * Nothing relies on rq->lock after this, so its safe to drop
+		 * rq->lock.
+		 */
+		lockdep_unpin_lock(&rq->lock);
 		push_dl_task(rq);
+		lockdep_pin_lock(&rq->lock);
+	}
 #endif
 
 unlock:
@@ -1066,8 +1073,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
 		int target = find_later_rq(p);
 
 		if (target != -1 &&
-				dl_time_before(p->dl.deadline,
-					cpu_rq(target)->dl.earliest_dl.curr))
+				(dl_time_before(p->dl.deadline,
+					cpu_rq(target)->dl.earliest_dl.curr) ||
+				(cpu_rq(target)->dl.dl_nr_running == 0)))
 			cpu = target;
 	}
 	rcu_read_unlock();
@@ -1417,7 +1425,8 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 
 		later_rq = cpu_rq(cpu);
 
-		if (!dl_time_before(task->dl.deadline,
+		if (later_rq->dl.dl_nr_running &&
+		    !dl_time_before(task->dl.deadline,
 					later_rq->dl.earliest_dl.curr)) {
 			/*
 			 * Target rq has tasks of equal or earlier deadline,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6e2e3483b1ec..9a5e60fe721a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2363,7 +2363,7 @@ static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq)
 	 */
 	tg_weight = atomic_long_read(&tg->load_avg);
 	tg_weight -= cfs_rq->tg_load_avg_contrib;
-	tg_weight += cfs_rq_load_avg(cfs_rq);
+	tg_weight += cfs_rq->load.weight;
 
 	return tg_weight;
 }
@@ -2373,7 +2373,7 @@ static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg)
 	long tg_weight, load, shares;
 
 	tg_weight = calc_tg_weight(tg, cfs_rq);
-	load = cfs_rq_load_avg(cfs_rq);
+	load = cfs_rq->load.weight;
 
 	shares = (tg->shares * load);
 	if (tg_weight)
@@ -2664,13 +2664,14 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
 /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
 static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 {
-	int decayed;
 	struct sched_avg *sa = &cfs_rq->avg;
+	int decayed, removed = 0;
 
 	if (atomic_long_read(&cfs_rq->removed_load_avg)) {
 		long r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
 		sa->load_avg = max_t(long, sa->load_avg - r, 0);
 		sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
+		removed = 1;
 	}
 
 	if (atomic_long_read(&cfs_rq->removed_util_avg)) {
@@ -2688,7 +2689,7 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 	cfs_rq->load_last_update_time_copy = sa->last_update_time;
 #endif
 
-	return decayed;
+	return decayed || removed;
 }
 
 /* Update task and its cfs_rq load average */
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 8f177c73ae19..4a2ef5a02fd3 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -57,9 +57,11 @@ static inline int cpu_idle_poll(void)
 	rcu_idle_enter();
 	trace_cpu_idle_rcuidle(0, smp_processor_id());
 	local_irq_enable();
+	stop_critical_timings();
 	while (!tif_need_resched() &&
 		(cpu_idle_force_poll || tick_check_broadcast_expired()))
 		cpu_relax();
+	start_critical_timings();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 	rcu_idle_exit();
 	return 1;
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3a38775b50c2..0d8fe8b8f727 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -479,7 +479,7 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
  * return half the number of nanoseconds the hardware counter can technically
  * cover. This is done so that we can potentially detect problems caused by
  * delayed timers or bad hardware, which might result in time intervals that
- * are larger then what the math used can handle without overflows.
+ * are larger than what the math used can handle without overflows.
  */
 u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
 {
@@ -595,16 +595,15 @@ static void __clocksource_select(bool skipcur)
  */
 static void clocksource_select(void)
 {
-	return __clocksource_select(false);
+	__clocksource_select(false);
 }
 
 static void clocksource_select_fallback(void)
 {
-	return __clocksource_select(true);
+	__clocksource_select(true);
 }
 
 #else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
-
 static inline void clocksource_select(void) { }
 static inline void clocksource_select_fallback(void) { }
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 457a373e2181..435b8850dd80 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -59,7 +59,7 @@
 /*
  * The timer bases:
  *
- * There are more clockids then hrtimer bases. Thus, we index
+ * There are more clockids than hrtimer bases. Thus, we index
  * into the timer bases by the hrtimer_base_type enum. When trying
  * to reach a base using a clockid, hrtimer_clockid_to_base()
  * is used to convert from clockid to the proper hrtimer_base_type.
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index df68cb875248..149cc8086aea 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -99,7 +99,7 @@ static time64_t			ntp_next_leap_sec = TIME64_MAX;
 static int pps_valid;		/* signal watchdog counter */
 static long pps_tf[3];		/* phase median filter */
 static long pps_jitter;		/* current jitter (ns) */
-static struct timespec pps_fbase; /* beginning of the last freq interval */
+static struct timespec64 pps_fbase; /* beginning of the last freq interval */
 static int pps_shift;		/* current interval duration (s) (shift) */
 static int pps_intcnt;		/* interval counter */
 static s64 pps_freq;		/* frequency offset (scaled ns/s) */
@@ -509,7 +509,7 @@ static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
 static void sync_cmos_clock(struct work_struct *work)
 {
 	struct timespec64 now;
-	struct timespec next;
+	struct timespec64 next;
 	int fail = 1;
 
 	/*
@@ -559,7 +559,7 @@ static void sync_cmos_clock(struct work_struct *work)
 		next.tv_nsec -= NSEC_PER_SEC;
 	}
 	queue_delayed_work(system_power_efficient_wq,
-			   &sync_cmos_work, timespec_to_jiffies(&next));
+			   &sync_cmos_work, timespec64_to_jiffies(&next));
 }
 
 void ntp_notify_cmos_timer(void)
@@ -773,13 +773,13 @@ int __do_adjtimex(struct timex *txc, struct timespec64 *ts, s32 *time_tai)
  * pps_normtime.nsec has a range of ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ]
  * while timespec.tv_nsec has a range of [0, NSEC_PER_SEC) */
 struct pps_normtime {
-	__kernel_time_t	sec;	/* seconds */
+	s64		sec;	/* seconds */
 	long		nsec;	/* nanoseconds */
 };
 
 /* normalize the timestamp so that nsec is in the
    ( -NSEC_PER_SEC / 2, NSEC_PER_SEC / 2 ] interval */
-static inline struct pps_normtime pps_normalize_ts(struct timespec ts)
+static inline struct pps_normtime pps_normalize_ts(struct timespec64 ts)
 {
 	struct pps_normtime norm = {
 		.sec = ts.tv_sec,
@@ -861,7 +861,7 @@ static long hardpps_update_freq(struct pps_normtime freq_norm)
 		pps_errcnt++;
 		pps_dec_freq_interval();
 		printk_deferred(KERN_ERR
-			"hardpps: PPSERROR: interval too long - %ld s\n",
+			"hardpps: PPSERROR: interval too long - %lld s\n",
 			freq_norm.sec);
 		return 0;
 	}
@@ -948,7 +948,7 @@ static void hardpps_update_phase(long error)
  * This code is based on David Mills's reference nanokernel
  * implementation. It was mostly rewritten but keeps the same idea.
  */
-void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
+void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
 {
 	struct pps_normtime pts_norm, freq_norm;
 
@@ -969,7 +969,7 @@ void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
 	}
 
 	/* ok, now we have a base for frequency calculation */
-	freq_norm = pps_normalize_ts(timespec_sub(*raw_ts, pps_fbase));
+	freq_norm = pps_normalize_ts(timespec64_sub(*raw_ts, pps_fbase));
 
 	/* check that the signal is in the range
 	 * [1s - MAXFREQ us, 1s + MAXFREQ us], otherwise reject it */
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index 65430504ca26..af924470eac0 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -9,5 +9,5 @@ extern ktime_t ntp_get_next_leap(void);
 extern int second_overflow(unsigned long secs);
 extern int ntp_validate_timex(struct timex *);
 extern int __do_adjtimex(struct timex *, struct timespec64 *, s32 *);
-extern void __hardpps(const struct timespec *, const struct timespec *);
+extern void __hardpps(const struct timespec64 *, const struct timespec64 *);
 #endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 892e3dae0aac..f5e86d282d52 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -249,7 +249,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 		 * but barriers are not required because update_gt_cputime()
 		 * can handle concurrent updates.
 		 */
-		WRITE_ONCE(cputimer->running, 1);
+		WRITE_ONCE(cputimer->running, true);
 	}
 	sample_cputime_atomic(times, &cputimer->cputime_atomic);
 }
@@ -864,6 +864,13 @@ static void check_thread_timers(struct task_struct *tsk,
 	unsigned long long expires;
 	unsigned long soft;
 
+	/*
+	 * If cputime_expires is zero, then there are no active
+	 * per thread CPU timers.
+	 */
+	if (task_cputime_zero(&tsk->cputime_expires))
+		return;
+
 	expires = check_timers_list(timers, firing, prof_ticks(tsk));
 	tsk_expires->prof_exp = expires_to_cputime(expires);
 
@@ -911,7 +918,7 @@ static inline void stop_process_timers(struct signal_struct *sig)
 	struct thread_group_cputimer *cputimer = &sig->cputimer;
 
 	/* Turn off cputimer->running. This is done without locking. */
-	WRITE_ONCE(cputimer->running, 0);
+	WRITE_ONCE(cputimer->running, false);
 }
 
 static u32 onecputick;
@@ -962,6 +969,19 @@ static void check_process_timers(struct task_struct *tsk,
 	unsigned long soft;
 
 	/*
+	 * If cputimer is not running, then there are no active
+	 * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU).
+	 */
+	if (!READ_ONCE(tsk->signal->cputimer.running))
+		return;
+
+        /*
+	 * Signify that a thread is checking for process timers.
+	 * Write access to this field is protected by the sighand lock.
+	 */
+	sig->cputimer.checking_timer = true;
+
+	/*
 	 * Collect the current process totals.
 	 */
 	thread_group_cputimer(tsk, &cputime);
@@ -1015,6 +1035,8 @@ static void check_process_timers(struct task_struct *tsk,
 	sig->cputime_expires.sched_exp = sched_expires;
 	if (task_cputime_zero(&sig->cputime_expires))
 		stop_process_timers(sig);
+
+	sig->cputimer.checking_timer = false;
 }
 
 /*
@@ -1117,24 +1139,33 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
 static inline int fastpath_timer_check(struct task_struct *tsk)
 {
 	struct signal_struct *sig;
-	cputime_t utime, stime;
-
-	task_cputime(tsk, &utime, &stime);
 
 	if (!task_cputime_zero(&tsk->cputime_expires)) {
-		struct task_cputime task_sample = {
-			.utime = utime,
-			.stime = stime,
-			.sum_exec_runtime = tsk->se.sum_exec_runtime
-		};
+		struct task_cputime task_sample;
 
+		task_cputime(tsk, &task_sample.utime, &task_sample.stime);
+		task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime;
 		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
 			return 1;
 	}
 
 	sig = tsk->signal;
-	/* Check if cputimer is running. This is accessed without locking. */
-	if (READ_ONCE(sig->cputimer.running)) {
+	/*
+	 * Check if thread group timers expired when the cputimer is
+	 * running and no other thread in the group is already checking
+	 * for thread group cputimers. These fields are read without the
+	 * sighand lock. However, this is fine because this is meant to
+	 * be a fastpath heuristic to determine whether we should try to
+	 * acquire the sighand lock to check/handle timers.
+	 *
+	 * In the worst case scenario, if 'running' or 'checking_timer' gets
+	 * set but the current thread doesn't see the change yet, we'll wait
+	 * until the next thread in the group gets a scheduler interrupt to
+	 * handle the timer. This isn't an issue in practice because these
+	 * types of delays with signals actually getting sent are expected.
+	 */
+	if (READ_ONCE(sig->cputimer.running) &&
+	    !READ_ONCE(sig->cputimer.checking_timer)) {
 		struct task_cputime group_sample;
 
 		sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic);
@@ -1174,12 +1205,8 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 	 * put them on the firing list.
 	 */
 	check_thread_timers(tsk, &firing);
-	/*
-	 * If there are any active process wide timers (POSIX 1.b, itimers,
-	 * RLIMIT_CPU) cputimer must be running.
-	 */
-	if (READ_ONCE(tsk->signal->cputimer.running))
-		check_process_timers(tsk, &firing);
+
+	check_process_timers(tsk, &firing);
 
 	/*
 	 * We must release these locks before taking any timer's lock.
diff --git a/kernel/time/timeconst.bc b/kernel/time/timeconst.bc
index c7388dee8635..c48688904f9f 100644
--- a/kernel/time/timeconst.bc
+++ b/kernel/time/timeconst.bc
@@ -39,7 +39,7 @@ define fmuls(b,n,d) {
 }
 
 define timeconst(hz) {
-	print "/* Automatically generated by kernel/timeconst.bc */\n"
+	print "/* Automatically generated by kernel/time/timeconst.bc */\n"
 	print "/* Time conversion constants for HZ == ", hz, " */\n"
 	print "\n"
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3739ac6aa473..b1356b7ae570 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -849,7 +849,7 @@ EXPORT_SYMBOL_GPL(ktime_get_real_seconds);
 #ifdef CONFIG_NTP_PPS
 
 /**
- * getnstime_raw_and_real - get day and raw monotonic time in timespec format
+ * ktime_get_raw_and_real_ts64 - get day and raw monotonic time in timespec format
  * @ts_raw:	pointer to the timespec to be set to raw monotonic time
  * @ts_real:	pointer to the timespec to be set to the time of day
  *
@@ -857,7 +857,7 @@ EXPORT_SYMBOL_GPL(ktime_get_real_seconds);
  * same time atomically and stores the resulting timestamps in timespec
  * format.
  */
-void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
+void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, struct timespec64 *ts_real)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long seq;
@@ -868,7 +868,7 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
 
-		*ts_raw = timespec64_to_timespec(tk->raw_time);
+		*ts_raw = tk->raw_time;
 		ts_real->tv_sec = tk->xtime_sec;
 		ts_real->tv_nsec = 0;
 
@@ -877,10 +877,10 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
 
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
-	timespec_add_ns(ts_raw, nsecs_raw);
-	timespec_add_ns(ts_real, nsecs_real);
+	timespec64_add_ns(ts_raw, nsecs_raw);
+	timespec64_add_ns(ts_real, nsecs_real);
 }
-EXPORT_SYMBOL(getnstime_raw_and_real);
+EXPORT_SYMBOL(ktime_get_raw_and_real_ts64);
 
 #endif /* CONFIG_NTP_PPS */
 
@@ -1251,7 +1251,7 @@ void __init timekeeping_init(void)
 	set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec);
 	tk_set_wall_to_mono(tk, tmp);
 
-	timekeeping_update(tk, TK_MIRROR);
+	timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
 
 	write_seqcount_end(&tk_core.seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -1674,7 +1674,7 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
 /**
  * accumulate_nsecs_to_secs - Accumulates nsecs into secs
  *
- * Helper function that accumulates a the nsecs greater then a second
+ * Helper function that accumulates the nsecs greater than a second
  * from the xtime_nsec field to the xtime_secs field.
  * It also calls into the NTP code to handle leapsecond processing.
  *
@@ -1726,7 +1726,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
 	cycle_t interval = tk->cycle_interval << shift;
 	u64 raw_nsecs;
 
-	/* If the offset is smaller then a shifted interval, do nothing */
+	/* If the offset is smaller than a shifted interval, do nothing */
 	if (offset < interval)
 		return offset;
 
@@ -2025,7 +2025,7 @@ int do_adjtimex(struct timex *txc)
 /**
  * hardpps() - Accessor function to NTP __hardpps function
  */
-void hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts)
+void hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts)
 {
 	unsigned long flags;
 
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 84190f02b521..74591ba9474f 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -461,10 +461,17 @@ void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr)
 
 static void timer_stats_account_timer(struct timer_list *timer)
 {
-	if (likely(!timer->start_site))
+	void *site;
+
+	/*
+	 * start_site can be concurrently reset by
+	 * timer_stats_timer_clear_start_info()
+	 */
+	site = READ_ONCE(timer->start_site);
+	if (likely(!site))
 		return;
 
-	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
+	timer_stats_update_stats(timer, timer->start_pid, site,
 				 timer->function, timer->start_comm,
 				 timer->flags);
 }
@@ -867,7 +874,7 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
 	if (mask == 0)
 		return expires;
 
-	bit = find_last_bit(&mask, BITS_PER_LONG);
+	bit = __fls(mask);
 
 	mask = (1UL << bit) - 1;
 
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index b746399ab59c..8abf1ba18085 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -85,9 +85,19 @@ check_stack(unsigned long ip, unsigned long *stack)
 	if (!object_is_on_stack(stack))
 		return;
 
+	/* Can't do this from NMI context (can cause deadlocks) */
+	if (in_nmi())
+		return;
+
 	local_irq_save(flags);
 	arch_spin_lock(&max_stack_lock);
 
+	/*
+	 * RCU may not be watching, make it see us.
+	 * The stack trace code uses rcu_sched.
+	 */
+	rcu_irq_enter();
+
 	/* In case another CPU set the tracer_frame on us */
 	if (unlikely(!frame_size))
 		this_size -= tracer_frame;
@@ -169,6 +179,7 @@ check_stack(unsigned long ip, unsigned long *stack)
 	}
 
  out:
+	rcu_irq_exit();
 	arch_spin_unlock(&max_stack_lock);
 	local_irq_restore(flags);
 }
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ca71582fcfab..bcb14cafe007 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1458,13 +1458,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
 	timer_stats_timer_set_start_info(&dwork->timer);
 
 	dwork->wq = wq;
+	/* timer isn't guaranteed to run in this cpu, record earlier */
+	if (cpu == WORK_CPU_UNBOUND)
+		cpu = raw_smp_processor_id();
 	dwork->cpu = cpu;
 	timer->expires = jiffies + delay;
 
-	if (unlikely(cpu != WORK_CPU_UNBOUND))
-		add_timer_on(timer, cpu);
-	else
-		add_timer(timer);
+	add_timer_on(timer, cpu);
 }
 
 /**