From a1ee1932aa6bea0bb074f5e3ced112664e4637ed Mon Sep 17 00:00:00 2001
From: Joshua Hunt <johunt@akamai.com>
Date: Thu, 17 Mar 2016 14:17:23 -0700
Subject: watchdog: don't run proc_watchdog_update if new value is same as old

While working on a script to restore all sysctl params before a series of
tests I found that writing any value into the
/proc/sys/kernel/{nmi_watchdog,soft_watchdog,watchdog,watchdog_thresh}
causes them to call proc_watchdog_update().

  NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter.
  NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter.
  NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter.
  NMI watchdog: enabled on all CPUs, permanently consumes one hw-PMU counter.

There doesn't appear to be a reason for doing this work every time a write
occurs, so only do it when the values change.

Signed-off-by: Josh Hunt <johunt@akamai.com>
Acked-by: Don Zickus <dzickus@redhat.com>
Reviewed-by: Aaron Tomlin <atomlin@redhat.com>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Cc: <stable@vger.kernel.org>	[4.1.x+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/watchdog.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index b3ace6ebbba3..9acb29f280ec 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -923,6 +923,9 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write,
 		 * both lockup detectors are disabled if proc_watchdog_update()
 		 * returns an error.
 		 */
+		if (old == new)
+			goto out;
+
 		err = proc_watchdog_update();
 	}
 out:
@@ -967,7 +970,7 @@ int proc_soft_watchdog(struct ctl_table *table, int write,
 int proc_watchdog_thresh(struct ctl_table *table, int write,
 			 void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	int err, old;
+	int err, old, new;
 
 	get_online_cpus();
 	mutex_lock(&watchdog_proc_mutex);
@@ -987,6 +990,10 @@ int proc_watchdog_thresh(struct ctl_table *table, int write,
 	/*
 	 * Update the sample period. Restore on failure.
 	 */
+	new = ACCESS_ONCE(watchdog_thresh);
+	if (old == new)
+		goto out;
+
 	set_sample_period();
 	err = proc_watchdog_update();
 	if (err) {
-- 
cgit v1.2.3


From 12580e4b54ba8a1b22ec977c200be0174ca42348 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@virtuozzo.com>
Date: Thu, 17 Mar 2016 14:17:38 -0700
Subject: mm: memcontrol: report kernel stack usage in cgroup2 memory.stat

Show how much memory is allocated to kernel stacks.

Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/fork.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 2e391c754ae7..accb7221d547 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -164,12 +164,20 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 	struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
 						  THREAD_SIZE_ORDER);
 
+	if (page)
+		memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK,
+					    1 << THREAD_SIZE_ORDER);
+
 	return page ? page_address(page) : NULL;
 }
 
 static inline void free_thread_info(struct thread_info *ti)
 {
-	free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+	struct page *page = virt_to_page(ti);
+
+	memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK,
+				    -(1 << THREAD_SIZE_ORDER));
+	__free_kmem_pages(page, THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_info_cache;
-- 
cgit v1.2.3


From 795ae7a0de6b834a0cc202aa55c190ef81496665 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 17 Mar 2016 14:19:14 -0700
Subject: mm: scale kswapd watermarks in proportion to memory

In machines with 140G of memory and enterprise flash storage, we have
seen read and write bursts routinely exceed the kswapd watermarks and
cause thundering herds in direct reclaim.  Unfortunately, the only way
to tune kswapd aggressiveness is through adjusting min_free_kbytes - the
system's emergency reserves - which is entirely unrelated to the
system's latency requirements.  In order to get kswapd to maintain a
250M buffer of free memory, the emergency reserves need to be set to 1G.
That is a lot of memory wasted for no good reason.

On the other hand, it's reasonable to assume that allocation bursts and
overall allocation concurrency scale with memory capacity, so it makes
sense to make kswapd aggressiveness a function of that as well.

Change the kswapd watermark scale factor from the currently fixed 25% of
the tunable emergency reserve to a tunable 0.1% of memory.

Beyond 1G of memory, this will produce bigger watermark steps than the
current formula in default settings.  Ensure that the new formula never
chooses steps smaller than that, i.e.  25% of the emergency reserve.

On a 140G machine, this raises the default watermark steps - the
distance between min and low, and low and high - from 16M to 143M.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sysctl.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f5102fabef7f..725587f10667 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -126,6 +126,7 @@ static int __maybe_unused two = 2;
 static int __maybe_unused four = 4;
 static unsigned long one_ul = 1;
 static int one_hundred = 100;
+static int one_thousand = 1000;
 #ifdef CONFIG_PRINTK
 static int ten_thousand = 10000;
 #endif
@@ -1403,6 +1404,15 @@ static struct ctl_table vm_table[] = {
 		.proc_handler	= min_free_kbytes_sysctl_handler,
 		.extra1		= &zero,
 	},
+	{
+		.procname	= "watermark_scale_factor",
+		.data		= &watermark_scale_factor,
+		.maxlen		= sizeof(watermark_scale_factor),
+		.mode		= 0644,
+		.proc_handler	= watermark_scale_factor_sysctl_handler,
+		.extra1		= &one,
+		.extra2		= &one_thousand,
+	},
 	{
 		.procname	= "percpu_pagelist_fraction",
 		.data		= &percpu_pagelist_fraction,
-- 
cgit v1.2.3


From da8b44d5a9f8bf26da637b7336508ca534d6b319 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 17 Mar 2016 14:20:51 -0700
Subject: timer: convert timer_slack_ns from unsigned long to u64

This patchset introduces a /proc/<pid>/timerslack_ns interface which
would allow controlling processes to be able to set the timerslack value
on other processes in order to save power by avoiding wakeups (Something
Android currently does via out-of-tree patches).

The first patch tries to fix the internal timer_slack_ns usage which was
defined as a long, which limits the slack range to ~4 seconds on 32bit
systems.  It converts it to a u64, which provides the same basically
unlimited slack (500 years) on both 32bit and 64bit machines.

The second patch introduces the /proc/<pid>/timerslack_ns interface
which allows the full 64bit slack range for a task to be read or set on
both 32bit and 64bit machines.

With these two patches, on a 32bit machine, after setting the slack on
bash to 10 seconds:

$ time sleep 1

real    0m10.747s
user    0m0.001s
sys     0m0.005s

The first patch is a little ugly, since I had to chase the slack delta
arguments through a number of functions converting them to u64s.  Let me
know if it makes sense to break that up more or not.

Other than that things are fairly straightforward.

This patch (of 2):

The timer_slack_ns value in the task struct is currently a unsigned
long.  This means that on 32bit applications, the maximum slack is just
over 4 seconds.  However, on 64bit machines, its much much larger (~500
years).

This disparity could make application development a little (as well as
the default_slack) to a u64.  This means both 32bit and 64bit systems
have the same effective internal slack range.

Now the existing ABI via PR_GET_TIMERSLACK and PR_SET_TIMERSLACK specify
the interface as a unsigned long, so we preserve that limitation on
32bit systems, where SET_TIMERSLACK can only set the slack to a unsigned
long value, and GET_TIMERSLACK will return ULONG_MAX if the slack is
actually larger then what can be stored by an unsigned long.

This patch also modifies hrtimer functions which specified the slack
delta as a unsigned long.

Signed-off-by: John Stultz <john.stultz@linaro.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Oren Laadan <orenl@cellrox.com>
Cc: Ruchi Kandoi <kandoiruchi@google.com>
Cc: Rom Lemarchand <romlem@android.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Android Kernel Team <kernel-team@android.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sys.c          | 5 ++++-
 kernel/time/hrtimer.c | 8 ++++----
 kernel/time/timer.c   | 4 ++--
 3 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sys.c b/kernel/sys.c
index 78947de6f969..cf8ba545c7d3 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2169,7 +2169,10 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		error = perf_event_task_enable();
 		break;
 	case PR_GET_TIMERSLACK:
-		error = current->timer_slack_ns;
+		if (current->timer_slack_ns > ULONG_MAX)
+			error = ULONG_MAX;
+		else
+			error = current->timer_slack_ns;
 		break;
 	case PR_SET_TIMERSLACK:
 		if (arg2 <= 0)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index fa909f9fd559..58a321c34cfb 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -979,7 +979,7 @@ static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
  *		relative (HRTIMER_MODE_REL)
  */
 void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
-			    unsigned long delta_ns, const enum hrtimer_mode mode)
+			    u64 delta_ns, const enum hrtimer_mode mode)
 {
 	struct hrtimer_clock_base *base, *new_base;
 	unsigned long flags;
@@ -1548,7 +1548,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
 	struct restart_block *restart;
 	struct hrtimer_sleeper t;
 	int ret = 0;
-	unsigned long slack;
+	u64 slack;
 
 	slack = current->timer_slack_ns;
 	if (dl_task(current) || rt_task(current))
@@ -1724,7 +1724,7 @@ void __init hrtimers_init(void)
  * @clock:	timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
  */
 int __sched
-schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
+schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
 			       const enum hrtimer_mode mode, int clock)
 {
 	struct hrtimer_sleeper t;
@@ -1792,7 +1792,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
  *
  * Returns 0 when the timer has expired otherwise -EINTR
  */
-int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
+int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
 				     const enum hrtimer_mode mode)
 {
 	return schedule_hrtimeout_range_clock(expires, delta, mode,
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index bbc5d1114583..d1798fa0c743 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1698,10 +1698,10 @@ EXPORT_SYMBOL(msleep_interruptible);
 static void __sched do_usleep_range(unsigned long min, unsigned long max)
 {
 	ktime_t kmin;
-	unsigned long delta;
+	u64 delta;
 
 	kmin = ktime_set(0, min * NSEC_PER_USEC);
-	delta = (max - min) * NSEC_PER_USEC;
+	delta = (u64)(max - min) * NSEC_PER_USEC;
 	schedule_hrtimeout_range(&kmin, delta, HRTIMER_MODE_REL);
 }
 
-- 
cgit v1.2.3


From a8199371afc27946d72f0d53e938e78d2ea0bae3 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Date: Thu, 17 Mar 2016 14:21:20 -0700
Subject: printk: move can_use_console() out of console_trylock_for_printk()

console_unlock() allows to cond_resched() if its caller has set
`console_may_schedule' to 1 (this functionality is present since
8d91f8b15361 ("printk: do cond_resched() between lines while outputting
to consoles").

The rules are:
-- console_lock() always sets `console_may_schedule' to 1
-- console_trylock() always sets `console_may_schedule' to 0

printk() calls console_unlock() with preemption desabled, which
basically can lead to RCU stalls, watchdog soft lockups, etc.  if
something is simultaneously calling printk() frequent enough (IOW,
console_sem owner always has new data to send to console divers and
can't leave console_unlock() for a long time).

printk()->console_trylock() callers do not necessarily execute in atomic
contexts, and some of them can cond_resched() in console_unlock().
console_trylock() can set `console_may_schedule' to 1 (allow
cond_resched() later in consoe_unlock()) when it's safe.

This patch (of 3):

vprintk_emit() disables preemption around console_trylock_for_printk()
and console_unlock() calls for a strong reason -- can_use_console()
check.  The thing is that vprintl_emit() can be called on a CPU that is
not fully brought up yet (!cpu_online()), which potentially can cause
problems if console driver wants to access per-cpu data.  A console
driver can explicitly state that it's safe to call it from !online cpu
by setting CON_ANYTIME bit in console ->flags.  That's why for
!cpu_online() can_use_console() iterates all the console to find out if
there is a CON_ANYTIME console, otherwise console_unlock() must be
avoided.

can_use_console() ensures that console_unlock() call is safe in
vprintk_emit() only; console_lock() and console_trylock() are not
covered by this check.  Even though call_console_drivers(), invoked from
console_cont_flush() and console_unlock(), tests `!cpu_online() &&
CON_ANYTIME' for_each_console(), it may be too late, which can result in
messages loss.

Assume that we have 2 cpus -- CPU0 is online, CPU1 is !online, and no
CON_ANYTIME consoles available.

CPU0 online                        CPU1 !online
                                 console_trylock()
                                 ...
                                 console_unlock()
                                   console_cont_flush
                                     spin_lock logbuf_lock
                                     if (!cont.len) {
                                        spin_unlock logbuf_lock
                                        return
                                     }
                                   for (;;) {
vprintk_emit
  spin_lock logbuf_lock
  log_store
  spin_unlock logbuf_lock
                                     spin_lock logbuf_lock
  !console_trylock_for_printk        msg_print_text
 return                              console_idx = log_next()
                                     console_seq++
                                     console_prev = msg->flags
                                     spin_unlock logbuf_lock

                                     call_console_drivers()
                                       for_each_console(con) {
                                         if (!cpu_online() &&
                                             !(con->flags & CON_ANYTIME))
                                                 continue;
                                         }
                                   /*
                                    * no message printed, we lost it
                                    */
vprintk_emit
  spin_lock logbuf_lock
  log_store
  spin_unlock logbuf_lock
  !console_trylock_for_printk
 return
                                   /*
                                    * go to the beginning of the loop,
                                    * find out there are new messages,
                                    * lose it
                                    */
                                   }

console_trylock()/console_lock() call on CPU1 may come from cpu
notifiers registered on that CPU.  Since notifiers are not getting
unregistered when CPU is going DOWN, all of the notifiers receive
notifications during CPU UP.  For example, on my x86_64, I see around 50
notification sent from offline CPU to itself

 [swapper/2] from cpu:2 to:2 action:CPU_STARTING hotplug_hrtick
 [swapper/2] from cpu:2 to:2 action:CPU_STARTING blk_mq_main_cpu_notify
 [swapper/2] from cpu:2 to:2 action:CPU_STARTING blk_mq_queue_reinit_notify
 [swapper/2] from cpu:2 to:2 action:CPU_STARTING console_cpu_notify

while doing
  echo 0 > /sys/devices/system/cpu/cpu2/online
  echo 1 > /sys/devices/system/cpu/cpu2/online

So grabbing the console_sem lock while CPU is !online is possible,
in theory.

This patch moves can_use_console() check out of
console_trylock_for_printk().  Instead it calls it in console_unlock(),
so now console_lock()/console_unlock() are also 'protected' by
can_use_console().  This also means that console_trylock_for_printk() is
not really needed anymore and can be removed.

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Jan Kara <jack@suse.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Kyle McMartin <kyle@kernel.org>
Cc: Dave Jones <davej@codemonkey.org.uk>
Cc: Calvin Owens <calvinowens@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/printk/printk.c | 97 ++++++++++++++++++++++----------------------------
 1 file changed, 42 insertions(+), 55 deletions(-)

(limited to 'kernel')

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index c963ba534a78..2523332bd998 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1483,58 +1483,6 @@ static void zap_locks(void)
 	sema_init(&console_sem, 1);
 }
 
-/*
- * Check if we have any console that is capable of printing while cpu is
- * booting or shutting down. Requires console_sem.
- */
-static int have_callable_console(void)
-{
-	struct console *con;
-
-	for_each_console(con)
-		if (con->flags & CON_ANYTIME)
-			return 1;
-
-	return 0;
-}
-
-/*
- * Can we actually use the console at this time on this cpu?
- *
- * Console drivers may assume that per-cpu resources have been allocated. So
- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
- * call them until this CPU is officially up.
- */
-static inline int can_use_console(unsigned int cpu)
-{
-	return cpu_online(cpu) || have_callable_console();
-}
-
-/*
- * Try to get console ownership to actually show the kernel
- * messages from a 'printk'. Return true (and with the
- * console_lock held, and 'console_locked' set) if it
- * is successful, false otherwise.
- */
-static int console_trylock_for_printk(void)
-{
-	unsigned int cpu = smp_processor_id();
-
-	if (!console_trylock())
-		return 0;
-	/*
-	 * If we can't use the console, we need to release the console
-	 * semaphore by hand to avoid flushing the buffer. We need to hold the
-	 * console semaphore in order to do this test safely.
-	 */
-	if (!can_use_console(cpu)) {
-		console_locked = 0;
-		up_console_sem();
-		return 0;
-	}
-	return 1;
-}
-
 int printk_delay_msec __read_mostly;
 
 static inline void printk_delay(void)
@@ -1681,7 +1629,6 @@ asmlinkage int vprintk_emit(int facility, int level,
 	boot_delay_msec(level);
 	printk_delay();
 
-	/* This stops the holder of console_sem just where we want him */
 	local_irq_save(flags);
 	this_cpu = smp_processor_id();
 
@@ -1705,6 +1652,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 	}
 
 	lockdep_off();
+	/* This stops the holder of console_sem just where we want him */
 	raw_spin_lock(&logbuf_lock);
 	logbuf_cpu = this_cpu;
 
@@ -1821,7 +1769,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 		 * semaphore.  The release will print out buffers and wake up
 		 * /dev/kmsg and syslog() users.
 		 */
-		if (console_trylock_for_printk())
+		if (console_trylock())
 			console_unlock();
 		preempt_enable();
 		lockdep_on();
@@ -2184,6 +2132,33 @@ int is_console_locked(void)
 	return console_locked;
 }
 
+/*
+ * Check if we have any console that is capable of printing while cpu is
+ * booting or shutting down. Requires console_sem.
+ */
+static int have_callable_console(void)
+{
+	struct console *con;
+
+	for_each_console(con)
+		if (con->flags & CON_ANYTIME)
+			return 1;
+
+	return 0;
+}
+
+/*
+ * Can we actually use the console at this time on this cpu?
+ *
+ * Console drivers may assume that per-cpu resources have been allocated. So
+ * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
+ * call them until this CPU is officially up.
+ */
+static inline int can_use_console(void)
+{
+	return cpu_online(raw_smp_processor_id()) || have_callable_console();
+}
+
 static void console_cont_flush(char *text, size_t size)
 {
 	unsigned long flags;
@@ -2254,9 +2229,21 @@ void console_unlock(void)
 	do_cond_resched = console_may_schedule;
 	console_may_schedule = 0;
 
+again:
+	/*
+	 * We released the console_sem lock, so we need to recheck if
+	 * cpu is online and (if not) is there at least one CON_ANYTIME
+	 * console.
+	 */
+	if (!can_use_console()) {
+		console_locked = 0;
+		up_console_sem();
+		return;
+	}
+
 	/* flush buffered message fragment immediately to console */
 	console_cont_flush(text, sizeof(text));
-again:
+
 	for (;;) {
 		struct printk_log *msg;
 		size_t ext_len = 0;
-- 
cgit v1.2.3


From 6b97a20d3a7909daa06625d4440c2c52d7bf08d7 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Date: Thu, 17 Mar 2016 14:21:23 -0700
Subject: printk: set may_schedule for some of console_trylock() callers

console_unlock() allows to cond_resched() if its caller has set
`console_may_schedule' to 1, since 8d91f8b15361 ("printk: do
cond_resched() between lines while outputting to consoles").

The rules are:
-- console_lock() always sets `console_may_schedule' to 1
-- console_trylock() always sets `console_may_schedule' to 0

However, console_trylock() callers (among them is printk()) do not
always call printk() from atomic contexts, and some of them can
cond_resched() in console_unlock(), so console_trylock() can set
`console_may_schedule' to 1 for such processes.

For !CONFIG_PREEMPT_COUNT kernels, however, console_trylock() always
sets `console_may_schedule' to 0.

It's possible to drop explicit preempt_disable()/preempt_enable() in
vprintk_emit(), because console_unlock() and console_trylock() are now
smart enough:
 a) console_unlock() does not cond_resched() when it's unsafe
    (console_trylock() takes care of that)
 b) console_unlock() does can_use_console() check.

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Jan Kara <jack@suse.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Kyle McMartin <kyle@kernel.org>
Cc: Dave Jones <davej@codemonkey.org.uk>
Cc: Calvin Owens <calvinowens@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/printk/printk.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 2523332bd998..a6d023c3b852 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1757,13 +1757,6 @@ asmlinkage int vprintk_emit(int facility, int level,
 	/* If called from the scheduler, we can not call up(). */
 	if (!in_sched) {
 		lockdep_off();
-		/*
-		 * Disable preemption to avoid being preempted while holding
-		 * console_sem which would prevent anyone from printing to
-		 * console
-		 */
-		preempt_disable();
-
 		/*
 		 * Try to acquire and then immediately release the console
 		 * semaphore.  The release will print out buffers and wake up
@@ -1771,7 +1764,6 @@ asmlinkage int vprintk_emit(int facility, int level,
 		 */
 		if (console_trylock())
 			console_unlock();
-		preempt_enable();
 		lockdep_on();
 	}
 
@@ -2122,7 +2114,20 @@ int console_trylock(void)
 		return 0;
 	}
 	console_locked = 1;
-	console_may_schedule = 0;
+	/*
+	 * When PREEMPT_COUNT disabled we can't reliably detect if it's
+	 * safe to schedule (e.g. calling printk while holding a spin_lock),
+	 * because preempt_disable()/preempt_enable() are just barriers there
+	 * and preempt_count() is always 0.
+	 *
+	 * RCU read sections have a separate preemption counter when
+	 * PREEMPT_RCU enabled thus we must take extra care and check
+	 * rcu_preempt_depth(), otherwise RCU read sections modify
+	 * preempt_count().
+	 */
+	console_may_schedule = !oops_in_progress &&
+			preemptible() &&
+			!rcu_preempt_depth();
 	return 1;
 }
 EXPORT_SYMBOL(console_trylock);
-- 
cgit v1.2.3


From adaf6590ee7db23c3a124fb9f213c90c15cecf96 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Date: Thu, 17 Mar 2016 14:21:27 -0700
Subject: printk: check CON_ENABLED in have_callable_console()

have_callable_console() must also test CON_ENABLED bit, not just
CON_ANYTIME.  We may have disabled CON_ANYTIME console so printk can
wrongly assume that it's safe to call_console_drivers().

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Jan Kara <jack@suse.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Kyle McMartin <kyle@kernel.org>
Cc: Dave Jones <davej@codemonkey.org.uk>
Cc: Calvin Owens <calvinowens@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/printk/printk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a6d023c3b852..d5fd844e5b08 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2146,7 +2146,8 @@ static int have_callable_console(void)
 	struct console *con;
 
 	for_each_console(con)
-		if (con->flags & CON_ANYTIME)
+		if ((con->flags & CON_ENABLED) &&
+				(con->flags & CON_ANYTIME))
 			return 1;
 
 	return 0;
-- 
cgit v1.2.3


From f468908bb55a0b01d9424c74f8ec8eb906835150 Mon Sep 17 00:00:00 2001
From: Ivan Delalande <colona@arista.com>
Date: Thu, 17 Mar 2016 14:21:30 -0700
Subject: printk: add clear_idx symbol to vmcoreinfo

This allows us to extract from the vmcore only the messages emitted
since the last time the ring buffer was cleared.  We just have to make
sure its value is always up-to-date, when old messages are discarded to
free space in log_make_free_space() for example.

Signed-off-by: Zeyu Zhao <zzy8200@gmail.com>
Signed-off-by: Ivan Delalande <colona@arista.com>
Cc: Kay Sievers <kay@vrfy.org>
Cc: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/printk/printk.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'kernel')

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index d5fd844e5b08..bfbf284e4218 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -367,16 +367,20 @@ static int logbuf_has_space(u32 msg_size, bool empty)
 
 static int log_make_free_space(u32 msg_size)
 {
-	while (log_first_seq < log_next_seq) {
-		if (logbuf_has_space(msg_size, false))
-			return 0;
+	while (log_first_seq < log_next_seq &&
+	       !logbuf_has_space(msg_size, false)) {
 		/* drop old messages until we have enough contiguous space */
 		log_first_idx = log_next(log_first_idx);
 		log_first_seq++;
 	}
 
+	if (clear_seq < log_first_seq) {
+		clear_seq = log_first_seq;
+		clear_idx = log_first_idx;
+	}
+
 	/* sequence numbers are equal, so the log buffer is empty */
-	if (logbuf_has_space(msg_size, true))
+	if (logbuf_has_space(msg_size, log_first_seq == log_next_seq))
 		return 0;
 
 	return -ENOMEM;
@@ -854,6 +858,7 @@ void log_buf_kexec_setup(void)
 	VMCOREINFO_SYMBOL(log_buf);
 	VMCOREINFO_SYMBOL(log_buf_len);
 	VMCOREINFO_SYMBOL(log_first_idx);
+	VMCOREINFO_SYMBOL(clear_idx);
 	VMCOREINFO_SYMBOL(log_next_idx);
 	/*
 	 * Export struct printk_log size and field offsets. User space tools can
@@ -1216,12 +1221,6 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 		u32 idx;
 		enum log_flags prev;
 
-		if (clear_seq < log_first_seq) {
-			/* messages are gone, move to first available one */
-			clear_seq = log_first_seq;
-			clear_idx = log_first_idx;
-		}
-
 		/*
 		 * Find first record that fits, including all following records,
 		 * into the user-provided buffer for this dump.
-- 
cgit v1.2.3


From 4cc7ecb7f2a60e8deb783b8fbf7c1ae467acb920 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Mar 2016 14:23:00 -0700
Subject: param: convert some "on"/"off" users to strtobool

This changes several users of manual "on"/"off" parsing to use
strtobool.

Some side-effects:
- these uses will now parse y/n/1/0 meaningfully too
- the early_param uses will now bubble up parse errors

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Amitkumar Karwar <akarwar@marvell.com>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Joe Perches <joe@perches.com>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Nishant Sarmukadam <nishants@marvell.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Steve French <sfrench@samba.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/time/hrtimer.c    | 10 ++--------
 kernel/time/tick-sched.c | 10 ++--------
 2 files changed, 4 insertions(+), 16 deletions(-)

(limited to 'kernel')

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 58a321c34cfb..fa0b983290cf 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -515,7 +515,7 @@ static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 /*
  * High resolution timer enabled ?
  */
-static int hrtimer_hres_enabled __read_mostly  = 1;
+static bool hrtimer_hres_enabled __read_mostly  = true;
 unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC;
 EXPORT_SYMBOL_GPL(hrtimer_resolution);
 
@@ -524,13 +524,7 @@ EXPORT_SYMBOL_GPL(hrtimer_resolution);
  */
 static int __init setup_hrtimer_hres(char *str)
 {
-	if (!strcmp(str, "off"))
-		hrtimer_hres_enabled = 0;
-	else if (!strcmp(str, "on"))
-		hrtimer_hres_enabled = 1;
-	else
-		return 0;
-	return 1;
+	return (kstrtobool(str, &hrtimer_hres_enabled) == 0);
 }
 
 __setup("highres=", setup_hrtimer_hres);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 969e6704c3c9..195fe7d2caad 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -486,20 +486,14 @@ void __init tick_nohz_init(void)
 /*
  * NO HZ enabled ?
  */
-int tick_nohz_enabled __read_mostly = 1;
+bool tick_nohz_enabled __read_mostly  = true;
 unsigned long tick_nohz_active  __read_mostly;
 /*
  * Enable / Disable tickless mode
  */
 static int __init setup_tick_nohz(char *str)
 {
-	if (!strcmp(str, "off"))
-		tick_nohz_enabled = 0;
-	else if (!strcmp(str, "on"))
-		tick_nohz_enabled = 1;
-	else
-		return 0;
-	return 1;
+	return (kstrtobool(str, &tick_nohz_enabled) == 0);
 }
 
 __setup("nohz=", setup_tick_nohz);
-- 
cgit v1.2.3


From 2553b67a1fbe7bf202e4e8070ab0b00d3d3a06a2 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Thu, 17 Mar 2016 14:23:04 -0700
Subject: lib/bug.c: use common WARN helper

The traceoff_on_warning option doesn't have any effect on s390, powerpc,
arm64, parisc, and sh because there are two different types of WARN
implementations:

1) The above mentioned architectures treat WARN() as a special case of a
   BUG() exception.  They handle warnings in report_bug() in lib/bug.c.

2) All other architectures just call warn_slowpath_*() directly.  Their
   warnings are handled in warn_slowpath_common() in kernel/panic.c.

Support traceoff_on_warning on all architectures and prevent any future
divergence by using a single common function to emit the warning.

Also remove the '()' from '%pS()', because the parentheses look funky:

  [   45.607629] WARNING: at /root/warn_mod/warn_mod.c:17 .init_dummy+0x20/0x40 [warn_mod]()

Reported-by: Chunyu Hu <chuhu@redhat.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Tested-by: Prarit Bhargava <prarit@redhat.com>
Acked-by: Prarit Bhargava <prarit@redhat.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/panic.c | 41 ++++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

(limited to 'kernel')

diff --git a/kernel/panic.c b/kernel/panic.c
index d96469de72dc..fa400852bf6c 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -24,6 +24,7 @@
 #include <linux/init.h>
 #include <linux/nmi.h>
 #include <linux/console.h>
+#include <linux/bug.h>
 
 #define PANIC_TIMER_STEP 100
 #define PANIC_BLINK_SPD 18
@@ -449,20 +450,25 @@ void oops_exit(void)
 	kmsg_dump(KMSG_DUMP_OOPS);
 }
 
-#ifdef WANT_WARN_ON_SLOWPATH
-struct slowpath_args {
+struct warn_args {
 	const char *fmt;
 	va_list args;
 };
 
-static void warn_slowpath_common(const char *file, int line, void *caller,
-				 unsigned taint, struct slowpath_args *args)
+void __warn(const char *file, int line, void *caller, unsigned taint,
+	    struct pt_regs *regs, struct warn_args *args)
 {
 	disable_trace_on_warning();
 
 	pr_warn("------------[ cut here ]------------\n");
-	pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS()\n",
-		raw_smp_processor_id(), current->pid, file, line, caller);
+
+	if (file)
+		pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",
+			raw_smp_processor_id(), current->pid, file, line,
+			caller);
+	else
+		pr_warn("WARNING: CPU: %d PID: %d at %pS\n",
+			raw_smp_processor_id(), current->pid, caller);
 
 	if (args)
 		vprintk(args->fmt, args->args);
@@ -479,20 +485,27 @@ static void warn_slowpath_common(const char *file, int line, void *caller,
 	}
 
 	print_modules();
-	dump_stack();
+
+	if (regs)
+		show_regs(regs);
+	else
+		dump_stack();
+
 	print_oops_end_marker();
+
 	/* Just a warning, don't kill lockdep. */
 	add_taint(taint, LOCKDEP_STILL_OK);
 }
 
+#ifdef WANT_WARN_ON_SLOWPATH
 void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...)
 {
-	struct slowpath_args args;
+	struct warn_args args;
 
 	args.fmt = fmt;
 	va_start(args.args, fmt);
-	warn_slowpath_common(file, line, __builtin_return_address(0),
-			     TAINT_WARN, &args);
+	__warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL,
+	       &args);
 	va_end(args.args);
 }
 EXPORT_SYMBOL(warn_slowpath_fmt);
@@ -500,20 +513,18 @@ EXPORT_SYMBOL(warn_slowpath_fmt);
 void warn_slowpath_fmt_taint(const char *file, int line,
 			     unsigned taint, const char *fmt, ...)
 {
-	struct slowpath_args args;
+	struct warn_args args;
 
 	args.fmt = fmt;
 	va_start(args.args, fmt);
-	warn_slowpath_common(file, line, __builtin_return_address(0),
-			     taint, &args);
+	__warn(file, line, __builtin_return_address(0), taint, NULL, &args);
 	va_end(args.args);
 }
 EXPORT_SYMBOL(warn_slowpath_fmt_taint);
 
 void warn_slowpath_null(const char *file, int line)
 {
-	warn_slowpath_common(file, line, __builtin_return_address(0),
-			     TAINT_WARN, NULL);
+	__warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, NULL);
 }
 EXPORT_SYMBOL(warn_slowpath_null);
 #endif
-- 
cgit v1.2.3