From d0b6e0a8ef24b1b07078ababe5d91bcdf4f4264a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 12 Sep 2017 21:36:55 +0200
Subject: watchdog/hardlockup: Provide interface to stop/restart perf events

Provide an interface to stop and restart perf NMI watchdog events on all
CPUs. This is only usable during init and especially for handling the perf
HT bug on Intel machines. It's safe to use it this way as nothing can
start/stop the NMI watchdog in parallel.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Link: http://lkml.kernel.org/r/20170912194146.167649596@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/nmi.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index a36abe2da13e..b24d4a58674a 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -75,7 +75,11 @@ static inline void hardlockup_detector_disable(void) {}
 
 #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
 extern void arch_touch_nmi_watchdog(void);
+extern void hardlockup_detector_perf_stop(void);
+extern void hardlockup_detector_perf_restart(void);
 #else
+static inline void hardlockup_detector_perf_stop(void) { }
+static inline void hardlockup_detector_perf_restart(void) { }
 #if !defined(CONFIG_HAVE_NMI_WATCHDOG)
 static inline void arch_touch_nmi_watchdog(void) {}
 #endif
-- 
cgit v1.2.3


From 6554fd8cf06db86f861bb24d7487b2873ca444c4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Sep 2017 21:36:57 +0200
Subject: watchdog/core: Provide interface to stop from poweroff()

PARISC has a a busy looping power off routine. If the watchdog is enabled
the watchdog timer will still fire, but the thread is not running, which
causes the softlockup watchdog to trigger.

Provide a interface which allows to turn the watchdog off.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Cc: linux-parisc@vger.kernel.org
Link: http://lkml.kernel.org/r/20170912194146.327343752@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/nmi.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index b24d4a58674a..85bb268be39c 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -12,10 +12,10 @@
 
 #ifdef CONFIG_LOCKUP_DETECTOR
 void lockup_detector_init(void);
+void lockup_detector_soft_poweroff(void);
 #else
-static inline void lockup_detector_init(void)
-{
-}
+static inline void lockup_detector_init(void) { }
+static inline void lockup_detector_soft_poweroff(void) { }
 #endif
 
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
-- 
cgit v1.2.3


From 5490125d77a43016b26f629d4b485e2c62172551 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Sep 2017 21:36:59 +0200
Subject: watchdog/core: Remove broken suspend/resume interfaces

This interface has several issues:

 - It's causing recursive locking of the hotplug lock.

 - It's complete overkill to teardown all threads and then recreate them

The same can be achieved with the simple hardlockup_detector_perf_stop /
restart() interfaces. The abuse from the busy looping poweroff() loop of
PARISC has been solved as well.

Remove the cruft.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Link: http://lkml.kernel.org/r/20170912194146.487537732@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/nmi.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 85bb268be39c..7eefe7abf44b 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -164,7 +164,6 @@ extern int watchdog_thresh;
 extern unsigned long watchdog_enabled;
 extern struct cpumask watchdog_cpumask;
 extern unsigned long *watchdog_cpumask_bits;
-extern int __read_mostly watchdog_suspended;
 #ifdef CONFIG_SMP
 extern int sysctl_softlockup_all_cpu_backtrace;
 extern int sysctl_hardlockup_all_cpu_backtrace;
@@ -192,17 +191,6 @@ extern int proc_watchdog_thresh(struct ctl_table *, int ,
 				void __user *, size_t *, loff_t *);
 extern int proc_watchdog_cpumask(struct ctl_table *, int,
 				 void __user *, size_t *, loff_t *);
-extern int lockup_detector_suspend(void);
-extern void lockup_detector_resume(void);
-#else
-static inline int lockup_detector_suspend(void)
-{
-	return 0;
-}
-
-static inline void lockup_detector_resume(void)
-{
-}
 #endif
 
 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
-- 
cgit v1.2.3


From 941154bd6937a710ae9193a3c733c0029e5ae7b8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Sep 2017 21:37:04 +0200
Subject: watchdog/hardlockup/perf: Prevent CPU hotplug deadlock

The following deadlock is possible in the watchdog hotplug code:

  cpus_write_lock()
    ...
      takedown_cpu()
        smpboot_park_threads()
          smpboot_park_thread()
            kthread_park()
              ->park() := watchdog_disable()
                watchdog_nmi_disable()
                  perf_event_release_kernel();
                    put_event()
                      _free_event()
                        ->destroy() := hw_perf_event_destroy()
                          x86_release_hardware()
                            release_ds_buffers()
                              get_online_cpus()

when a per cpu watchdog perf event is destroyed which drops the last
reference to the PMU hardware. The cleanup code there invokes
get_online_cpus() which instantly deadlocks because the hotplug percpu
rwsem is write locked.

To solve this add a deferring mechanism:

  cpus_write_lock()
			   kthread_park()
			    watchdog_nmi_disable(deferred)
			      perf_event_disable(event);
			      move_event_to_deferred(event);
			   ....
  cpus_write_unlock()
  cleaup_deferred_events()
    perf_event_release_kernel()

This is still properly serialized against concurrent hotplug via the
cpu_add_remove_lock, which is held by the task which initiated the hotplug
event.

This is also used to handle event destruction when the watchdog threads are
parked via other mechanisms than CPU hotplug.

Analyzed-by: Peter Zijlstra <peterz@infradead.org>

Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Link: http://lkml.kernel.org/r/20170912194146.884469246@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/nmi.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 7eefe7abf44b..80354e6fa86d 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -13,9 +13,11 @@
 #ifdef CONFIG_LOCKUP_DETECTOR
 void lockup_detector_init(void);
 void lockup_detector_soft_poweroff(void);
+void lockup_detector_cleanup(void);
 #else
 static inline void lockup_detector_init(void) { }
 static inline void lockup_detector_soft_poweroff(void) { }
+static inline void lockup_detector_cleanup(void) { }
 #endif
 
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
@@ -77,9 +79,13 @@ static inline void hardlockup_detector_disable(void) {}
 extern void arch_touch_nmi_watchdog(void);
 extern void hardlockup_detector_perf_stop(void);
 extern void hardlockup_detector_perf_restart(void);
+extern void hardlockup_detector_perf_disable(void);
+extern void hardlockup_detector_perf_cleanup(void);
 #else
 static inline void hardlockup_detector_perf_stop(void) { }
 static inline void hardlockup_detector_perf_restart(void) { }
+static inline void hardlockup_detector_perf_disable(void) { }
+static inline void hardlockup_detector_perf_cleanup(void) { }
 #if !defined(CONFIG_HAVE_NMI_WATCHDOG)
 static inline void arch_touch_nmi_watchdog(void) {}
 #endif
-- 
cgit v1.2.3


From 01f0a02701cbcf32d22cfc9d1ab9a3f0ff2ba68c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Sep 2017 21:37:05 +0200
Subject: watchdog/core: Remove the park_in_progress obfuscation

Commit:

  b94f51183b06 ("kernel/watchdog: prevent false hardlockup on overloaded system")

tries to fix the following issue:

proc_write()
   set_sample_period()    <--- New sample period becoms visible
			  <----- Broken starts
   proc_watchdog_update()
     watchdog_enable_all_cpus()		watchdog_hrtimer_fn()
     update_watchdog_all_cpus()		   restart_timer(sample_period)
        watchdog_park_threads()

					thread->park()
					  disable_nmi()
			  <----- Broken ends

The reason why this is broken is that the update of the watchdog threshold
becomes immediately effective and visible for the hrtimer function which
uses that value to rearm the timer. But the NMI/perf side still uses the
old value up to the point where it is disabled. If the rate has been
lowered then the NMI can run fast enough to 'detect' a hard lockup because
the timer has not fired due to the longer period.

The patch 'fixed' this by adding a variable:

proc_write()
   set_sample_period()
					<----- Broken starts
   proc_watchdog_update()
     watchdog_enable_all_cpus()		watchdog_hrtimer_fn()
     update_watchdog_all_cpus()		   restart_timer(sample_period)
         watchdog_park_threads()
	  park_in_progress = 1
					<----- Broken ends
				        nmi_watchdog()
					  if (park_in_progress)
					     return;

The only effect of this variable was to make the window where the breakage
can hit small enough that it was not longer observable in testing. From a
correctness point of view it is a pointless bandaid which merily papers
over the root cause: the unsychronized update of the variable.

Looking deeper into the related code pathes unearthed similar problems in
the watchdog_start()/stop() functions.

 watchdog_start()
	perf_nmi_event_start()
	hrtimer_start()

 watchdog_stop()
	hrtimer_cancel()
	perf_nmi_event_stop()

In both cases the call order is wrong because if the tasks gets preempted
or the VM gets scheduled out long enough after the first call, then there is
a chance that the next NMI will see a stale hrtimer interrupt count and
trigger a false positive hard lockup splat.

Get rid of park_in_progress so the code can be gradually deobfuscated and
pruned from several layers of duct tape papering over the root cause,
which has been either ignored or not understood at all.

Once this is removed the underlying problem will be fixed by rewriting the
proc interface to do a proper synchronized update.

Address the start/stop() ordering problem as well by reverting the call
order, so this part is at least correct now.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1709052038270.2393@nanos
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/nmi.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 80354e6fa86d..91a3a4a4c8ae 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -27,7 +27,6 @@ extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern unsigned int  softlockup_panic;
 extern int soft_watchdog_enabled;
-extern atomic_t watchdog_park_in_progress;
 #else
 static inline void touch_softlockup_watchdog_sched(void)
 {
-- 
cgit v1.2.3


From 0d85923c7a81719567311ba0eae8ecb2efd4c8a0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Sep 2017 21:37:09 +0200
Subject: smpboot/threads, watchdog/core: Avoid runtime allocation

smpboot_update_cpumask_threads_percpu() allocates a temporary cpumask at
runtime. This is suboptimal because the call site needs more code size for
proper error handling than a statically allocated temporary mask requires
data size.

Add static temporary cpumask. The function is globaly serialized, so no
further protection required.

Remove the half baken error handling in the watchdog code and get rid of
the export as there are no in tree modular users of that function.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Link: http://lkml.kernel.org/r/20170912194147.297288838@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/smpboot.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h
index 12910cf19869..c149aa7bedf3 100644
--- a/include/linux/smpboot.h
+++ b/include/linux/smpboot.h
@@ -55,7 +55,7 @@ smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
 }
 
 void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread);
-int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
-					 const struct cpumask *);
+void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
+					  const struct cpumask *);
 
 #endif
-- 
cgit v1.2.3


From 3b371b5936e7777c819619c00ca60f196a8e13fa Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Sep 2017 21:37:13 +0200
Subject: watchdog/core: Clean up header mess

Having the same #ifdef in various places does not make it more
readable. Collect stuff into one place.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Link: http://lkml.kernel.org/r/20170912194147.627096864@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/nmi.h | 60 ++++++++++++++++++++++-------------------------------
 1 file changed, 25 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 91a3a4a4c8ae..cfebb3bc4eed 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -14,11 +14,29 @@
 void lockup_detector_init(void);
 void lockup_detector_soft_poweroff(void);
 void lockup_detector_cleanup(void);
+bool is_hardlockup(void);
+
+extern int watchdog_user_enabled;
+extern int nmi_watchdog_enabled;
+extern int soft_watchdog_enabled;
+extern int watchdog_thresh;
+extern unsigned long watchdog_enabled;
+
+extern struct cpumask watchdog_cpumask;
+extern unsigned long *watchdog_cpumask_bits;
+#ifdef CONFIG_SMP
+extern int sysctl_softlockup_all_cpu_backtrace;
+extern int sysctl_hardlockup_all_cpu_backtrace;
 #else
+#define sysctl_softlockup_all_cpu_backtrace 0
+#define sysctl_hardlockup_all_cpu_backtrace 0
+#endif /* !CONFIG_SMP */
+
+#else /* CONFIG_LOCKUP_DETECTOR */
 static inline void lockup_detector_init(void) { }
 static inline void lockup_detector_soft_poweroff(void) { }
 static inline void lockup_detector_cleanup(void) { }
-#endif
+#endif /* !CONFIG_LOCKUP_DETECTOR */
 
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
 extern void touch_softlockup_watchdog_sched(void);
@@ -26,28 +44,17 @@ extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern unsigned int  softlockup_panic;
-extern int soft_watchdog_enabled;
 #else
-static inline void touch_softlockup_watchdog_sched(void)
-{
-}
-static inline void touch_softlockup_watchdog(void)
-{
-}
-static inline void touch_softlockup_watchdog_sync(void)
-{
-}
-static inline void touch_all_softlockup_watchdogs(void)
-{
-}
+static inline void touch_softlockup_watchdog_sched(void) { }
+static inline void touch_softlockup_watchdog(void) { }
+static inline void touch_softlockup_watchdog_sync(void) { }
+static inline void touch_all_softlockup_watchdogs(void) { }
 #endif
 
 #ifdef CONFIG_DETECT_HUNG_TASK
 void reset_hung_task_detector(void);
 #else
-static inline void reset_hung_task_detector(void)
-{
-}
+static inline void reset_hung_task_detector(void) { }
 #endif
 
 /*
@@ -92,7 +99,7 @@ static inline void arch_touch_nmi_watchdog(void) {}
 
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
- * 
+ *
  * If the architecture supports the NMI watchdog, touch_nmi_watchdog()
  * may be used to reset the timeout - for code which intentionally
  * disables interrupts for a long time. This call is stateless.
@@ -162,21 +169,6 @@ static inline bool trigger_single_cpu_backtrace(int cpu)
 u64 hw_nmi_get_sample_period(int watchdog_thresh);
 #endif
 
-#ifdef CONFIG_LOCKUP_DETECTOR
-extern int nmi_watchdog_enabled;
-extern int watchdog_user_enabled;
-extern int watchdog_thresh;
-extern unsigned long watchdog_enabled;
-extern struct cpumask watchdog_cpumask;
-extern unsigned long *watchdog_cpumask_bits;
-#ifdef CONFIG_SMP
-extern int sysctl_softlockup_all_cpu_backtrace;
-extern int sysctl_hardlockup_all_cpu_backtrace;
-#else
-#define sysctl_softlockup_all_cpu_backtrace 0
-#define sysctl_hardlockup_all_cpu_backtrace 0
-#endif
-
 #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
     defined(CONFIG_HARDLOCKUP_DETECTOR)
 void watchdog_update_hrtimer_threshold(u64 period);
@@ -184,7 +176,6 @@ void watchdog_update_hrtimer_threshold(u64 period);
 static inline void watchdog_update_hrtimer_threshold(u64 period) { }
 #endif
 
-extern bool is_hardlockup(void);
 struct ctl_table;
 extern int proc_watchdog(struct ctl_table *, int ,
 			 void __user *, size_t *, loff_t *);
@@ -196,7 +187,6 @@ extern int proc_watchdog_thresh(struct ctl_table *, int ,
 				void __user *, size_t *, loff_t *);
 extern int proc_watchdog_cpumask(struct ctl_table *, int,
 				 void __user *, size_t *, loff_t *);
-#endif
 
 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
 #include <asm/nmi.h>
-- 
cgit v1.2.3


From 51d4052b01ca555e0d1d5fe297b309beb6c64aa0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Sep 2017 21:37:14 +0200
Subject: watchdog/sysctl: Get rid of the #ifdeffery

The sysctl of the nmi_watchdog file prevents writes by setting:

    min = max = 0

if none of the users is enabled. That involves ifdeffery and is competely
non obvious.

If none of the facilities is enabeld, then the file can simply be made read
only. Move the ifdeffery into the header and use a constant for file
permissions.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Link: http://lkml.kernel.org/r/20170912194147.706073616@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/nmi.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index cfebb3bc4eed..5774b443dba1 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -81,6 +81,12 @@ extern unsigned int hardlockup_panic;
 static inline void hardlockup_detector_disable(void) {}
 #endif
 
+#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
+# define NMI_WATCHDOG_SYSCTL_PERM	0644
+#else
+# define NMI_WATCHDOG_SYSCTL_PERM	0444
+#endif
+
 #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
 extern void arch_touch_nmi_watchdog(void);
 extern void hardlockup_detector_perf_stop(void);
-- 
cgit v1.2.3


From 7feeb9cd4f5b34476ffb9e6d58d58c5416375b19 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Sep 2017 21:37:15 +0200
Subject: watchdog/sysctl: Clean up sysctl variable name space

Reflect that these variables are user interface related and remove the
whitespace damage in the sysctl table while at it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Link: http://lkml.kernel.org/r/20170912194147.783210221@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/nmi.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 5774b443dba1..4a8d1037364e 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -17,8 +17,8 @@ void lockup_detector_cleanup(void);
 bool is_hardlockup(void);
 
 extern int watchdog_user_enabled;
-extern int nmi_watchdog_enabled;
-extern int soft_watchdog_enabled;
+extern int nmi_watchdog_user_enabled;
+extern int soft_watchdog_user_enabled;
 extern int watchdog_thresh;
 extern unsigned long watchdog_enabled;
 
@@ -62,12 +62,12 @@ static inline void reset_hung_task_detector(void) { }
  * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
  * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
  *
- * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
- * are variables that are only used as an 'interface' between the parameters
- * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
- * 'watchdog_thresh' variable is handled differently because its value is not
- * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
- * is equal zero.
+ * 'watchdog_user_enabled', 'nmi_watchdog_user_enabled' and
+ * 'soft_watchdog_user_enabled' are variables that are only used as an
+ * 'interface' between the parameters in /proc/sys/kernel and the internal
+ * state bits in 'watchdog_enabled'. The 'watchdog_thresh' variable is
+ * handled differently because its value is not boolean, and the lockup
+ * detectors are 'suspended' while 'watchdog_thresh' is equal zero.
  */
 #define NMI_WATCHDOG_ENABLED_BIT   0
 #define SOFT_WATCHDOG_ENABLED_BIT  1
-- 
cgit v1.2.3


From 6592ad2fcc8f15b4f99b36c1db7d9f65510c203b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Sep 2017 21:37:16 +0200
Subject: watchdog/core, powerpc: Make watchdog_nmi_reconfigure() two stage

Both the perf reconfiguration and the powerpc watchdog_nmi_reconfigure()
need to be done in two steps.

     1) Stop all NMIs
     2) Read the new parameters and start NMIs

Right now watchdog_nmi_reconfigure() is a combination of both. To allow a
clean reconfiguration add a 'run' argument and split the functionality in
powerpc.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20170912194147.862865570@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/nmi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 4a8d1037364e..eee255bc0fd6 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -103,6 +103,8 @@ static inline void arch_touch_nmi_watchdog(void) {}
 #endif
 #endif
 
+void watchdog_nmi_reconfigure(bool run);
+
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
  *
-- 
cgit v1.2.3


From 178b9f7a36d2c74a38274b66dd89f53611298a19 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Sep 2017 21:37:18 +0200
Subject: watchdog/hardlockup/perf: Implement init time perf validation

The watchdog tries to create perf events even after it figured out that
perf is not functional or the requested event is not supported.

That's braindead as this can be done once at init time and if not supported
the NMI watchdog can be turned off unconditonally.

Implement the perf hardlockup detector functionality for that. This creates
a new event create function, which will replace the unholy mess of the
existing one in later patches.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Link: http://lkml.kernel.org/r/20170912194148.019090547@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/nmi.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index eee255bc0fd6..72c62a809e92 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -93,14 +93,18 @@ extern void hardlockup_detector_perf_stop(void);
 extern void hardlockup_detector_perf_restart(void);
 extern void hardlockup_detector_perf_disable(void);
 extern void hardlockup_detector_perf_cleanup(void);
+extern int hardlockup_detector_perf_init(void);
 #else
 static inline void hardlockup_detector_perf_stop(void) { }
 static inline void hardlockup_detector_perf_restart(void) { }
 static inline void hardlockup_detector_perf_disable(void) { }
 static inline void hardlockup_detector_perf_cleanup(void) { }
-#if !defined(CONFIG_HAVE_NMI_WATCHDOG)
+# if !defined(CONFIG_HAVE_NMI_WATCHDOG)
+static inline int hardlockup_detector_perf_init(void) { return -ENODEV; }
 static inline void arch_touch_nmi_watchdog(void) {}
-#endif
+# else
+static inline int hardlockup_detector_perf_init(void) { return 0; }
+# endif
 #endif
 
 void watchdog_nmi_reconfigure(bool run);
-- 
cgit v1.2.3


From 2a1b8ee4f5665b4291e43e4a25d964c3eb2f4c32 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 12 Sep 2017 21:37:20 +0200
Subject: watchdog/hardlockup/perf: Implement CPU enable replacement

watchdog_nmi_enable() is an unparseable mess, Provide a clean perf specific
implementation, which will be used when the existing setup/teardown mess is
replaced.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Link: http://lkml.kernel.org/r/20170912194148.180215498@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/nmi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 72c62a809e92..89ba8b23c6fe 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -92,12 +92,14 @@ extern void arch_touch_nmi_watchdog(void);
 extern void hardlockup_detector_perf_stop(void);
 extern void hardlockup_detector_perf_restart(void);
 extern void hardlockup_detector_perf_disable(void);
+extern void hardlockup_detector_perf_enable(void);
 extern void hardlockup_detector_perf_cleanup(void);
 extern int hardlockup_detector_perf_init(void);
 #else
 static inline void hardlockup_detector_perf_stop(void) { }
 static inline void hardlockup_detector_perf_restart(void) { }
 static inline void hardlockup_detector_perf_disable(void) { }
+static inline void hardlockup_detector_perf_enable(void) { }
 static inline void hardlockup_detector_perf_cleanup(void) { }
 # if !defined(CONFIG_HAVE_NMI_WATCHDOG)
 static inline int hardlockup_detector_perf_init(void) { return -ENODEV; }
-- 
cgit v1.2.3


From 850fdec8d2fd1eebfa003fea39bec08cd69b6155 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 18 Sep 2017 12:17:57 +0200
Subject: driver core: remove DRIVER_ATTR

DRIVER_ATTR is no longer in use, and driver authors should be using
DRIVER_ATTR_RW() or DRIVER_ATTR_RO() or DRIVER_ATTR_WO() instead in
order to always get the permissions correct.  So remove it so that no
one can use it anymore.

Acked-by: Alan Tull <atull@kernel.org>
Reviewed-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index c6f27207dbe8..2bc70ddda09b 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -307,8 +307,6 @@ struct driver_attribute {
 			 size_t count);
 };
 
-#define DRIVER_ATTR(_name, _mode, _show, _store) \
-	struct driver_attribute driver_attr_##_name = __ATTR(_name, _mode, _show, _store)
 #define DRIVER_ATTR_RW(_name) \
 	struct driver_attribute driver_attr_##_name = __ATTR_RW(_name)
 #define DRIVER_ATTR_RO(_name) \
-- 
cgit v1.2.3


From 7fc10de8d49a748c476532c9d8e8fe19e548dd67 Mon Sep 17 00:00:00 2001
From: Dragos Bogdan <dragos.bogdan@analog.com>
Date: Tue, 5 Sep 2017 15:14:45 +0300
Subject: iio: ad_sigma_delta: Implement a dedicated reset function

Since most of the SD ADCs have the option of reseting the serial
interface by sending a number of SCLKs with CS = 0 and DIN = 1,
a dedicated function that can do this is usefull.

Needed for the patch:  iio: ad7793: Fix the serial interface reset
Signed-off-by: Dragos Bogdan <dragos.bogdan@analog.com>
Acked-by: Lars-Peter Clausen <lars@metafoo.de>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/adc/ad_sigma_delta.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h
index 5ba430cc9a87..1fc7abd28b0b 100644
--- a/include/linux/iio/adc/ad_sigma_delta.h
+++ b/include/linux/iio/adc/ad_sigma_delta.h
@@ -111,6 +111,9 @@ int ad_sd_write_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg,
 int ad_sd_read_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg,
 	unsigned int size, unsigned int *val);
 
+int ad_sd_reset(struct ad_sigma_delta *sigma_delta,
+	unsigned int reset_length);
+
 int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev,
 	const struct iio_chan_spec *chan, int *val);
 int ad_sd_calibrate_all(struct ad_sigma_delta *sigma_delta,
-- 
cgit v1.2.3


From 99d3cd27f755d63fd6cf85169eaa873d90769aa5 Mon Sep 17 00:00:00 2001
From: Inbar Karmy <inbark@mellanox.com>
Date: Thu, 24 Aug 2017 17:21:44 +0300
Subject: net/mlx5: Fix FPGA capability location

Currently, FPGA capability is located in (mdev)->caps.hca_cur,
change the location to be (mdev)->caps.fpga,
since hca_cur is reserved for HCA device capabilities.

Fixes: e29341fb3a5b ("net/mlx5: FPGA, Add basic support for Innova")
Signed-off-by: Inbar Karmy <inbark@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h | 5 ++---
 include/linux/mlx5/driver.h | 1 +
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index eaf4ad209c8f..e32dbc4934db 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -980,7 +980,6 @@ enum mlx5_cap_type {
 	MLX5_CAP_RESERVED,
 	MLX5_CAP_VECTOR_CALC,
 	MLX5_CAP_QOS,
-	MLX5_CAP_FPGA,
 	/* NUM OF CAP Types */
 	MLX5_CAP_NUM
 };
@@ -1110,10 +1109,10 @@ enum mlx5_mcam_feature_groups {
 	MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld)
 
 #define MLX5_CAP_FPGA(mdev, cap) \
-	MLX5_GET(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap)
+	MLX5_GET(fpga_cap, (mdev)->caps.fpga, cap)
 
 #define MLX5_CAP64_FPGA(mdev, cap) \
-	MLX5_GET64(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap)
+	MLX5_GET64(fpga_cap, (mdev)->caps.fpga, cap)
 
 enum {
 	MLX5_CMD_STAT_OK			= 0x0,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 02ff700e4f30..401c8972cc3a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -774,6 +774,7 @@ struct mlx5_core_dev {
 		u32 hca_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)];
 		u32 pcam[MLX5_ST_SZ_DW(pcam_reg)];
 		u32 mcam[MLX5_ST_SZ_DW(mcam_reg)];
+		u32 fpga[MLX5_ST_SZ_DW(fpga_cap)];
 	} caps;
 	phys_addr_t		iseg_base;
 	struct mlx5_init_seg __iomem *iseg;
-- 
cgit v1.2.3


From 16f1c5bb3ed75b3cf3ced537db40f7e1a244debe Mon Sep 17 00:00:00 2001
From: Raed Salem <raeds@mellanox.com>
Date: Sun, 30 Jul 2017 11:02:51 +0300
Subject: net/mlx5: Check device capability for maximum flow counters

Added check for the maximal number of flow counters attached
to rule (FTE).

Fixes: bd5251dbf156b ('net/mlx5_core: Introduce flow steering destination of type counter')
Signed-off-by: Raed Salem <raeds@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index a528b35a022e..69772347f866 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -327,7 +327,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8         reserved_at_80[0x18];
 	u8         log_max_destination[0x8];
 
-	u8         reserved_at_a0[0x18];
+	u8         log_max_flow_counter[0x8];
+	u8         reserved_at_a8[0x10];
 	u8         log_max_flow[0x8];
 
 	u8         reserved_at_c0[0x40];
-- 
cgit v1.2.3


From e6b72ee88a56bcfe63f72e9c30766484c45bec72 Mon Sep 17 00:00:00 2001
From: Artem Savkov <asavkov@redhat.com>
Date: Tue, 26 Sep 2017 18:35:45 +0200
Subject: netfilter: ebtables: fix race condition in frame_filter_net_init()

It is possible for ebt_in_hook to be triggered before ebt_table is assigned
resulting in a NULL-pointer dereference. Make sure hooks are
registered as the last step.

Fixes: aee12a0a3727 ("ebtables: remove nf_hook_register usage")
Signed-off-by: Artem Savkov <asavkov@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge/ebtables.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 2c2a5514b0df..528b24c78308 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -108,9 +108,10 @@ struct ebt_table {
 
 #define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \
 		     ~(__alignof__(struct _xt_align)-1))
-extern struct ebt_table *ebt_register_table(struct net *net,
-					    const struct ebt_table *table,
-					    const struct nf_hook_ops *);
+extern int ebt_register_table(struct net *net,
+			      const struct ebt_table *table,
+			      const struct nf_hook_ops *ops,
+			      struct ebt_table **res);
 extern void ebt_unregister_table(struct net *net, struct ebt_table *table,
 				 const struct nf_hook_ops *);
 extern unsigned int ebt_do_table(struct sk_buff *skb,
-- 
cgit v1.2.3


From fa87b91c94a8fd2c8502b6761be2d08a8e9bcf55 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Tue, 3 Oct 2017 16:14:24 -0700
Subject: include/linux/mm.h: fix typo in VM_MPX definition

There's a typo in recent change of VM_MPX definition.  We want it to be
VM_HIGH_ARCH_4, not VM_HIGH_ARCH_BIT_4.

This bug does cause visible regressions.  In arch_vma_name the vmflags
are tested against VM_MPX.  With the incorrect value of VM_MPX, a number
of vmas (such as the stack) test positive and end up being marked as
"[mpx]" in /proc/N/maps instead of their correct names.

This confuses tools like rr which expect to be able to find familiar
vmas.

Fixes: df3735c5b40f ("x86,mpx: make mpx depend on x86-64 to free up VMA flag")
Link: http://lkml.kernel.org/r/20170918140253.36856-1-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Kyle Huey <me@kylehuey.com>
Cc: <stable@vger.kernel.org>	[4.14+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f8c10d336e42..065d99deb847 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -240,7 +240,7 @@ extern unsigned int kobjsize(const void *objp);
 
 #if defined(CONFIG_X86_INTEL_MPX)
 /* MPX specific bounds table or bounds directory */
-# define VM_MPX		VM_HIGH_ARCH_BIT_4
+# define VM_MPX		VM_HIGH_ARCH_4
 #else
 # define VM_MPX		VM_NONE
 #endif
-- 
cgit v1.2.3


From 4d4bbd8526a8fbeb2c090ea360211fceff952383 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 3 Oct 2017 16:14:50 -0700
Subject: mm, oom_reaper: skip mm structs with mmu notifiers

Andrea has noticed that the oom_reaper doesn't invalidate the range via
mmu notifiers (mmu_notifier_invalidate_range_start/end) and that can
corrupt the memory of the kvm guest for example.

tlb_flush_mmu_tlbonly already invokes mmu notifiers but that is not
sufficient as per Andrea:

 "mmu_notifier_invalidate_range cannot be used in replacement of
  mmu_notifier_invalidate_range_start/end. For KVM
  mmu_notifier_invalidate_range is a noop and rightfully so. A MMU
  notifier implementation has to implement either ->invalidate_range
  method or the invalidate_range_start/end methods, not both. And if you
  implement invalidate_range_start/end like KVM is forced to do, calling
  mmu_notifier_invalidate_range in common code is a noop for KVM.

  For those MMU notifiers that can get away only implementing
  ->invalidate_range, the ->invalidate_range is implicitly called by
  mmu_notifier_invalidate_range_end(). And only those secondary MMUs
  that share the same pagetable with the primary MMU (like AMD iommuv2)
  can get away only implementing ->invalidate_range"

As the callback is allowed to sleep and the implementation is out of
hand of the MM it is safer to simply bail out if there is an mmu
notifier registered.  In order to not fail too early make the
mm_has_notifiers check under the oom_lock and have a little nap before
failing to give the current oom victim some more time to exit.

[akpm@linux-foundation.org: coding-style fixes]
Link: http://lkml.kernel.org/r/20170913113427.2291-1-mhocko@kernel.org
Fixes: aac453635549 ("mm, oom: introduce oom reaper")
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmu_notifier.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 7b2e31b1745a..6866e8126982 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -400,6 +400,11 @@ extern void mmu_notifier_synchronize(void);
 
 #else /* CONFIG_MMU_NOTIFIER */
 
+static inline int mm_has_notifiers(struct mm_struct *mm)
+{
+	return 0;
+}
+
 static inline void mmu_notifier_release(struct mm_struct *mm)
 {
 }
-- 
cgit v1.2.3


From a1b2289cef92ef0e9a92afcd2e1ea71d5bcaaf64 Mon Sep 17 00:00:00 2001
From: Sherry Yang <sherryy@android.com>
Date: Tue, 3 Oct 2017 16:15:00 -0700
Subject: android: binder: drop lru lock in isolate callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop the global lru lock in isolate callback before calling
zap_page_range which calls cond_resched, and re-acquire the global lru
lock before returning.  Also change return code to LRU_REMOVED_RETRY.

Use mmput_async when fail to acquire mmap sem in an atomic context.

Fix "BUG: sleeping function called from invalid context"
errors when CONFIG_DEBUG_ATOMIC_SLEEP is enabled.

Also restore mmput_async, which was initially introduced in commit
ec8d7c14ea14 ("mm, oom_reaper: do not mmput synchronously from the oom
reaper context"), and was removed in commit 212925802454 ("mm: oom: let
oom_reap_task and exit_mmap run concurrently").

Link: http://lkml.kernel.org/r/20170914182231.90908-1-sherryy@android.com
Fixes: f2517eb76f1f2 ("android: binder: Add global lru shrinker to binder")
Signed-off-by: Sherry Yang <sherryy@android.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reported-by: Kyle Yan <kyan@codeaurora.org>
Acked-by: Arve Hjønnevåg <arve@android.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Martijn Coenen <maco@google.com>
Cc: Todd Kjos <tkjos@google.com>
Cc: Riley Andrews <riandrews@android.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hoeun Ryu <hoeun.ryu@gmail.com>
Cc: Christopher Lameter <cl@linux.com>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched/mm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 3a19c253bdb1..ae53e413fb13 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -84,6 +84,12 @@ static inline bool mmget_not_zero(struct mm_struct *mm)
 
 /* mmput gets rid of the mappings and all user-space */
 extern void mmput(struct mm_struct *);
+#ifdef CONFIG_MMU
+/* same as above but performs the slow path from the async context. Can
+ * be called from the atomic context as well
+ */
+void mmput_async(struct mm_struct *);
+#endif
 
 /* Grab a reference to a task's mm, if it is not already going away */
 extern struct mm_struct *get_task_mm(struct task_struct *task);
-- 
cgit v1.2.3


From c2315c187fa0d3ab363fdebe22718170b40473e3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Tue, 3 Oct 2017 16:15:42 -0700
Subject: exec: load_script: kill the onstack interp[BINPRM_BUF_SIZE] array

Patch series "exec: binfmt_misc: fix use-after-free, kill
iname[BINPRM_BUF_SIZE]".

It looks like this code was always wrong, then commit 948b701a607f
("binfmt_misc: add persistent opened binary handler for containers")
added more problems.

This patch (of 6):

load_script() can simply use i_name instead, it points into bprm->buf[]
and nobody can change this memory until we call prepare_binprm().

The only complication is that we need to also change the signature of
bprm_change_interp() but this change looks good too.

While at it, do whitespace/style cleanups.

NOTE: the real motivation for this change is that people want to
increase BINPRM_BUF_SIZE, we need to change load_misc_binary() too but
this looks more complicated because afaics it is very buggy.

Link: http://lkml.kernel.org/r/20170918163446.GA26793@redhat.com
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Travis Gummels <tgummels@redhat.com>
Cc: Ben Woodard <woodard@redhat.com>
Cc: Jim Foraker <foraker1@llnl.gov>
Cc: <tdhooge@llnl.gov>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/binfmts.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index fb44d6180ca0..18d05b5491f3 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -131,7 +131,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm,
 			   int executable_stack);
 extern int transfer_args_to_stack(struct linux_binprm *bprm,
 				  unsigned long *sp_location);
-extern int bprm_change_interp(char *interp, struct linux_binprm *bprm);
+extern int bprm_change_interp(const char *interp, struct linux_binprm *bprm);
 extern int copy_strings_kernel(int argc, const char *const *argv,
 			       struct linux_binprm *bprm);
 extern int prepare_bprm_creds(struct linux_binprm *bprm);
-- 
cgit v1.2.3


From 7240767450d6d8380fb153e2998a1bb4ede7b029 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 3 Oct 2017 16:16:04 -0700
Subject: include/linux/bitfield.h: remove 32bit from FIELD_GET comment block

I do not see anything that restricts this macro to 32 bit width.

Link: http://lkml.kernel.org/r/1505921975-23379-1-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitfield.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
index 8b9d6fff002d..f2deb71958b2 100644
--- a/include/linux/bitfield.h
+++ b/include/linux/bitfield.h
@@ -92,7 +92,7 @@
 /**
  * FIELD_GET() - extract a bitfield element
  * @_mask: shifted mask defining the field's length and position
- * @_reg:  32bit value of entire bitfield
+ * @_reg:  value of entire bitfield
  *
  * FIELD_GET() extracts the field specified by @_mask from the
  * bitfield passed in as @_reg by masking and shifting it down.
-- 
cgit v1.2.3


From 1dd2bfc86818ddbc95f98e312e7704350223fd7d Mon Sep 17 00:00:00 2001
From: YASUAKI ISHIMATSU <yasu.isimatu@gmail.com>
Date: Tue, 3 Oct 2017 16:16:29 -0700
Subject: mm/memory_hotplug: change pfn_to_section_nr/section_nr_to_pfn macro
 to inline function

pfn_to_section_nr() and section_nr_to_pfn() are defined as macro.
pfn_to_section_nr() has no issue even if it is defined as macro.  But
section_nr_to_pfn() has overflow issue if sec is defined as int.

section_nr_to_pfn() just shifts sec by PFN_SECTION_SHIFT.  If sec is
defined as unsigned long, section_nr_to_pfn() returns pfn as 64 bit value.
But if sec is defined as int, section_nr_to_pfn() returns pfn as 32 bit
value.

__remove_section() calculates start_pfn using section_nr_to_pfn() and
scn_nr defined as int.  So if hot-removed memory address is over 16TB,
overflow issue occurs and section_nr_to_pfn() does not calculate correct
pfn.

To make callers use proper arg, the patch changes the macros to inline
functions.

Fixes: 815121d2b5cd ("memory_hotplug: clear zone when removing the memory")
Link: http://lkml.kernel.org/r/e643a387-e573-6bbf-d418-c60c8ee3d15e@gmail.com
Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Reza Arbab <arbab@linux.vnet.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 356a814e7c8e..c8f89417740b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1094,8 +1094,14 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 #error Allocator MAX_ORDER exceeds SECTION_SIZE
 #endif
 
-#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
-#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
+static inline unsigned long pfn_to_section_nr(unsigned long pfn)
+{
+	return pfn >> PFN_SECTION_SHIFT;
+}
+static inline unsigned long section_nr_to_pfn(unsigned long sec)
+{
+	return sec << PFN_SECTION_SHIFT;
+}
 
 #define SECTION_ALIGN_UP(pfn)	(((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK)
 #define SECTION_ALIGN_DOWN(pfn)	((pfn) & PAGE_SECTION_MASK)
-- 
cgit v1.2.3


From 32e57c29e3c038ac802b7cc214a8795a4234055f Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 3 Oct 2017 16:16:54 -0700
Subject: include/linux/fs.h: fix comment about struct address_space

Before commit 9c5d760b8d22 ("mm: split gfp_mask and mapping flags into
separate fields") the private_* fields of struct adrress_space were
grouped together and using "ditto" in comments describing the last
fields was correct.

With introduction of gpf_mask between private_lock and private_list
"ditto" references the wrong description.

Fix it by using the elaborate description.

Link: http://lkml.kernel.org/r/1507009987-8746-1-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 339e73742e73..13dab191a23e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -403,7 +403,7 @@ struct address_space {
 	unsigned long		flags;		/* error bits */
 	spinlock_t		private_lock;	/* for use by the address_space */
 	gfp_t			gfp_mask;	/* implicit gfp mask for allocations */
-	struct list_head	private_list;	/* ditto */
+	struct list_head	private_list;	/* for use by the address_space */
 	void			*private_data;	/* ditto */
 	errseq_t		wb_err;
 } __attribute__((aligned(sizeof(long)))) __randomize_layout;
-- 
cgit v1.2.3


From de3ee99b097dd51938276e3af388cd4ad0f2750a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 20 Sep 2017 10:56:14 +0200
Subject: mmc: Delete bounce buffer handling

In may, Steven sent a patch deleting the bounce buffer handling
and the CONFIG_MMC_BLOCK_BOUNCE option.

I chose the less invasive path of making it a runtime config
option, and we merged that successfully for kernel v4.12.

The code is however just standing in the way and taking up
space for seemingly no gain on any systems in wide use today.

Pierre says the code was there to improve speed on TI SDHCI
controllers on certain HP laptops and possibly some Ricoh
controllers as well. Early SDHCI controllers lacked the
scatter-gather feature, which made software bounce buffers
a significant speed boost.

We are clearly talking about the list of SDHCI PCI-based
MMC/SD card readers found in the pci_ids[] list in
drivers/mmc/host/sdhci-pci-core.c.

The TI SDHCI derivative is not supported by the upstream
kernel. This leaves the Ricoh.

What we can however notice is that the x86 defconfigs in the
kernel did not enable CONFIG_MMC_BLOCK_BOUNCE option, which
means that any such laptop would have to have a custom
configured kernel to actually take advantage of this
bounce buffer speed-up. It simply seems like there was
a speed optimization for the Ricoh controllers that noone
was using. (I have not checked the distro defconfigs but
I am pretty sure the situation is the same there.)

Bounce buffers increased performance on the OMAP HSMMC
at one point, and was part of the original submission in
commit a45c6cb81647 ("[ARM] 5369/1: omap mmc: Add new
   omap hsmmc controller for 2430 and 34xx, v3")

This optimization was removed in
commit 0ccd76d4c236 ("omap_hsmmc: Implement scatter-gather
   emulation")
which found that scatter-gather emulation provided even
better performance.

The same was introduced for SDHCI in
commit 2134a922c6e7 ("sdhci: scatter-gather (ADMA) support")

I am pretty positively convinced that software
scatter-gather emulation will do for any host controller what
the bounce buffers were doing. Essentially, the bounce buffer
was a reimplementation of software scatter-gather-emulation in
the MMC subsystem, and it should be done away with.

Cc: Pierre Ossman <pierre@ossman.eu>
Cc: Juha Yrjola <juha.yrjola@solidboot.com>
Cc: Steven J. Hill <Steven.Hill@cavium.com>
Cc: Shawn Lin <shawn.lin@rock-chips.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Suggested-by: Steven J. Hill <Steven.Hill@cavium.com>
Suggested-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index f3f2d07feb2a..9a43763a68ad 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -316,7 +316,7 @@ struct mmc_host {
 #define MMC_CAP_UHS_SDR50	(1 << 18)	/* Host supports UHS SDR50 mode */
 #define MMC_CAP_UHS_SDR104	(1 << 19)	/* Host supports UHS SDR104 mode */
 #define MMC_CAP_UHS_DDR50	(1 << 20)	/* Host supports UHS DDR50 mode */
-#define MMC_CAP_NO_BOUNCE_BUFF	(1 << 21)	/* Disable bounce buffers on host */
+/* (1 << 21) is free for reuse */
 #define MMC_CAP_DRIVER_TYPE_A	(1 << 23)	/* Host supports Driver Type A */
 #define MMC_CAP_DRIVER_TYPE_C	(1 << 24)	/* Host supports Driver Type C */
 #define MMC_CAP_DRIVER_TYPE_D	(1 << 25)	/* Host supports Driver Type D */
-- 
cgit v1.2.3


From 6b9dc4806b28214a4a260517e59439e0ac12a15e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 2 Oct 2017 12:34:50 +0200
Subject: watchdog/core, powerpc: Replace watchdog_nmi_reconfigure()

The recent cleanup of the watchdog code split watchdog_nmi_reconfigure()
into two stages. One to stop the NMI and one to restart it after
reconfiguration. That was done by adding a boolean 'run' argument to the
code, which is functionally correct but not necessarily a piece of art.

Replace it by two explicit functions: watchdog_nmi_stop() and
watchdog_nmi_start().

Fixes: 6592ad2fcc8f ("watchdog/core, powerpc: Make watchdog_nmi_reconfigure() two stage")
Requested-by: Linus 'Nursing his pet-peeve' Torvalds <torvalds@linuxfoundation.org>
Signed-off-by: Thomas 'Mopping up garbage' Gleixner <tglx@linutronix.de>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1710021957480.2114@nanos
---
 include/linux/nmi.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 89ba8b23c6fe..0c9ed49fb21a 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -109,7 +109,8 @@ static inline int hardlockup_detector_perf_init(void) { return 0; }
 # endif
 #endif
 
-void watchdog_nmi_reconfigure(bool run);
+void watchdog_nmi_stop(void);
+void watchdog_nmi_start(void);
 
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
-- 
cgit v1.2.3


From 34ddaa3e5c0096fef52485186c7eb6cf56ddc686 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 3 Oct 2017 16:39:02 +0200
Subject: powerpc/watchdog: Make use of watchdog_nmi_probe()

The rework of the core hotplug code triggers the WARN_ON in start_wd_cpu()
on powerpc because it is called multiple times for the boot CPU.

The first call is via:

  start_wd_on_cpu+0x80/0x2f0
  watchdog_nmi_reconfigure+0x124/0x170
  softlockup_reconfigure_threads+0x110/0x130
  lockup_detector_init+0xbc/0xe0
  kernel_init_freeable+0x18c/0x37c
  kernel_init+0x2c/0x160
  ret_from_kernel_thread+0x5c/0xbc

And then again via the CPU hotplug registration:

  start_wd_on_cpu+0x80/0x2f0
  cpuhp_invoke_callback+0x194/0x620
  cpuhp_thread_fun+0x7c/0x1b0
  smpboot_thread_fn+0x290/0x2a0
  kthread+0x168/0x1b0
  ret_from_kernel_thread+0x5c/0xbc

This can be avoided by setting up the cpu hotplug state with nocalls and
move the initialization to the watchdog_nmi_probe() function. That
initializes the hotplug callbacks without invoking the callback and the
following core initialization function then configures the watchdog for the
online CPUs (in this case CPU0) via softlockup_reconfigure_threads().

Reported-and-tested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: linuxppc-dev@lists.ozlabs.org
---
 include/linux/nmi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 0c9ed49fb21a..27e249ed7c5c 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -111,6 +111,7 @@ static inline int hardlockup_detector_perf_init(void) { return 0; }
 
 void watchdog_nmi_stop(void);
 void watchdog_nmi_start(void);
+int watchdog_nmi_probe(void);
 
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
-- 
cgit v1.2.3


From 192b2d78722ffea188e5ec6ae5d55010dce05a4b Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Fri, 29 Sep 2017 21:09:36 -0700
Subject: Drivers: hv: vmbus: Fix bugs in rescind handling

This patch addresses the following bugs in the current rescind handling code:

1. Fixes a race condition where we may be invoking hv_process_channel_removal()
on an already freed channel.

2. Prevents indefinite wait when rescinding sub-channels by correctly setting
the probe_complete state.

I would like to thank Dexuan for patiently reviewing earlier versions of this
patch and identifying many of the issues fixed here.

Greg, please apply this to 4.14-final.

Fixes: '54a66265d675 ("Drivers: hv: vmbus: Fix rescind handling")'

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Dexuan Cui <decui@microsoft.com>
Cc: stable@vger.kernel.org # (4.13 and above)
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hyperv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index c458d7b7ad19..6431087816ba 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1403,7 +1403,7 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
 				const int *srv_version, int srv_vercnt,
 				int *nego_fw_version, int *nego_srv_version);
 
-void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
+void hv_process_channel_removal(u32 relid);
 
 void vmbus_setevent(struct vmbus_channel *channel);
 /*
-- 
cgit v1.2.3


From 98589a0998b8b13c4a8fa1ccb0e62751a019faa5 Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Date: Mon, 9 Oct 2017 15:27:15 +0300
Subject: netfilter: xt_bpf: Fix XT_BPF_MODE_FD_PINNED mode of 'xt_bpf_info_v1'

Commit 2c16d6033264 ("netfilter: xt_bpf: support ebpf") introduced
support for attaching an eBPF object by an fd, with the
'bpf_mt_check_v1' ABI expecting the '.fd' to be specified upon each
IPT_SO_SET_REPLACE call.

However this breaks subsequent iptables calls:

 # iptables -A INPUT -m bpf --object-pinned /sys/fs/bpf/xxx -j ACCEPT
 # iptables -A INPUT -s 5.6.7.8 -j ACCEPT
 iptables: Invalid argument. Run `dmesg' for more information.

That's because iptables works by loading existing rules using
IPT_SO_GET_ENTRIES to userspace, then issuing IPT_SO_SET_REPLACE with
the replacement set.

However, the loaded 'xt_bpf_info_v1' has an arbitrary '.fd' number
(from the initial "iptables -m bpf" invocation) - so when 2nd invocation
occurs, userspace passes a bogus fd number, which leads to
'bpf_mt_check_v1' to fail.

One suggested solution [1] was to hack iptables userspace, to perform a
"entries fixup" immediatley after IPT_SO_GET_ENTRIES, by opening a new,
process-local fd per every 'xt_bpf_info_v1' entry seen.

However, in [2] both Pablo Neira Ayuso and Willem de Bruijn suggested to
depricate the xt_bpf_info_v1 ABI dealing with pinned ebpf objects.

This fix changes the XT_BPF_MODE_FD_PINNED behavior to ignore the given
'.fd' and instead perform an in-kernel lookup for the bpf object given
the provided '.path'.

It also defines an alias for the XT_BPF_MODE_FD_PINNED mode, named
XT_BPF_MODE_PATH_PINNED, to better reflect the fact that the user is
expected to provide the path of the pinned object.

Existing XT_BPF_MODE_FD_ELF behavior (non-pinned fd mode) is preserved.

References: [1] https://marc.info/?l=netfilter-devel&m=150564724607440&w=2
            [2] https://marc.info/?l=netfilter-devel&m=150575727129880&w=2

Reported-by: Rafael Buchbinder <rafi@rbk.ms>
Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/bpf.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8390859e79e7..f1af7d63d678 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -368,6 +368,11 @@ static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
 {
 }
 
+static inline int bpf_obj_get_user(const char __user *pathname)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline struct net_device  *__dev_map_lookup_elem(struct bpf_map *map,
 						       u32 key)
 {
-- 
cgit v1.2.3


From d153b153446f7d8832bb2ebd92309c8a6003b3bb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 27 Sep 2017 11:35:30 +0200
Subject: sched/core: Fix wake_affine() performance regression

Eric reported a sysbench regression against commit:

  3fed382b46ba ("sched/numa: Implement NUMA node level wake_affine()")

Similarly, Rik was looking at the NAS-lu.C benchmark, which regressed
against his v3.10 enterprise kernel.

PRE (current tip/master):

 ivb-ep sysbench:

   2: [30 secs]     transactions:                        64110  (2136.94 per sec.)
   5: [30 secs]     transactions:                        143644 (4787.99 per sec.)
  10: [30 secs]     transactions:                        274298 (9142.93 per sec.)
  20: [30 secs]     transactions:                        418683 (13955.45 per sec.)
  40: [30 secs]     transactions:                        320731 (10690.15 per sec.)
  80: [30 secs]     transactions:                        355096 (11834.28 per sec.)

 hsw-ex NAS:

 OMP_PROC_BIND/lu.C.x_threads_144_run_1.log: Time in seconds =                    18.01
 OMP_PROC_BIND/lu.C.x_threads_144_run_2.log: Time in seconds =                    17.89
 OMP_PROC_BIND/lu.C.x_threads_144_run_3.log: Time in seconds =                    17.93
 lu.C.x_threads_144_run_1.log: Time in seconds =                   434.68
 lu.C.x_threads_144_run_2.log: Time in seconds =                   405.36
 lu.C.x_threads_144_run_3.log: Time in seconds =                   433.83

POST (+patch):

 ivb-ep sysbench:

   2: [30 secs]     transactions:                        64494  (2149.75 per sec.)
   5: [30 secs]     transactions:                        145114 (4836.99 per sec.)
  10: [30 secs]     transactions:                        278311 (9276.69 per sec.)
  20: [30 secs]     transactions:                        437169 (14571.60 per sec.)
  40: [30 secs]     transactions:                        669837 (22326.73 per sec.)
  80: [30 secs]     transactions:                        631739 (21055.88 per sec.)

 hsw-ex NAS:

 lu.C.x_threads_144_run_1.log: Time in seconds =                    23.36
 lu.C.x_threads_144_run_2.log: Time in seconds =                    22.96
 lu.C.x_threads_144_run_3.log: Time in seconds =                    22.52

This patch takes out all the shiny wake_affine() stuff and goes back to
utter basics. Between the two CPUs involved with the wakeup (the CPU
doing the wakeup and the CPU we ran on previously) pick the CPU we can
run on _now_.

This restores much of the regressions against the older kernels,
but leaves some ground in the overloaded case. The default-enabled
WA_WEIGHT (which will be introduced in the next patch) is an attempt
to address the overloaded situation.

Reported-by: Eric Farman <farman@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: jinpuwang@gmail.com
Cc: vcaputo@pengaru.com
Fixes: 3fed382b46ba ("sched/numa: Implement NUMA node level wake_affine()")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/topology.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index d7b6dab956ec..7d065abc7a47 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -71,14 +71,6 @@ struct sched_domain_shared {
 	atomic_t	ref;
 	atomic_t	nr_busy_cpus;
 	int		has_idle_cores;
-
-	/*
-	 * Some variables from the most recent sd_lb_stats for this domain,
-	 * used by wake_affine().
-	 */
-	unsigned long	nr_running;
-	unsigned long	load;
-	unsigned long	capacity;
 };
 
 struct sched_domain {
-- 
cgit v1.2.3


From 2bbbd96357ce76cc45ec722c00f654aa7b189112 Mon Sep 17 00:00:00 2001
From: Jan Luebbe <jlu@pengutronix.de>
Date: Mon, 28 Aug 2017 17:25:16 +0200
Subject: bus: mbus: fix window size calculation for 4GB windows

At least the Armada XP SoC supports 4GB on a single DRAM window. Because
the size register values contain the actual size - 1, the MSB is set in
that case. For example, the SDRAM window's control register's value is
0xffffffe1 for 4GB (bits 31 to 24 contain the size).

The MBUS driver reads back each window's size from registers and
calculates the actual size as (control_reg | ~DDR_SIZE_MASK) + 1, which
overflows for 32 bit values, resulting in other miscalculations further
on (a bad RAM window for the CESA crypto engine calculated by
mvebu_mbus_setup_cpu_target_nooverlap() in my case).

This patch changes the type in 'struct mbus_dram_window' from u32 to
u64, which allows us to keep using the same register calculation code in
most MBUS-using drivers (which calculate ->size - 1 again).

Fixes: fddddb52a6c4 ("bus: introduce an Marvell EBU MBus driver")
CC: stable@vger.kernel.org
Signed-off-by: Jan Luebbe <jlu@pengutronix.de>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
---
 include/linux/mbus.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mbus.h b/include/linux/mbus.h
index 0d3f14fd2621..4773145246ed 100644
--- a/include/linux/mbus.h
+++ b/include/linux/mbus.h
@@ -31,8 +31,8 @@ struct mbus_dram_target_info
 	struct mbus_dram_window {
 		u8	cs_index;
 		u8	mbus_attr;
-		u32	base;
-		u32	size;
+		u64	base;
+		u64	size;
 	} cs[4];
 };
 
-- 
cgit v1.2.3


From 30ae9610d275f8f03f5bf7612ce71d8af6fc400b Mon Sep 17 00:00:00 2001
From: Shanker Donthineni <shankerd@codeaurora.org>
Date: Mon, 9 Oct 2017 11:46:55 -0500
Subject: irqchip/gic-v3-its: Add missing changes to support 52bit physical
 address

The current ITS driver works fine as long as normal memory and GICR
regions are located within the lower 48bit (>=0 && <2^48) physical
address space. Some of the registers GICR_PEND/PROP, GICR_VPEND/VPROP
and GITS_CBASER are handled properly but not all when configuring
the hardware with 52bit physical address.

This patch does the following changes to support 52bit PA.
  -Handle 52bit PA in GITS_BASERn.
  -Fix ITT_addr width to 52bits, bits[51:8].
  -Fix RDbase width to 52bits, bits[51:16].
  -Fix VPT_addr width to 52bits, bits[51:16].

Definition of the GITS_BASERn register when ITS PageSize is 64KB:
  -Bits[47:16] of the register provide bits[47:16] of the table PA.
  -Bits[15:12] of the register provide bits[51:48] of the table PA.
  -Bits[15:00] of the base physical address are 0.

Signed-off-by: Shanker Donthineni <shankerd@codeaurora.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic-v3.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 1ea576c8126f..14b74f22d43c 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -372,6 +372,8 @@
 #define GITS_BASER_ENTRY_SIZE_SHIFT		(48)
 #define GITS_BASER_ENTRY_SIZE(r)	((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
 #define GITS_BASER_ENTRY_SIZE_MASK	GENMASK_ULL(52, 48)
+#define GITS_BASER_PHYS_52_to_48(phys)					\
+	(((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12)
 #define GITS_BASER_SHAREABILITY_SHIFT	(10)
 #define GITS_BASER_InnerShareable					\
 	GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
-- 
cgit v1.2.3


From 20608924cc2e6bdeaf6f58ccbe9ddfe12dbfa082 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Wed, 4 Oct 2017 14:26:26 +0200
Subject: genirq: generic chip: Add irq_gc_mask_disable_and_ack_set()

The irq_gc_mask_disable_reg_and_ack() function name implies that it
provides the combined functions of irq_gc_mask_disable_reg() and
irq_gc_ack().  However, the implementation does not actually do
that since it writes the mask instead of the disable register. It
also does not maintain the mask cache which makes it inappropriate
to use with other masking functions.

In addition, commit 659fb32d1b67 ("genirq: replace irq_gc_ack() with
{set,clr}_bit variants (fwd)") effectively renamed irq_gc_ack() to
irq_gc_ack_set_bit() so this function probably should have also been
renamed at that time.

The generic chip code currently provides three functions for use
with the irq_mask member of the irq_chip structure and two functions
for use with the irq_ack member of the irq_chip structure. These
functions could be combined into six functions for use with the
irq_mask_ack member of the irq_chip structure.  However, since only
one of the combinations is currently used, only the function
irq_gc_mask_disable_and_ack_set() is added by this commit.

The '_reg' and '_bit' portions of the base function name were left
out of the new combined function name in an attempt to keep the
function name length manageable with the 80 character source code
line length while still allowing the distinct aspects of each
combination to be captured by the name.

If other combinations are desired in the future please add them to
the irq generic chip library at that time.

Signed-off-by: Doug Berger <opendmb@gmail.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index d4728bf6a537..494d328f7051 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -1010,6 +1010,7 @@ void irq_gc_unmask_enable_reg(struct irq_data *d);
 void irq_gc_ack_set_bit(struct irq_data *d);
 void irq_gc_ack_clr_bit(struct irq_data *d);
 void irq_gc_mask_disable_reg_and_ack(struct irq_data *d);
+void irq_gc_mask_disable_and_ack_set(struct irq_data *d);
 void irq_gc_eoi(struct irq_data *d);
 int irq_gc_set_wake(struct irq_data *d, unsigned int on);
 
-- 
cgit v1.2.3


From 0d08af35f16a0cc418ad2afde3bc5f70ace82705 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Wed, 4 Oct 2017 14:28:17 +0200
Subject: genirq: generic chip: remove irq_gc_mask_disable_reg_and_ack()

Any usage of the irq_gc_mask_disable_reg_and_ack() function has
been replaced with the desired functionality.

The incorrect and ambiguously named function is removed here to
prevent accidental misuse.

Signed-off-by: Doug Berger <opendmb@gmail.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 494d328f7051..5ad10948ea95 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -1009,7 +1009,6 @@ void irq_gc_mask_clr_bit(struct irq_data *d);
 void irq_gc_unmask_enable_reg(struct irq_data *d);
 void irq_gc_ack_set_bit(struct irq_data *d);
 void irq_gc_ack_clr_bit(struct irq_data *d);
-void irq_gc_mask_disable_reg_and_ack(struct irq_data *d);
 void irq_gc_mask_disable_and_ack_set(struct irq_data *d);
 void irq_gc_eoi(struct irq_data *d);
 int irq_gc_set_wake(struct irq_data *d, unsigned int on);
-- 
cgit v1.2.3


From 8a1ac5dc7be09883051b1bf89a5e57d7ad850fa5 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 13 Oct 2017 15:57:40 -0700
Subject: include/linux/of.h: provide of_n_{addr,size}_cells wrappers for
 !CONFIG_OF

The pci-rcar driver is enabled for compile tests, and this has shown that
the driver cannot build without CONFIG_OF, following the inclusion of
commit f8f2fe7355fb ("PCI: rcar: Use new OF interrupt mapping when possible"):

  drivers/pci/host/pcie-rcar.c: In function 'pci_dma_range_parser_init':
  drivers/pci/host/pcie-rcar.c:1039:2: error: implicit declaration of function 'of_n_addr_cells' [-Werror=implicit-function-declaration]
    parser->pna = of_n_addr_cells(node);
    ^

As pointed out by Ben Dooks and Geert Uytterhoeven, this is actually
supposed to build fine, which we can achieve if we make the declaration
of of_irq_parse_and_map_pci conditional on CONFIG_OF and provide an
empty inline function otherwise, as we do for a lot of other of
interfaces.

This lets us build the rcar_pci driver again without CONFIG_OF for build
testing.  All platforms using this driver select OF, so this doesn't
change anything for the users.

[akpm@linux-foundation.org: be consistent with surrounding code]
Link: http://lkml.kernel.org/r/20170911200805.3363318-1-arnd@arndb.de
Fixes: c25da4778803 ("PCI: rcar: Add Renesas R-Car PCIe driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Frank Rowand <frank.rowand@sony.com>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Magnus Damm <damm@opensource.se>
Cc: Ben Dooks <ben.dooks@codethink.co.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/of.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index cfc34117fc92..b240ed69dc96 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -734,6 +734,16 @@ static inline struct device_node *of_get_cpu_node(int cpu,
 	return NULL;
 }
 
+static inline int of_n_addr_cells(struct device_node *np)
+{
+	return 0;
+
+}
+static inline int of_n_size_cells(struct device_node *np)
+{
+	return 0;
+}
+
 static inline int of_property_read_u64(const struct device_node *np,
 				       const char *propname, u64 *out_value)
 {
-- 
cgit v1.2.3


From e8c97af0c1f23d6ffedcaa3918861f2595e1db62 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 13 Oct 2017 15:58:11 -0700
Subject: linux/kernel.h: add/correct kernel-doc notation

Add kernel-doc notation for some macros.  Correct kernel-doc comments &
typos for a few macros.

Link: http://lkml.kernel.org/r/76fa1403-1511-be4c-e9c4-456b43edfad3@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 90 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 74 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 0ad4c3044cf9..91189bb0c818 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -44,6 +44,12 @@
 
 #define STACK_MAGIC	0xdeadbeef
 
+/**
+ * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value
+ * @x: value to repeat
+ *
+ * NOTE: @x is not checked for > 0xff; larger values produce odd results.
+ */
 #define REPEAT_BYTE(x)	((~0ul / 0xff) * (x))
 
 /* @a is a power of 2 value */
@@ -57,6 +63,10 @@
 #define READ			0
 #define WRITE			1
 
+/**
+ * ARRAY_SIZE - get the number of elements in array @arr
+ * @arr: array to be sized
+ */
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
 
 #define u64_to_user_ptr(x) (		\
@@ -76,7 +86,15 @@
 #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
 #define round_down(x, y) ((x) & ~__round_mask(x, y))
 
+/**
+ * FIELD_SIZEOF - get the size of a struct's field
+ * @t: the target struct
+ * @f: the target struct's field
+ * Return: the size of @f in the struct definition without having a
+ * declared instance of @t.
+ */
 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+
 #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP
 
 #define DIV_ROUND_DOWN_ULL(ll, d) \
@@ -107,7 +125,7 @@
 /*
  * Divide positive or negative dividend by positive or negative divisor
  * and round to closest integer. Result is undefined for negative
- * divisors if he dividend variable type is unsigned and for negative
+ * divisors if the dividend variable type is unsigned and for negative
  * dividends if the divisor variable type is unsigned.
  */
 #define DIV_ROUND_CLOSEST(x, divisor)(			\
@@ -247,13 +265,13 @@ extern int _cond_resched(void);
  * @ep_ro: right open interval endpoint
  *
  * Perform a "reciprocal multiplication" in order to "scale" a value into
- * range [0, ep_ro), where the upper interval endpoint is right-open.
+ * range [0, @ep_ro), where the upper interval endpoint is right-open.
  * This is useful, e.g. for accessing a index of an array containing
- * ep_ro elements, for example. Think of it as sort of modulus, only that
+ * @ep_ro elements, for example. Think of it as sort of modulus, only that
  * the result isn't that of modulo. ;) Note that if initial input is a
  * small value, then result will return 0.
  *
- * Return: a result based on val in interval [0, ep_ro).
+ * Return: a result based on @val in interval [0, @ep_ro).
  */
 static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
 {
@@ -618,8 +636,8 @@ do {									\
  * trace_printk - printf formatting in the ftrace buffer
  * @fmt: the printf format for printing
  *
- * Note: __trace_printk is an internal function for trace_printk and
- *       the @ip is passed in via the trace_printk macro.
+ * Note: __trace_printk is an internal function for trace_printk() and
+ *       the @ip is passed in via the trace_printk() macro.
  *
  * This function allows a kernel developer to debug fast path sections
  * that printk is not appropriate for. By scattering in various
@@ -629,7 +647,7 @@ do {									\
  * This is intended as a debugging tool for the developer only.
  * Please refrain from leaving trace_printks scattered around in
  * your code. (Extra memory is used for special buffers that are
- * allocated when trace_printk() is used)
+ * allocated when trace_printk() is used.)
  *
  * A little optization trick is done here. If there's only one
  * argument, there's no need to scan the string for printf formats.
@@ -681,7 +699,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...);
  *       the @ip is passed in via the trace_puts macro.
  *
  * This is similar to trace_printk() but is made for those really fast
- * paths that a developer wants the least amount of "Heisenbug" affects,
+ * paths that a developer wants the least amount of "Heisenbug" effects,
  * where the processing of the print format is still too much.
  *
  * This function allows a kernel developer to debug fast path sections
@@ -692,7 +710,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...);
  * This is intended as a debugging tool for the developer only.
  * Please refrain from leaving trace_puts scattered around in
  * your code. (Extra memory is used for special buffers that are
- * allocated when trace_puts() is used)
+ * allocated when trace_puts() is used.)
  *
  * Returns: 0 if nothing was written, positive # if string was.
  *  (1 when __trace_bputs is used, strlen(str) when __trace_puts is used)
@@ -771,6 +789,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 	t2 min2 = (y);					\
 	(void) (&min1 == &min2);			\
 	min1 < min2 ? min1 : min2; })
+
+/**
+ * min - return minimum of two values of the same or compatible types
+ * @x: first value
+ * @y: second value
+ */
 #define min(x, y)					\
 	__min(typeof(x), typeof(y),			\
 	      __UNIQUE_ID(min1_), __UNIQUE_ID(min2_),	\
@@ -781,12 +805,31 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 	t2 max2 = (y);					\
 	(void) (&max1 == &max2);			\
 	max1 > max2 ? max1 : max2; })
+
+/**
+ * max - return maximum of two values of the same or compatible types
+ * @x: first value
+ * @y: second value
+ */
 #define max(x, y)					\
 	__max(typeof(x), typeof(y),			\
 	      __UNIQUE_ID(max1_), __UNIQUE_ID(max2_),	\
 	      x, y)
 
+/**
+ * min3 - return minimum of three values
+ * @x: first value
+ * @y: second value
+ * @z: third value
+ */
 #define min3(x, y, z) min((typeof(x))min(x, y), z)
+
+/**
+ * max3 - return maximum of three values
+ * @x: first value
+ * @y: second value
+ * @z: third value
+ */
 #define max3(x, y, z) max((typeof(x))max(x, y), z)
 
 /**
@@ -805,8 +848,8 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
  * @lo: lowest allowable value
  * @hi: highest allowable value
  *
- * This macro does strict typechecking of lo/hi to make sure they are of the
- * same type as val.  See the unnecessary pointer comparisons.
+ * This macro does strict typechecking of @lo/@hi to make sure they are of the
+ * same type as @val.  See the unnecessary pointer comparisons.
  */
 #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
 
@@ -816,11 +859,24 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
  *
  * Or not use min/max/clamp at all, of course.
  */
+
+/**
+ * min_t - return minimum of two values, using the specified type
+ * @type: data type to use
+ * @x: first value
+ * @y: second value
+ */
 #define min_t(type, x, y)				\
 	__min(type, type,				\
 	      __UNIQUE_ID(min1_), __UNIQUE_ID(min2_),	\
 	      x, y)
 
+/**
+ * max_t - return maximum of two values, using the specified type
+ * @type: data type to use
+ * @x: first value
+ * @y: second value
+ */
 #define max_t(type, x, y)				\
 	__max(type, type,				\
 	      __UNIQUE_ID(min1_), __UNIQUE_ID(min2_),	\
@@ -834,7 +890,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
  * @hi: maximum allowable value
  *
  * This macro does no typechecking and uses temporary variables of type
- * 'type' to make all the comparisons.
+ * @type to make all the comparisons.
  */
 #define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
 
@@ -845,15 +901,17 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
  * @hi: maximum allowable value
  *
  * This macro does no typechecking and uses temporary variables of whatever
- * type the input argument 'val' is.  This is useful when val is an unsigned
- * type and min and max are literals that will otherwise be assigned a signed
+ * type the input argument @val is.  This is useful when @val is an unsigned
+ * type and @lo and @hi are literals that will otherwise be assigned a signed
  * integer type.
  */
 #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
 
 
-/*
- * swap - swap value of @a and @b
+/**
+ * swap - swap values of @a and @b
+ * @a: first value
+ * @b: second value
  */
 #define swap(a, b) \
 	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
-- 
cgit v1.2.3


From f892760aa66a2d657deaf59538fb69433036767c Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 13 Oct 2017 15:58:15 -0700
Subject: fs/mpage.c: fix mpage_writepage() for pages with buffers

When using FAT on a block device which supports rw_page, we can hit
BUG_ON(!PageLocked(page)) in try_to_free_buffers().  This is because we
call clean_buffers() after unlocking the page we've written.  Introduce
a new clean_page_buffers() which cleans all buffers associated with a
page and call it from within bdev_write_page().

[akpm@linux-foundation.org: s/PAGE_SIZE/~0U/ per Linus and Matthew]
Link: http://lkml.kernel.org/r/20171006211541.GA7409@bombadil.infradead.org
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reported-by: Toshi Kani <toshi.kani@hpe.com>
Reported-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Tested-by: Toshi Kani <toshi.kani@hpe.com>
Acked-by: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/buffer_head.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index c8dae555eccf..446b24cac67d 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -232,6 +232,7 @@ int generic_write_end(struct file *, struct address_space *,
 				loff_t, unsigned, unsigned,
 				struct page *, void *);
 void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
+void clean_page_buffers(struct page *page);
 int cont_write_begin(struct file *, struct address_space *, loff_t,
 			unsigned, unsigned, struct page **, void **,
 			get_block_t *, loff_t *);
-- 
cgit v1.2.3


From ca182551857cc2c1e6a2b7f1e72090a137a15008 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Fri, 13 Oct 2017 15:58:22 -0700
Subject: kmemleak: clear stale pointers from task stacks

Kmemleak considers any pointers on task stacks as references.  This
patch clears newly allocated and reused vmap stacks.

Link: http://lkml.kernel.org/r/150728990124.744199.8403409836394318684.stgit@buzz
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/thread_info.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 905d769d8ddc..5f7eeab990fe 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -42,7 +42,7 @@ enum {
 #define THREAD_ALIGN	THREAD_SIZE
 #endif
 
-#ifdef CONFIG_DEBUG_STACK_USAGE
+#if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK)
 # define THREADINFO_GFP		(GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \
 				 __GFP_ZERO)
 #else
-- 
cgit v1.2.3


From 0ad646c81b2182f7fa67ec0c8c825e0ee165696d Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Fri, 13 Oct 2017 11:58:53 -0700
Subject: tun: call dev_get_valid_name() before register_netdevice()

register_netdevice() could fail early when we have an invalid
dev name, in which case ->ndo_uninit() is not called. For tun
device, this is a problem because a timer etc. are already
initialized and it expects ->ndo_uninit() to clean them up.

We could move these initializations into a ->ndo_init() so
that register_netdevice() knows better, however this is still
complicated due to the logic in tun_detach().

Therefore, I choose to just call dev_get_valid_name() before
register_netdevice(), which is quicker and much easier to audit.
And for this specific case, it is already enough.

Fixes: 96442e42429e ("tuntap: choose the txq based on rxq")
Reported-by: Dmitry Alexeev <avekceeb@gmail.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f535779d9dc1..2eaac7d75af4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3694,6 +3694,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 				    unsigned char name_assign_type,
 				    void (*setup)(struct net_device *),
 				    unsigned int txqs, unsigned int rxqs);
+int dev_get_valid_name(struct net *net, struct net_device *dev,
+		       const char *name);
+
 #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \
 	alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1)
 
-- 
cgit v1.2.3


From 363b02dab09b3226f3bd1420dad9c72b79a42a76 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 4 Oct 2017 16:43:25 +0100
Subject: KEYS: Fix race between updating and finding a negative key

Consolidate KEY_FLAG_INSTANTIATED, KEY_FLAG_NEGATIVE and the rejection
error into one field such that:

 (1) The instantiation state can be modified/read atomically.

 (2) The error can be accessed atomically with the state.

 (3) The error isn't stored unioned with the payload pointers.

This deals with the problem that the state is spread over three different
objects (two bits and a separate variable) and reading or updating them
atomically isn't practical, given that not only can uninstantiated keys
change into instantiated or rejected keys, but rejected keys can also turn
into instantiated keys - and someone accessing the key might not be using
any locking.

The main side effect of this problem is that what was held in the payload
may change, depending on the state.  For instance, you might observe the
key to be in the rejected state.  You then read the cached error, but if
the key semaphore wasn't locked, the key might've become instantiated
between the two reads - and you might now have something in hand that isn't
actually an error code.

The state is now KEY_IS_UNINSTANTIATED, KEY_IS_POSITIVE or a negative error
code if the key is negatively instantiated.  The key_is_instantiated()
function is replaced with key_is_positive() to avoid confusion as negative
keys are also 'instantiated'.

Additionally, barriering is included:

 (1) Order payload-set before state-set during instantiation.

 (2) Order state-read before payload-read when using the key.

Further separate barriering is necessary if RCU is being used to access the
payload content after reading the payload pointers.

Fixes: 146aa8b1453b ("KEYS: Merge the type-specific data with the payload data")
Cc: stable@vger.kernel.org # v4.4+
Reported-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Eric Biggers <ebiggers@google.com>
---
 include/linux/key.h | 47 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index e315e16b6ff8..8a15cabe928d 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -138,6 +138,11 @@ struct key_restriction {
 	struct key_type *keytype;
 };
 
+enum key_state {
+	KEY_IS_UNINSTANTIATED,
+	KEY_IS_POSITIVE,		/* Positively instantiated */
+};
+
 /*****************************************************************************/
 /*
  * authentication token / access credential / keyring
@@ -169,6 +174,7 @@ struct key {
 						 * - may not match RCU dereferenced payload
 						 * - payload should contain own length
 						 */
+	short			state;		/* Key state (+) or rejection error (-) */
 
 #ifdef KEY_DEBUGGING
 	unsigned		magic;
@@ -176,18 +182,16 @@ struct key {
 #endif
 
 	unsigned long		flags;		/* status flags (change with bitops) */
-#define KEY_FLAG_INSTANTIATED	0	/* set if key has been instantiated */
-#define KEY_FLAG_DEAD		1	/* set if key type has been deleted */
-#define KEY_FLAG_REVOKED	2	/* set if key had been revoked */
-#define KEY_FLAG_IN_QUOTA	3	/* set if key consumes quota */
-#define KEY_FLAG_USER_CONSTRUCT	4	/* set if key is being constructed in userspace */
-#define KEY_FLAG_NEGATIVE	5	/* set if key is negative */
-#define KEY_FLAG_ROOT_CAN_CLEAR	6	/* set if key can be cleared by root without permission */
-#define KEY_FLAG_INVALIDATED	7	/* set if key has been invalidated */
-#define KEY_FLAG_BUILTIN	8	/* set if key is built in to the kernel */
-#define KEY_FLAG_ROOT_CAN_INVAL	9	/* set if key can be invalidated by root without permission */
-#define KEY_FLAG_KEEP		10	/* set if key should not be removed */
-#define KEY_FLAG_UID_KEYRING	11	/* set if key is a user or user session keyring */
+#define KEY_FLAG_DEAD		0	/* set if key type has been deleted */
+#define KEY_FLAG_REVOKED	1	/* set if key had been revoked */
+#define KEY_FLAG_IN_QUOTA	2	/* set if key consumes quota */
+#define KEY_FLAG_USER_CONSTRUCT	3	/* set if key is being constructed in userspace */
+#define KEY_FLAG_ROOT_CAN_CLEAR	4	/* set if key can be cleared by root without permission */
+#define KEY_FLAG_INVALIDATED	5	/* set if key has been invalidated */
+#define KEY_FLAG_BUILTIN	6	/* set if key is built in to the kernel */
+#define KEY_FLAG_ROOT_CAN_INVAL	7	/* set if key can be invalidated by root without permission */
+#define KEY_FLAG_KEEP		8	/* set if key should not be removed */
+#define KEY_FLAG_UID_KEYRING	9	/* set if key is a user or user session keyring */
 
 	/* the key type and key description string
 	 * - the desc is used to match a key against search criteria
@@ -213,7 +217,6 @@ struct key {
 			struct list_head name_link;
 			struct assoc_array keys;
 		};
-		int reject_error;
 	};
 
 	/* This is set on a keyring to restrict the addition of a link to a key
@@ -353,17 +356,27 @@ extern void key_set_timeout(struct key *, unsigned);
 #define	KEY_NEED_SETATTR 0x20	/* Require permission to change attributes */
 #define	KEY_NEED_ALL	0x3f	/* All the above permissions */
 
+static inline short key_read_state(const struct key *key)
+{
+	/* Barrier versus mark_key_instantiated(). */
+	return smp_load_acquire(&key->state);
+}
+
 /**
- * key_is_instantiated - Determine if a key has been positively instantiated
+ * key_is_positive - Determine if a key has been positively instantiated
  * @key: The key to check.
  *
  * Return true if the specified key has been positively instantiated, false
  * otherwise.
  */
-static inline bool key_is_instantiated(const struct key *key)
+static inline bool key_is_positive(const struct key *key)
+{
+	return key_read_state(key) == KEY_IS_POSITIVE;
+}
+
+static inline bool key_is_negative(const struct key *key)
 {
-	return test_bit(KEY_FLAG_INSTANTIATED, &key->flags) &&
-		!test_bit(KEY_FLAG_NEGATIVE, &key->flags);
+	return key_read_state(key) < 0;
 }
 
 #define dereference_key_rcu(KEY)					\
-- 
cgit v1.2.3


From 55dfce873dca46df00304c44a568d7933bffff89 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 9 Oct 2017 11:09:33 -0700
Subject: Input: factor out and export input_device_id matching code

Factor out and export input_match_device_id() so that modules may use it.
It will be needed by joydev to blacklist accelerometers in composite
devices.

Tested-by: Roderick Colenbrander <roderick.colenbrander@sony.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index fb5e23c7ed98..2a44650e449d 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -469,6 +469,9 @@ int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke);
 int input_set_keycode(struct input_dev *dev,
 		      const struct input_keymap_entry *ke);
 
+bool input_match_device_id(const struct input_dev *dev,
+			   const struct input_device_id *id);
+
 void input_enable_softrepeat(struct input_dev *dev, int delay, int period);
 
 extern struct class input_class;
-- 
cgit v1.2.3


From 8724ecb072293f109a6f5dc93be8a98bf61fe14f Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 9 Oct 2017 12:01:14 -0700
Subject: Input: allow matching device IDs on property bits

Let's allow matching input devices on their property bits, both in-kernel
and when generating module aliases.

Tested-by: Roderick Colenbrander <roderick.colenbrander@sony.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input.h           | 4 ++++
 include/linux/mod_devicetable.h | 3 +++
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index 2a44650e449d..7c7516eb7d76 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -234,6 +234,10 @@ struct input_dev {
 #error "SW_MAX and INPUT_DEVICE_ID_SW_MAX do not match"
 #endif
 
+#if INPUT_PROP_MAX != INPUT_DEVICE_ID_PROP_MAX
+#error "INPUT_PROP_MAX and INPUT_DEVICE_ID_PROP_MAX do not match"
+#endif
+
 #define INPUT_DEVICE_ID_MATCH_DEVICE \
 	(INPUT_DEVICE_ID_MATCH_BUS | INPUT_DEVICE_ID_MATCH_VENDOR | INPUT_DEVICE_ID_MATCH_PRODUCT)
 #define INPUT_DEVICE_ID_MATCH_DEVICE_AND_VERSION \
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 3f74ef2281e8..72f0b7f19c59 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -293,6 +293,7 @@ struct pcmcia_device_id {
 #define INPUT_DEVICE_ID_SND_MAX		0x07
 #define INPUT_DEVICE_ID_FF_MAX		0x7f
 #define INPUT_DEVICE_ID_SW_MAX		0x0f
+#define INPUT_DEVICE_ID_PROP_MAX	0x1f
 
 #define INPUT_DEVICE_ID_MATCH_BUS	1
 #define INPUT_DEVICE_ID_MATCH_VENDOR	2
@@ -308,6 +309,7 @@ struct pcmcia_device_id {
 #define INPUT_DEVICE_ID_MATCH_SNDBIT	0x0400
 #define INPUT_DEVICE_ID_MATCH_FFBIT	0x0800
 #define INPUT_DEVICE_ID_MATCH_SWBIT	0x1000
+#define INPUT_DEVICE_ID_MATCH_PROPBIT	0x2000
 
 struct input_device_id {
 
@@ -327,6 +329,7 @@ struct input_device_id {
 	kernel_ulong_t sndbit[INPUT_DEVICE_ID_SND_MAX / BITS_PER_LONG + 1];
 	kernel_ulong_t ffbit[INPUT_DEVICE_ID_FF_MAX / BITS_PER_LONG + 1];
 	kernel_ulong_t swbit[INPUT_DEVICE_ID_SW_MAX / BITS_PER_LONG + 1];
+	kernel_ulong_t propbit[INPUT_DEVICE_ID_PROP_MAX / BITS_PER_LONG + 1];
 
 	kernel_ulong_t driver_info;
 };
-- 
cgit v1.2.3


From a961e40917fb14614d368d8bc9782ca4d6a8cd11 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 19 Oct 2017 13:30:15 -0400
Subject: membarrier: Provide register expedited private command

This introduces a "register private expedited" membarrier command which
allows eventual removal of important memory barrier constraints on the
scheduler fast-paths. It changes how the "private expedited" membarrier
command (new to 4.14) is used from user-space.

This new command allows processes to register their intent to use the
private expedited command.  This affects how the expedited private
command introduced in 4.14-rc is meant to be used, and should be merged
before 4.14 final.

Processes are now required to register before using
MEMBARRIER_CMD_PRIVATE_EXPEDITED, otherwise that command returns EPERM.

This fixes a problem that arose when designing requested extensions to
sys_membarrier() to allow JITs to efficiently flush old code from
instruction caches.  Several potential algorithms are much less painful
if the user register intent to use this functionality early on, for
example, before the process spawns the second thread.  Registering at
this time removes the need to interrupt each and every thread in that
process at the first expedited sys_membarrier() system call.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  3 +++
 include/linux/sched/mm.h | 16 ++++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 46f4ecf5479a..1861ea8dba77 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -445,6 +445,9 @@ struct mm_struct {
 	unsigned long flags; /* Must use atomic bitops to access the bits */
 
 	struct core_state *core_state; /* coredumping support */
+#ifdef CONFIG_MEMBARRIER
+	atomic_t membarrier_state;
+#endif
 #ifdef CONFIG_AIO
 	spinlock_t			ioctx_lock;
 	struct kioctx_table __rcu	*ioctx_table;
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index ae53e413fb13..ab9bf7b73954 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -211,4 +211,20 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
 	current->flags = (current->flags & ~PF_MEMALLOC) | flags;
 }
 
+#ifdef CONFIG_MEMBARRIER
+enum {
+	MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY	= (1U << 0),
+	MEMBARRIER_STATE_SWITCH_MM			= (1U << 1),
+};
+
+static inline void membarrier_execve(struct task_struct *t)
+{
+	atomic_set(&t->mm->membarrier_state, 0);
+}
+#else
+static inline void membarrier_execve(struct task_struct *t)
+{
+}
+#endif
+
 #endif /* _LINUX_SCHED_MM_H */
-- 
cgit v1.2.3


From 27fdb35fe99011d86bcc54f62fe84712c53f4d05 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 19 Oct 2017 14:26:21 -0700
Subject: doc: Fix various RCU docbook comment-header problems

Because many of RCU's files have not been included into docbook, a
number of errors have accumulated.  This commit fixes them.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rculist.h  |  2 +-
 include/linux/rcupdate.h | 22 ++++++++++++++--------
 include/linux/srcu.h     |  1 +
 3 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index b1fd8bf85fdc..2bea1d5e9930 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -276,7 +276,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
 #define list_entry_rcu(ptr, type, member) \
 	container_of(lockless_dereference(ptr), type, member)
 
-/**
+/*
  * Where are list_empty_rcu() and list_first_entry_rcu()?
  *
  * Implementing those functions following their counterparts list_empty() and
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index de50d8a4cf41..1a9f70d44af9 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -523,7 +523,7 @@ static inline void rcu_preempt_sleep_check(void) { }
  * Return the value of the specified RCU-protected pointer, but omit
  * both the smp_read_barrier_depends() and the READ_ONCE().  This
  * is useful in cases where update-side locks prevent the value of the
- * pointer from changing.  Please note that this primitive does -not-
+ * pointer from changing.  Please note that this primitive does *not*
  * prevent the compiler from repeating this reference or combining it
  * with other references, so it should not be used without protection
  * of appropriate locks.
@@ -568,7 +568,7 @@ static inline void rcu_preempt_sleep_check(void) { }
  * is handed off from RCU to some other synchronization mechanism, for
  * example, reference counting or locking.  In C11, it would map to
  * kill_dependency().  It could be used as follows:
- *
+ * ``
  *	rcu_read_lock();
  *	p = rcu_dereference(gp);
  *	long_lived = is_long_lived(p);
@@ -579,6 +579,7 @@ static inline void rcu_preempt_sleep_check(void) { }
  *			p = rcu_pointer_handoff(p);
  *	}
  *	rcu_read_unlock();
+ *``
  */
 #define rcu_pointer_handoff(p) (p)
 
@@ -778,18 +779,21 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 
 /**
  * RCU_INIT_POINTER() - initialize an RCU protected pointer
+ * @p: The pointer to be initialized.
+ * @v: The value to initialized the pointer to.
  *
  * Initialize an RCU-protected pointer in special cases where readers
  * do not need ordering constraints on the CPU or the compiler.  These
  * special cases are:
  *
- * 1.	This use of RCU_INIT_POINTER() is NULLing out the pointer -or-
+ * 1.	This use of RCU_INIT_POINTER() is NULLing out the pointer *or*
  * 2.	The caller has taken whatever steps are required to prevent
- *	RCU readers from concurrently accessing this pointer -or-
+ *	RCU readers from concurrently accessing this pointer *or*
  * 3.	The referenced data structure has already been exposed to
- *	readers either at compile time or via rcu_assign_pointer() -and-
- *	a.	You have not made -any- reader-visible changes to
- *		this structure since then -or-
+ *	readers either at compile time or via rcu_assign_pointer() *and*
+ *
+ *	a.	You have not made *any* reader-visible changes to
+ *		this structure since then *or*
  *	b.	It is OK for readers accessing this structure from its
  *		new location to see the old state of the structure.  (For
  *		example, the changes were to statistical counters or to
@@ -805,7 +809,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
  * by a single external-to-structure RCU-protected pointer, then you may
  * use RCU_INIT_POINTER() to initialize the internal RCU-protected
  * pointers, but you must use rcu_assign_pointer() to initialize the
- * external-to-structure pointer -after- you have completely initialized
+ * external-to-structure pointer *after* you have completely initialized
  * the reader-accessible portions of the linked structure.
  *
  * Note that unlike rcu_assign_pointer(), RCU_INIT_POINTER() provides no
@@ -819,6 +823,8 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 
 /**
  * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer
+ * @p: The pointer to be initialized.
+ * @v: The value to initialized the pointer to.
  *
  * GCC-style initialization for an RCU-protected pointer in a structure field.
  */
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 39af9bc0f653..62be8966e837 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -78,6 +78,7 @@ void synchronize_srcu(struct srcu_struct *sp);
 
 /**
  * srcu_read_lock_held - might we be in SRCU read-side critical section?
+ * @sp: The srcu_struct structure to check
  *
  * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU
  * read-side critical section.  In absence of CONFIG_DEBUG_LOCK_ALLOC,
-- 
cgit v1.2.3


From 34f79502bbcfab659b8729da68b5e387f96eb4c1 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 18 Oct 2017 07:10:36 -0700
Subject: bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region

SK_SKB BPF programs are run from the socket/tcp context but early in
the stack before much of the TCP metadata is needed in tcp_skb_cb. So
we can use some unused fields to place BPF metadata needed for SK_SKB
programs when implementing the redirect function.

This allows us to drop the preempt disable logic. It does however
require an API change so sk_redirect_map() has been updated to
additionally provide ctx_ptr to skb. Note, we do however continue to
disable/enable preemption around actual BPF program running to account
for map updates.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index d29e58fde364..818a0b26249e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -728,7 +728,7 @@ void xdp_do_flush_map(void);
 void bpf_warn_invalid_xdp_action(u32 act);
 void bpf_warn_invalid_xdp_redirect(u32 ifindex);
 
-struct sock *do_sk_redirect_map(void);
+struct sock *do_sk_redirect_map(struct sk_buff *skb);
 
 #ifdef CONFIG_BPF_JIT
 extern int bpf_jit_enable;
-- 
cgit v1.2.3


From 88796e7e5c457cae72833196cb98e6895dd107e2 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Fri, 20 Oct 2017 10:13:46 -0700
Subject: sched/swait: Document it clearly that the swait facilities are
 special and shouldn't be used

We currently welcome using swait over wait whenever possible because
it is a slimmer data structure. However, Linus has made it very clear
that he does not want this used, unless under very specific RT scenarios
(such as current users).

Update the comments before kernel hipsters start thinking swait is the
cool thing to do.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@stgolabs.net
Cc: wagi@monom.org
Link: http://lkml.kernel.org/r/20171020171346.24445-1-dave@stgolabs.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/swait.h | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swait.h b/include/linux/swait.h
index 73e97a08d3d0..cf30f5022472 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -9,13 +9,16 @@
 /*
  * Simple wait queues
  *
- * While these are very similar to the other/complex wait queues (wait.h) the
- * most important difference is that the simple waitqueue allows for
- * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
- * times.
+ * While these are very similar to regular wait queues (wait.h) the most
+ * important difference is that the simple waitqueue allows for deterministic
+ * behaviour -- IOW it has strictly bounded IRQ and lock hold times.
  *
- * In order to make this so, we had to drop a fair number of features of the
- * other waitqueue code; notably:
+ * Mainly, this is accomplished by two things. Firstly not allowing swake_up_all
+ * from IRQ disabled, and dropping the lock upon every wakeup, giving a higher
+ * priority task a chance to run.
+ *
+ * Secondly, we had to drop a fair number of features of the other waitqueue
+ * code; notably:
  *
  *  - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue;
  *    all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right
@@ -24,12 +27,14 @@
  *  - the exclusive mode; because this requires preserving the list order
  *    and this is hard.
  *
- *  - custom wake functions; because you cannot give any guarantees about
- *    random code.
- *
- * As a side effect of this; the data structures are slimmer.
+ *  - custom wake callback functions; because you cannot give any guarantees
+ *    about random code. This also allows swait to be used in RT, such that
+ *    raw spinlock can be used for the swait queue head.
  *
- * One would recommend using this wait queue where possible.
+ * As a side effect of these; the data structures are slimmer albeit more ad-hoc.
+ * For all the above, note that simple wait queues should _only_ be used under
+ * very specific realtime constraints -- it is best to stick with the regular
+ * wait queues in most cases.
  */
 
 struct task_struct;
-- 
cgit v1.2.3


From 0cc2b4e5a020fc7f4d1795741c116c983e9467d7 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 24 Oct 2017 15:20:45 +0200
Subject: PM / QoS: Fix device resume latency PM QoS

The special value of 0 for device resume latency PM QoS means
"no restriction", but there are two problems with that.

First, device resume latency PM QoS requests with 0 as the
value are always put in front of requests with positive
values in the priority lists used internally by the PM QoS
framework, causing 0 to be chosen as an effective constraint
value.  However, that 0 is then interpreted as "no restriction"
effectively overriding the other requests with specific
restrictions which is incorrect.

Second, the users of device resume latency PM QoS have no
way to specify that *any* resume latency at all should be
avoided, which is an artificial limitation in general.

To address these issues, modify device resume latency PM QoS to
use S32_MAX as the "no constraint" value and 0 as the "no
latency at all" one and rework its users (the cpuidle menu
governor, the genpd QoS governor and the runtime PM framework)
to follow these changes.

Also add a special "n/a" value to the corresponding user space I/F
to allow user space to indicate that it cannot accept any resume
latencies at all for the given device.

Fixes: 85dc0b8a4019 (PM / QoS: Make it possible to expose PM QoS latency constraints)
Link: https://bugzilla.kernel.org/show_bug.cgi?id=197323
Reported-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Alex Shi <alex.shi@linaro.org>
Cc: All applicable <stable@vger.kernel.org>
---
 include/linux/pm_qos.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 032b55909145..6737a8c9e8c6 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -27,16 +27,17 @@ enum pm_qos_flags_status {
 	PM_QOS_FLAGS_ALL,
 };
 
-#define PM_QOS_DEFAULT_VALUE -1
+#define PM_QOS_DEFAULT_VALUE	(-1)
+#define PM_QOS_LATENCY_ANY	S32_MAX
 
 #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE	(2000 * USEC_PER_SEC)
 #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE	(2000 * USEC_PER_SEC)
 #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE	0
 #define PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE	0
 #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE	0
+#define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT	PM_QOS_LATENCY_ANY
 #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE	0
 #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT	(-1)
-#define PM_QOS_LATENCY_ANY			((s32)(~(__u32)0 >> 1))
 
 #define PM_QOS_FLAG_NO_POWER_OFF	(1 << 0)
 #define PM_QOS_FLAG_REMOTE_WAKEUP	(1 << 1)
-- 
cgit v1.2.3


From be0f161ef141e4df368aa3f417a1c2ab9c362e75 Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Thu, 28 Sep 2017 15:33:50 -0500
Subject: net/mlx5e: DCBNL, Implement tc with ets type and zero bandwidth

Previously, tc with ets type and zero bandwidth is not accepted
by driver. This behavior does not follow the IEEE802.1qaz spec.

If there are tcs with ets type and zero bandwidth, these tcs are
assigned to the lowest priority tc_group #0. We equally distribute
100% bw of the tc_group #0 to these zero bandwidth ets tcs.
Also, the non zero bandwidth ets tcs are assigned to tc_group #1.

If there is no zero bandwidth ets tc, the non zero bandwidth ets tcs
are assigned to tc_group #0.

Fixes: cdcf11212b22 ("net/mlx5e: Validate BW weight values of ETS")
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/port.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index c57d4b7de3a8..c59af8ab753a 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -157,6 +157,8 @@ int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc);
 int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev,
 			    u8 prio, u8 *tc);
 int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group);
+int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev,
+			     u8 tc, u8 *tc_group);
 int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw);
 int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev,
 				u8 tc, u8 *bw_pct);
-- 
cgit v1.2.3


From dea6e19f4ef746aa18b4c33d1a7fed54356796ed Mon Sep 17 00:00:00 2001
From: Girish Moodalbail <girish.moodalbail@oracle.com>
Date: Fri, 27 Oct 2017 00:00:16 -0700
Subject: tap: reference to KVA of an unloaded module causes kernel panic

The commit 9a393b5d5988 ("tap: tap as an independent module") created a
separate tap module that implements tap functionality and exports
interfaces that will be used by macvtap and ipvtap modules to create
create respective tap devices.

However, that patch introduced a regression wherein the modules macvtap
and ipvtap can be removed (through modprobe -r) while there are
applications using the respective /dev/tapX devices. These applications
cause kernel to hold reference to /dev/tapX through 'struct cdev
macvtap_cdev' and 'struct cdev ipvtap_dev' defined in macvtap and ipvtap
modules respectively. So,  when the application is later closed the
kernel panics because we are referencing KVA that is present in the
unloaded modules.

----------8<------- Example ----------8<----------
$ sudo ip li add name mv0 link enp7s0 type macvtap
$ sudo ip li show mv0 |grep mv0| awk -e '{print $1 $2}'
  14:mv0@enp7s0:
$ cat /dev/tap14 &
$ lsmod |egrep -i 'tap|vlan'
macvtap                16384  0
macvlan                24576  1 macvtap
tap                    24576  3 macvtap
$ sudo modprobe -r macvtap
$ fg
cat /dev/tap14
^C

<...system panics...>
BUG: unable to handle kernel paging request at ffffffffa038c500
IP: cdev_put+0xf/0x30
----------8<-----------------8<----------

The fix is to set cdev.owner to the module that creates the tap device
(either macvtap or ipvtap). With this set, the operations (in
fs/char_dev.c) on char device holds and releases the module through
cdev_get() and cdev_put() and will not allow the module to unload
prematurely.

Fixes: 9a393b5d5988ea4e (tap: tap as an independent module)
Signed-off-by: Girish Moodalbail <girish.moodalbail@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_tap.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
index 4837157da0dc..9ae41cdd0d4c 100644
--- a/include/linux/if_tap.h
+++ b/include/linux/if_tap.h
@@ -73,8 +73,8 @@ void tap_del_queues(struct tap_dev *tap);
 int tap_get_minor(dev_t major, struct tap_dev *tap);
 void tap_free_minor(dev_t major, struct tap_dev *tap);
 int tap_queue_resize(struct tap_dev *tap);
-int tap_create_cdev(struct cdev *tap_cdev,
-		    dev_t *tap_major, const char *device_name);
+int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major,
+		    const char *device_name, struct module *module);
 void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev);
 
 #endif /*_LINUX_IF_TAP_H_*/
-- 
cgit v1.2.3


From 1da4fc97cbf89514e417a3df46eaec864a9b8a48 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 28 Oct 2017 19:43:54 +0800
Subject: sctp: fix some type cast warnings introduced by stream reconf

These warnings were found by running 'make C=2 M=net/sctp/'.

They are introduced by not aware of Endian when coding stream
reconf patches.

Since commit c0d8bab6ae51 ("sctp: add get and set sockopt for
reconf_enable") enabled stream reconf feature for users, the
Fixes tag below would use it.

Fixes: c0d8bab6ae51 ("sctp: add get and set sockopt for reconf_enable")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 82b171e1aa0b..09d7412e9cb0 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -716,28 +716,28 @@ struct sctp_reconf_chunk {
 
 struct sctp_strreset_outreq {
 	struct sctp_paramhdr param_hdr;
-	__u32 request_seq;
-	__u32 response_seq;
-	__u32 send_reset_at_tsn;
-	__u16 list_of_streams[0];
+	__be32 request_seq;
+	__be32 response_seq;
+	__be32 send_reset_at_tsn;
+	__be16 list_of_streams[0];
 };
 
 struct sctp_strreset_inreq {
 	struct sctp_paramhdr param_hdr;
-	__u32 request_seq;
-	__u16 list_of_streams[0];
+	__be32 request_seq;
+	__be16 list_of_streams[0];
 };
 
 struct sctp_strreset_tsnreq {
 	struct sctp_paramhdr param_hdr;
-	__u32 request_seq;
+	__be32 request_seq;
 };
 
 struct sctp_strreset_addstrm {
 	struct sctp_paramhdr param_hdr;
-	__u32 request_seq;
-	__u16 number_of_streams;
-	__u16 reserved;
+	__be32 request_seq;
+	__be16 number_of_streams;
+	__be16 reserved;
 };
 
 enum {
@@ -752,16 +752,16 @@ enum {
 
 struct sctp_strreset_resp {
 	struct sctp_paramhdr param_hdr;
-	__u32 response_seq;
-	__u32 result;
+	__be32 response_seq;
+	__be32 result;
 };
 
 struct sctp_strreset_resptsn {
 	struct sctp_paramhdr param_hdr;
-	__u32 response_seq;
-	__u32 result;
-	__u32 senders_next_tsn;
-	__u32 receivers_next_tsn;
+	__be32 response_seq;
+	__be32 result;
+	__be32 senders_next_tsn;
+	__be32 receivers_next_tsn;
 };
 
 #endif /* __LINUX_SCTP_H__ */
-- 
cgit v1.2.3


From 978aa0474115f3f5848949f2efce4def0766a5cb Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 28 Oct 2017 19:43:57 +0800
Subject: sctp: fix some type cast warnings introduced since very beginning

These warnings were found by running 'make C=2 M=net/sctp/'.
They are there since very beginning.

Note after this patch, there still one warning left in
sctp_outq_flush():
  sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM)

Since it has been moved to sctp_stream_outq_migrate on net-next,
to avoid the extra job when merging net-next to net, I will post
the fix for it after the merging is done.

Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sctp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index 09d7412e9cb0..da803dfc7a39 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -231,7 +231,7 @@ struct sctp_datahdr {
 	__be32 tsn;
 	__be16 stream;
 	__be16 ssn;
-	__be32 ppid;
+	__u32 ppid;
 	__u8  payload[0];
 };
 
-- 
cgit v1.2.3