From d0b6e0a8ef24b1b07078ababe5d91bcdf4f4264a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 12 Sep 2017 21:36:55 +0200 Subject: watchdog/hardlockup: Provide interface to stop/restart perf events Provide an interface to stop and restart perf NMI watchdog events on all CPUs. This is only usable during init and especially for handling the perf HT bug on Intel machines. It's safe to use it this way as nothing can start/stop the NMI watchdog in parallel. Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.167649596@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a36abe2da13e..b24d4a58674a 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -75,7 +75,11 @@ static inline void hardlockup_detector_disable(void) {} #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void arch_touch_nmi_watchdog(void); +extern void hardlockup_detector_perf_stop(void); +extern void hardlockup_detector_perf_restart(void); #else +static inline void hardlockup_detector_perf_stop(void) { } +static inline void hardlockup_detector_perf_restart(void) { } #if !defined(CONFIG_HAVE_NMI_WATCHDOG) static inline void arch_touch_nmi_watchdog(void) {} #endif -- cgit v1.2.3 From 6554fd8cf06db86f861bb24d7487b2873ca444c4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:36:57 +0200 Subject: watchdog/core: Provide interface to stop from poweroff() PARISC has a a busy looping power off routine. If the watchdog is enabled the watchdog timer will still fire, but the thread is not running, which causes the softlockup watchdog to trigger. Provide a interface which allows to turn the watchdog off. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Helge Deller Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Cc: linux-parisc@vger.kernel.org Link: http://lkml.kernel.org/r/20170912194146.327343752@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index b24d4a58674a..85bb268be39c 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -12,10 +12,10 @@ #ifdef CONFIG_LOCKUP_DETECTOR void lockup_detector_init(void); +void lockup_detector_soft_poweroff(void); #else -static inline void lockup_detector_init(void) -{ -} +static inline void lockup_detector_init(void) { } +static inline void lockup_detector_soft_poweroff(void) { } #endif #ifdef CONFIG_SOFTLOCKUP_DETECTOR -- cgit v1.2.3 From 5490125d77a43016b26f629d4b485e2c62172551 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:36:59 +0200 Subject: watchdog/core: Remove broken suspend/resume interfaces This interface has several issues: - It's causing recursive locking of the hotplug lock. - It's complete overkill to teardown all threads and then recreate them The same can be achieved with the simple hardlockup_detector_perf_stop / restart() interfaces. The abuse from the busy looping poweroff() loop of PARISC has been solved as well. Remove the cruft. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.487537732@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 85bb268be39c..7eefe7abf44b 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -164,7 +164,6 @@ extern int watchdog_thresh; extern unsigned long watchdog_enabled; extern struct cpumask watchdog_cpumask; extern unsigned long *watchdog_cpumask_bits; -extern int __read_mostly watchdog_suspended; #ifdef CONFIG_SMP extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; @@ -192,17 +191,6 @@ extern int proc_watchdog_thresh(struct ctl_table *, int , void __user *, size_t *, loff_t *); extern int proc_watchdog_cpumask(struct ctl_table *, int, void __user *, size_t *, loff_t *); -extern int lockup_detector_suspend(void); -extern void lockup_detector_resume(void); -#else -static inline int lockup_detector_suspend(void) -{ - return 0; -} - -static inline void lockup_detector_resume(void) -{ -} #endif #ifdef CONFIG_HAVE_ACPI_APEI_NMI -- cgit v1.2.3 From 941154bd6937a710ae9193a3c733c0029e5ae7b8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:04 +0200 Subject: watchdog/hardlockup/perf: Prevent CPU hotplug deadlock The following deadlock is possible in the watchdog hotplug code: cpus_write_lock() ... takedown_cpu() smpboot_park_threads() smpboot_park_thread() kthread_park() ->park() := watchdog_disable() watchdog_nmi_disable() perf_event_release_kernel(); put_event() _free_event() ->destroy() := hw_perf_event_destroy() x86_release_hardware() release_ds_buffers() get_online_cpus() when a per cpu watchdog perf event is destroyed which drops the last reference to the PMU hardware. The cleanup code there invokes get_online_cpus() which instantly deadlocks because the hotplug percpu rwsem is write locked. To solve this add a deferring mechanism: cpus_write_lock() kthread_park() watchdog_nmi_disable(deferred) perf_event_disable(event); move_event_to_deferred(event); .... cpus_write_unlock() cleaup_deferred_events() perf_event_release_kernel() This is still properly serialized against concurrent hotplug via the cpu_add_remove_lock, which is held by the task which initiated the hotplug event. This is also used to handle event destruction when the watchdog threads are parked via other mechanisms than CPU hotplug. Analyzed-by: Peter Zijlstra Reported-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194146.884469246@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 7eefe7abf44b..80354e6fa86d 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -13,9 +13,11 @@ #ifdef CONFIG_LOCKUP_DETECTOR void lockup_detector_init(void); void lockup_detector_soft_poweroff(void); +void lockup_detector_cleanup(void); #else static inline void lockup_detector_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } +static inline void lockup_detector_cleanup(void) { } #endif #ifdef CONFIG_SOFTLOCKUP_DETECTOR @@ -77,9 +79,13 @@ static inline void hardlockup_detector_disable(void) {} extern void arch_touch_nmi_watchdog(void); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); +extern void hardlockup_detector_perf_disable(void); +extern void hardlockup_detector_perf_cleanup(void); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } +static inline void hardlockup_detector_perf_disable(void) { } +static inline void hardlockup_detector_perf_cleanup(void) { } #if !defined(CONFIG_HAVE_NMI_WATCHDOG) static inline void arch_touch_nmi_watchdog(void) {} #endif -- cgit v1.2.3 From 01f0a02701cbcf32d22cfc9d1ab9a3f0ff2ba68c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:05 +0200 Subject: watchdog/core: Remove the park_in_progress obfuscation Commit: b94f51183b06 ("kernel/watchdog: prevent false hardlockup on overloaded system") tries to fix the following issue: proc_write() set_sample_period() <--- New sample period becoms visible <----- Broken starts proc_watchdog_update() watchdog_enable_all_cpus() watchdog_hrtimer_fn() update_watchdog_all_cpus() restart_timer(sample_period) watchdog_park_threads() thread->park() disable_nmi() <----- Broken ends The reason why this is broken is that the update of the watchdog threshold becomes immediately effective and visible for the hrtimer function which uses that value to rearm the timer. But the NMI/perf side still uses the old value up to the point where it is disabled. If the rate has been lowered then the NMI can run fast enough to 'detect' a hard lockup because the timer has not fired due to the longer period. The patch 'fixed' this by adding a variable: proc_write() set_sample_period() <----- Broken starts proc_watchdog_update() watchdog_enable_all_cpus() watchdog_hrtimer_fn() update_watchdog_all_cpus() restart_timer(sample_period) watchdog_park_threads() park_in_progress = 1 <----- Broken ends nmi_watchdog() if (park_in_progress) return; The only effect of this variable was to make the window where the breakage can hit small enough that it was not longer observable in testing. From a correctness point of view it is a pointless bandaid which merily papers over the root cause: the unsychronized update of the variable. Looking deeper into the related code pathes unearthed similar problems in the watchdog_start()/stop() functions. watchdog_start() perf_nmi_event_start() hrtimer_start() watchdog_stop() hrtimer_cancel() perf_nmi_event_stop() In both cases the call order is wrong because if the tasks gets preempted or the VM gets scheduled out long enough after the first call, then there is a chance that the next NMI will see a stale hrtimer interrupt count and trigger a false positive hard lockup splat. Get rid of park_in_progress so the code can be gradually deobfuscated and pruned from several layers of duct tape papering over the root cause, which has been either ignored or not understood at all. Once this is removed the underlying problem will be fixed by rewriting the proc interface to do a proper synchronized update. Address the start/stop() ordering problem as well by reverting the call order, so this part is at least correct now. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1709052038270.2393@nanos Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 80354e6fa86d..91a3a4a4c8ae 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -27,7 +27,6 @@ extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; extern int soft_watchdog_enabled; -extern atomic_t watchdog_park_in_progress; #else static inline void touch_softlockup_watchdog_sched(void) { -- cgit v1.2.3 From 0d85923c7a81719567311ba0eae8ecb2efd4c8a0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:09 +0200 Subject: smpboot/threads, watchdog/core: Avoid runtime allocation smpboot_update_cpumask_threads_percpu() allocates a temporary cpumask at runtime. This is suboptimal because the call site needs more code size for proper error handling than a statically allocated temporary mask requires data size. Add static temporary cpumask. The function is globaly serialized, so no further protection required. Remove the half baken error handling in the watchdog code and get rid of the export as there are no in tree modular users of that function. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.297288838@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/smpboot.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index 12910cf19869..c149aa7bedf3 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -55,7 +55,7 @@ smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread) } void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread); -int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, - const struct cpumask *); +void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread, + const struct cpumask *); #endif -- cgit v1.2.3 From 3b371b5936e7777c819619c00ca60f196a8e13fa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:13 +0200 Subject: watchdog/core: Clean up header mess Having the same #ifdef in various places does not make it more readable. Collect stuff into one place. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.627096864@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 60 ++++++++++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 91a3a4a4c8ae..cfebb3bc4eed 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -14,11 +14,29 @@ void lockup_detector_init(void); void lockup_detector_soft_poweroff(void); void lockup_detector_cleanup(void); +bool is_hardlockup(void); + +extern int watchdog_user_enabled; +extern int nmi_watchdog_enabled; +extern int soft_watchdog_enabled; +extern int watchdog_thresh; +extern unsigned long watchdog_enabled; + +extern struct cpumask watchdog_cpumask; +extern unsigned long *watchdog_cpumask_bits; +#ifdef CONFIG_SMP +extern int sysctl_softlockup_all_cpu_backtrace; +extern int sysctl_hardlockup_all_cpu_backtrace; #else +#define sysctl_softlockup_all_cpu_backtrace 0 +#define sysctl_hardlockup_all_cpu_backtrace 0 +#endif /* !CONFIG_SMP */ + +#else /* CONFIG_LOCKUP_DETECTOR */ static inline void lockup_detector_init(void) { } static inline void lockup_detector_soft_poweroff(void) { } static inline void lockup_detector_cleanup(void) { } -#endif +#endif /* !CONFIG_LOCKUP_DETECTOR */ #ifdef CONFIG_SOFTLOCKUP_DETECTOR extern void touch_softlockup_watchdog_sched(void); @@ -26,28 +44,17 @@ extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); extern unsigned int softlockup_panic; -extern int soft_watchdog_enabled; #else -static inline void touch_softlockup_watchdog_sched(void) -{ -} -static inline void touch_softlockup_watchdog(void) -{ -} -static inline void touch_softlockup_watchdog_sync(void) -{ -} -static inline void touch_all_softlockup_watchdogs(void) -{ -} +static inline void touch_softlockup_watchdog_sched(void) { } +static inline void touch_softlockup_watchdog(void) { } +static inline void touch_softlockup_watchdog_sync(void) { } +static inline void touch_all_softlockup_watchdogs(void) { } #endif #ifdef CONFIG_DETECT_HUNG_TASK void reset_hung_task_detector(void); #else -static inline void reset_hung_task_detector(void) -{ -} +static inline void reset_hung_task_detector(void) { } #endif /* @@ -92,7 +99,7 @@ static inline void arch_touch_nmi_watchdog(void) {} /** * touch_nmi_watchdog - restart NMI watchdog timeout. - * + * * If the architecture supports the NMI watchdog, touch_nmi_watchdog() * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. @@ -162,21 +169,6 @@ static inline bool trigger_single_cpu_backtrace(int cpu) u64 hw_nmi_get_sample_period(int watchdog_thresh); #endif -#ifdef CONFIG_LOCKUP_DETECTOR -extern int nmi_watchdog_enabled; -extern int watchdog_user_enabled; -extern int watchdog_thresh; -extern unsigned long watchdog_enabled; -extern struct cpumask watchdog_cpumask; -extern unsigned long *watchdog_cpumask_bits; -#ifdef CONFIG_SMP -extern int sysctl_softlockup_all_cpu_backtrace; -extern int sysctl_hardlockup_all_cpu_backtrace; -#else -#define sysctl_softlockup_all_cpu_backtrace 0 -#define sysctl_hardlockup_all_cpu_backtrace 0 -#endif - #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ defined(CONFIG_HARDLOCKUP_DETECTOR) void watchdog_update_hrtimer_threshold(u64 period); @@ -184,7 +176,6 @@ void watchdog_update_hrtimer_threshold(u64 period); static inline void watchdog_update_hrtimer_threshold(u64 period) { } #endif -extern bool is_hardlockup(void); struct ctl_table; extern int proc_watchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); @@ -196,7 +187,6 @@ extern int proc_watchdog_thresh(struct ctl_table *, int , void __user *, size_t *, loff_t *); extern int proc_watchdog_cpumask(struct ctl_table *, int, void __user *, size_t *, loff_t *); -#endif #ifdef CONFIG_HAVE_ACPI_APEI_NMI #include -- cgit v1.2.3 From 51d4052b01ca555e0d1d5fe297b309beb6c64aa0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:14 +0200 Subject: watchdog/sysctl: Get rid of the #ifdeffery The sysctl of the nmi_watchdog file prevents writes by setting: min = max = 0 if none of the users is enabled. That involves ifdeffery and is competely non obvious. If none of the facilities is enabeld, then the file can simply be made read only. Move the ifdeffery into the header and use a constant for file permissions. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.706073616@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index cfebb3bc4eed..5774b443dba1 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -81,6 +81,12 @@ extern unsigned int hardlockup_panic; static inline void hardlockup_detector_disable(void) {} #endif +#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) +# define NMI_WATCHDOG_SYSCTL_PERM 0644 +#else +# define NMI_WATCHDOG_SYSCTL_PERM 0444 +#endif + #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) extern void arch_touch_nmi_watchdog(void); extern void hardlockup_detector_perf_stop(void); -- cgit v1.2.3 From 7feeb9cd4f5b34476ffb9e6d58d58c5416375b19 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:15 +0200 Subject: watchdog/sysctl: Clean up sysctl variable name space Reflect that these variables are user interface related and remove the whitespace damage in the sysctl table while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194147.783210221@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 5774b443dba1..4a8d1037364e 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -17,8 +17,8 @@ void lockup_detector_cleanup(void); bool is_hardlockup(void); extern int watchdog_user_enabled; -extern int nmi_watchdog_enabled; -extern int soft_watchdog_enabled; +extern int nmi_watchdog_user_enabled; +extern int soft_watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; @@ -62,12 +62,12 @@ static inline void reset_hung_task_detector(void) { } * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit - * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector. * - * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled' - * are variables that are only used as an 'interface' between the parameters - * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The - * 'watchdog_thresh' variable is handled differently because its value is not - * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh' - * is equal zero. + * 'watchdog_user_enabled', 'nmi_watchdog_user_enabled' and + * 'soft_watchdog_user_enabled' are variables that are only used as an + * 'interface' between the parameters in /proc/sys/kernel and the internal + * state bits in 'watchdog_enabled'. The 'watchdog_thresh' variable is + * handled differently because its value is not boolean, and the lockup + * detectors are 'suspended' while 'watchdog_thresh' is equal zero. */ #define NMI_WATCHDOG_ENABLED_BIT 0 #define SOFT_WATCHDOG_ENABLED_BIT 1 -- cgit v1.2.3 From 6592ad2fcc8f15b4f99b36c1db7d9f65510c203b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:16 +0200 Subject: watchdog/core, powerpc: Make watchdog_nmi_reconfigure() two stage Both the perf reconfiguration and the powerpc watchdog_nmi_reconfigure() need to be done in two steps. 1) Stop all NMIs 2) Read the new parameters and start NMIs Right now watchdog_nmi_reconfigure() is a combination of both. To allow a clean reconfiguration add a 'run' argument and split the functionality in powerpc. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20170912194147.862865570@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 4a8d1037364e..eee255bc0fd6 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -103,6 +103,8 @@ static inline void arch_touch_nmi_watchdog(void) {} #endif #endif +void watchdog_nmi_reconfigure(bool run); + /** * touch_nmi_watchdog - restart NMI watchdog timeout. * -- cgit v1.2.3 From 178b9f7a36d2c74a38274b66dd89f53611298a19 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:18 +0200 Subject: watchdog/hardlockup/perf: Implement init time perf validation The watchdog tries to create perf events even after it figured out that perf is not functional or the requested event is not supported. That's braindead as this can be done once at init time and if not supported the NMI watchdog can be turned off unconditonally. Implement the perf hardlockup detector functionality for that. This creates a new event create function, which will replace the unholy mess of the existing one in later patches. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194148.019090547@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index eee255bc0fd6..72c62a809e92 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -93,14 +93,18 @@ extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_detector_perf_disable(void); extern void hardlockup_detector_perf_cleanup(void); +extern int hardlockup_detector_perf_init(void); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } static inline void hardlockup_detector_perf_disable(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } -#if !defined(CONFIG_HAVE_NMI_WATCHDOG) +# if !defined(CONFIG_HAVE_NMI_WATCHDOG) +static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } static inline void arch_touch_nmi_watchdog(void) {} -#endif +# else +static inline int hardlockup_detector_perf_init(void) { return 0; } +# endif #endif void watchdog_nmi_reconfigure(bool run); -- cgit v1.2.3 From 2a1b8ee4f5665b4291e43e4a25d964c3eb2f4c32 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Sep 2017 21:37:20 +0200 Subject: watchdog/hardlockup/perf: Implement CPU enable replacement watchdog_nmi_enable() is an unparseable mess, Provide a clean perf specific implementation, which will be used when the existing setup/teardown mess is replaced. Signed-off-by: Thomas Gleixner Reviewed-by: Don Zickus Cc: Andrew Morton Cc: Borislav Petkov Cc: Chris Metcalf Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Ulrich Obergfell Link: http://lkml.kernel.org/r/20170912194148.180215498@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 72c62a809e92..89ba8b23c6fe 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -92,12 +92,14 @@ extern void arch_touch_nmi_watchdog(void); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_detector_perf_disable(void); +extern void hardlockup_detector_perf_enable(void); extern void hardlockup_detector_perf_cleanup(void); extern int hardlockup_detector_perf_init(void); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } static inline void hardlockup_detector_perf_disable(void) { } +static inline void hardlockup_detector_perf_enable(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } # if !defined(CONFIG_HAVE_NMI_WATCHDOG) static inline int hardlockup_detector_perf_init(void) { return -ENODEV; } -- cgit v1.2.3 From 850fdec8d2fd1eebfa003fea39bec08cd69b6155 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 18 Sep 2017 12:17:57 +0200 Subject: driver core: remove DRIVER_ATTR DRIVER_ATTR is no longer in use, and driver authors should be using DRIVER_ATTR_RW() or DRIVER_ATTR_RO() or DRIVER_ATTR_WO() instead in order to always get the permissions correct. So remove it so that no one can use it anymore. Acked-by: Alan Tull Reviewed-by: Moritz Fischer Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index c6f27207dbe8..2bc70ddda09b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -307,8 +307,6 @@ struct driver_attribute { size_t count); }; -#define DRIVER_ATTR(_name, _mode, _show, _store) \ - struct driver_attribute driver_attr_##_name = __ATTR(_name, _mode, _show, _store) #define DRIVER_ATTR_RW(_name) \ struct driver_attribute driver_attr_##_name = __ATTR_RW(_name) #define DRIVER_ATTR_RO(_name) \ -- cgit v1.2.3 From 7fc10de8d49a748c476532c9d8e8fe19e548dd67 Mon Sep 17 00:00:00 2001 From: Dragos Bogdan Date: Tue, 5 Sep 2017 15:14:45 +0300 Subject: iio: ad_sigma_delta: Implement a dedicated reset function Since most of the SD ADCs have the option of reseting the serial interface by sending a number of SCLKs with CS = 0 and DIN = 1, a dedicated function that can do this is usefull. Needed for the patch: iio: ad7793: Fix the serial interface reset Signed-off-by: Dragos Bogdan Acked-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/adc/ad_sigma_delta.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 5ba430cc9a87..1fc7abd28b0b 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -111,6 +111,9 @@ int ad_sd_write_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, int ad_sd_read_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, unsigned int size, unsigned int *val); +int ad_sd_reset(struct ad_sigma_delta *sigma_delta, + unsigned int reset_length); + int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, int *val); int ad_sd_calibrate_all(struct ad_sigma_delta *sigma_delta, -- cgit v1.2.3 From 99d3cd27f755d63fd6cf85169eaa873d90769aa5 Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Thu, 24 Aug 2017 17:21:44 +0300 Subject: net/mlx5: Fix FPGA capability location Currently, FPGA capability is located in (mdev)->caps.hca_cur, change the location to be (mdev)->caps.fpga, since hca_cur is reserved for HCA device capabilities. Fixes: e29341fb3a5b ("net/mlx5: FPGA, Add basic support for Innova") Signed-off-by: Inbar Karmy Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 5 ++--- include/linux/mlx5/driver.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index eaf4ad209c8f..e32dbc4934db 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -980,7 +980,6 @@ enum mlx5_cap_type { MLX5_CAP_RESERVED, MLX5_CAP_VECTOR_CALC, MLX5_CAP_QOS, - MLX5_CAP_FPGA, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1110,10 +1109,10 @@ enum mlx5_mcam_feature_groups { MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld) #define MLX5_CAP_FPGA(mdev, cap) \ - MLX5_GET(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap) + MLX5_GET(fpga_cap, (mdev)->caps.fpga, cap) #define MLX5_CAP64_FPGA(mdev, cap) \ - MLX5_GET64(fpga_cap, (mdev)->caps.hca_cur[MLX5_CAP_FPGA], cap) + MLX5_GET64(fpga_cap, (mdev)->caps.fpga, cap) enum { MLX5_CMD_STAT_OK = 0x0, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 02ff700e4f30..401c8972cc3a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -774,6 +774,7 @@ struct mlx5_core_dev { u32 hca_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; u32 pcam[MLX5_ST_SZ_DW(pcam_reg)]; u32 mcam[MLX5_ST_SZ_DW(mcam_reg)]; + u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; } caps; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; -- cgit v1.2.3 From 16f1c5bb3ed75b3cf3ced537db40f7e1a244debe Mon Sep 17 00:00:00 2001 From: Raed Salem Date: Sun, 30 Jul 2017 11:02:51 +0300 Subject: net/mlx5: Check device capability for maximum flow counters Added check for the maximal number of flow counters attached to rule (FTE). Fixes: bd5251dbf156b ('net/mlx5_core: Introduce flow steering destination of type counter') Signed-off-by: Raed Salem Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a528b35a022e..69772347f866 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -327,7 +327,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_80[0x18]; u8 log_max_destination[0x8]; - u8 reserved_at_a0[0x18]; + u8 log_max_flow_counter[0x8]; + u8 reserved_at_a8[0x10]; u8 log_max_flow[0x8]; u8 reserved_at_c0[0x40]; -- cgit v1.2.3 From e6b72ee88a56bcfe63f72e9c30766484c45bec72 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Tue, 26 Sep 2017 18:35:45 +0200 Subject: netfilter: ebtables: fix race condition in frame_filter_net_init() It is possible for ebt_in_hook to be triggered before ebt_table is assigned resulting in a NULL-pointer dereference. Make sure hooks are registered as the last step. Fixes: aee12a0a3727 ("ebtables: remove nf_hook_register usage") Signed-off-by: Artem Savkov Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 2c2a5514b0df..528b24c78308 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -108,9 +108,10 @@ struct ebt_table { #define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \ ~(__alignof__(struct _xt_align)-1)) -extern struct ebt_table *ebt_register_table(struct net *net, - const struct ebt_table *table, - const struct nf_hook_ops *); +extern int ebt_register_table(struct net *net, + const struct ebt_table *table, + const struct nf_hook_ops *ops, + struct ebt_table **res); extern void ebt_unregister_table(struct net *net, struct ebt_table *table, const struct nf_hook_ops *); extern unsigned int ebt_do_table(struct sk_buff *skb, -- cgit v1.2.3 From fa87b91c94a8fd2c8502b6761be2d08a8e9bcf55 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 3 Oct 2017 16:14:24 -0700 Subject: include/linux/mm.h: fix typo in VM_MPX definition There's a typo in recent change of VM_MPX definition. We want it to be VM_HIGH_ARCH_4, not VM_HIGH_ARCH_BIT_4. This bug does cause visible regressions. In arch_vma_name the vmflags are tested against VM_MPX. With the incorrect value of VM_MPX, a number of vmas (such as the stack) test positive and end up being marked as "[mpx]" in /proc/N/maps instead of their correct names. This confuses tools like rr which expect to be able to find familiar vmas. Fixes: df3735c5b40f ("x86,mpx: make mpx depend on x86-64 to free up VMA flag") Link: http://lkml.kernel.org/r/20170918140253.36856-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reviewed-by: Rik van Riel Cc: Dave Hansen Cc: Kyle Huey Cc: [4.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f8c10d336e42..065d99deb847 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -240,7 +240,7 @@ extern unsigned int kobjsize(const void *objp); #if defined(CONFIG_X86_INTEL_MPX) /* MPX specific bounds table or bounds directory */ -# define VM_MPX VM_HIGH_ARCH_BIT_4 +# define VM_MPX VM_HIGH_ARCH_4 #else # define VM_MPX VM_NONE #endif -- cgit v1.2.3 From 4d4bbd8526a8fbeb2c090ea360211fceff952383 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 3 Oct 2017 16:14:50 -0700 Subject: mm, oom_reaper: skip mm structs with mmu notifiers Andrea has noticed that the oom_reaper doesn't invalidate the range via mmu notifiers (mmu_notifier_invalidate_range_start/end) and that can corrupt the memory of the kvm guest for example. tlb_flush_mmu_tlbonly already invokes mmu notifiers but that is not sufficient as per Andrea: "mmu_notifier_invalidate_range cannot be used in replacement of mmu_notifier_invalidate_range_start/end. For KVM mmu_notifier_invalidate_range is a noop and rightfully so. A MMU notifier implementation has to implement either ->invalidate_range method or the invalidate_range_start/end methods, not both. And if you implement invalidate_range_start/end like KVM is forced to do, calling mmu_notifier_invalidate_range in common code is a noop for KVM. For those MMU notifiers that can get away only implementing ->invalidate_range, the ->invalidate_range is implicitly called by mmu_notifier_invalidate_range_end(). And only those secondary MMUs that share the same pagetable with the primary MMU (like AMD iommuv2) can get away only implementing ->invalidate_range" As the callback is allowed to sleep and the implementation is out of hand of the MM it is safer to simply bail out if there is an mmu notifier registered. In order to not fail too early make the mm_has_notifiers check under the oom_lock and have a little nap before failing to give the current oom victim some more time to exit. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20170913113427.2291-1-mhocko@kernel.org Fixes: aac453635549 ("mm, oom: introduce oom reaper") Signed-off-by: Michal Hocko Reported-by: Andrea Arcangeli Reviewed-by: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 7b2e31b1745a..6866e8126982 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -400,6 +400,11 @@ extern void mmu_notifier_synchronize(void); #else /* CONFIG_MMU_NOTIFIER */ +static inline int mm_has_notifiers(struct mm_struct *mm) +{ + return 0; +} + static inline void mmu_notifier_release(struct mm_struct *mm) { } -- cgit v1.2.3 From a1b2289cef92ef0e9a92afcd2e1ea71d5bcaaf64 Mon Sep 17 00:00:00 2001 From: Sherry Yang Date: Tue, 3 Oct 2017 16:15:00 -0700 Subject: android: binder: drop lru lock in isolate callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the global lru lock in isolate callback before calling zap_page_range which calls cond_resched, and re-acquire the global lru lock before returning. Also change return code to LRU_REMOVED_RETRY. Use mmput_async when fail to acquire mmap sem in an atomic context. Fix "BUG: sleeping function called from invalid context" errors when CONFIG_DEBUG_ATOMIC_SLEEP is enabled. Also restore mmput_async, which was initially introduced in commit ec8d7c14ea14 ("mm, oom_reaper: do not mmput synchronously from the oom reaper context"), and was removed in commit 212925802454 ("mm: oom: let oom_reap_task and exit_mmap run concurrently"). Link: http://lkml.kernel.org/r/20170914182231.90908-1-sherryy@android.com Fixes: f2517eb76f1f2 ("android: binder: Add global lru shrinker to binder") Signed-off-by: Sherry Yang Signed-off-by: Greg Kroah-Hartman Reported-by: Kyle Yan Acked-by: Arve Hjønnevåg Acked-by: Michal Hocko Cc: Martijn Coenen Cc: Todd Kjos Cc: Riley Andrews Cc: Ingo Molnar Cc: Vlastimil Babka Cc: Hillf Danton Cc: Peter Zijlstra Cc: Andrea Arcangeli Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Oleg Nesterov Cc: Hoeun Ryu Cc: Christopher Lameter Cc: Vegard Nossum Cc: Frederic Weisbecker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 3a19c253bdb1..ae53e413fb13 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -84,6 +84,12 @@ static inline bool mmget_not_zero(struct mm_struct *mm) /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); +#ifdef CONFIG_MMU +/* same as above but performs the slow path from the async context. Can + * be called from the atomic context as well + */ +void mmput_async(struct mm_struct *); +#endif /* Grab a reference to a task's mm, if it is not already going away */ extern struct mm_struct *get_task_mm(struct task_struct *task); -- cgit v1.2.3 From c2315c187fa0d3ab363fdebe22718170b40473e3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 3 Oct 2017 16:15:42 -0700 Subject: exec: load_script: kill the onstack interp[BINPRM_BUF_SIZE] array Patch series "exec: binfmt_misc: fix use-after-free, kill iname[BINPRM_BUF_SIZE]". It looks like this code was always wrong, then commit 948b701a607f ("binfmt_misc: add persistent opened binary handler for containers") added more problems. This patch (of 6): load_script() can simply use i_name instead, it points into bprm->buf[] and nobody can change this memory until we call prepare_binprm(). The only complication is that we need to also change the signature of bprm_change_interp() but this change looks good too. While at it, do whitespace/style cleanups. NOTE: the real motivation for this change is that people want to increase BINPRM_BUF_SIZE, we need to change load_misc_binary() too but this looks more complicated because afaics it is very buggy. Link: http://lkml.kernel.org/r/20170918163446.GA26793@redhat.com Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Travis Gummels Cc: Ben Woodard Cc: Jim Foraker Cc: Cc: Al Viro Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index fb44d6180ca0..18d05b5491f3 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -131,7 +131,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, int executable_stack); extern int transfer_args_to_stack(struct linux_binprm *bprm, unsigned long *sp_location); -extern int bprm_change_interp(char *interp, struct linux_binprm *bprm); +extern int bprm_change_interp(const char *interp, struct linux_binprm *bprm); extern int copy_strings_kernel(int argc, const char *const *argv, struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); -- cgit v1.2.3 From 7240767450d6d8380fb153e2998a1bb4ede7b029 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 3 Oct 2017 16:16:04 -0700 Subject: include/linux/bitfield.h: remove 32bit from FIELD_GET comment block I do not see anything that restricts this macro to 32 bit width. Link: http://lkml.kernel.org/r/1505921975-23379-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Acked-by: Jakub Kicinski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitfield.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 8b9d6fff002d..f2deb71958b2 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -92,7 +92,7 @@ /** * FIELD_GET() - extract a bitfield element * @_mask: shifted mask defining the field's length and position - * @_reg: 32bit value of entire bitfield + * @_reg: value of entire bitfield * * FIELD_GET() extracts the field specified by @_mask from the * bitfield passed in as @_reg by masking and shifting it down. -- cgit v1.2.3 From 1dd2bfc86818ddbc95f98e312e7704350223fd7d Mon Sep 17 00:00:00 2001 From: YASUAKI ISHIMATSU Date: Tue, 3 Oct 2017 16:16:29 -0700 Subject: mm/memory_hotplug: change pfn_to_section_nr/section_nr_to_pfn macro to inline function pfn_to_section_nr() and section_nr_to_pfn() are defined as macro. pfn_to_section_nr() has no issue even if it is defined as macro. But section_nr_to_pfn() has overflow issue if sec is defined as int. section_nr_to_pfn() just shifts sec by PFN_SECTION_SHIFT. If sec is defined as unsigned long, section_nr_to_pfn() returns pfn as 64 bit value. But if sec is defined as int, section_nr_to_pfn() returns pfn as 32 bit value. __remove_section() calculates start_pfn using section_nr_to_pfn() and scn_nr defined as int. So if hot-removed memory address is over 16TB, overflow issue occurs and section_nr_to_pfn() does not calculate correct pfn. To make callers use proper arg, the patch changes the macros to inline functions. Fixes: 815121d2b5cd ("memory_hotplug: clear zone when removing the memory") Link: http://lkml.kernel.org/r/e643a387-e573-6bbf-d418-c60c8ee3d15e@gmail.com Signed-off-by: Yasuaki Ishimatsu Acked-by: Michal Hocko Cc: Xishi Qiu Cc: Reza Arbab Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 356a814e7c8e..c8f89417740b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1094,8 +1094,14 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn) #error Allocator MAX_ORDER exceeds SECTION_SIZE #endif -#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) -#define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) +static inline unsigned long pfn_to_section_nr(unsigned long pfn) +{ + return pfn >> PFN_SECTION_SHIFT; +} +static inline unsigned long section_nr_to_pfn(unsigned long sec) +{ + return sec << PFN_SECTION_SHIFT; +} #define SECTION_ALIGN_UP(pfn) (((pfn) + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK) #define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK) -- cgit v1.2.3 From 32e57c29e3c038ac802b7cc214a8795a4234055f Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 3 Oct 2017 16:16:54 -0700 Subject: include/linux/fs.h: fix comment about struct address_space Before commit 9c5d760b8d22 ("mm: split gfp_mask and mapping flags into separate fields") the private_* fields of struct adrress_space were grouped together and using "ditto" in comments describing the last fields was correct. With introduction of gpf_mask between private_lock and private_list "ditto" references the wrong description. Fix it by using the elaborate description. Link: http://lkml.kernel.org/r/1507009987-8746-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 339e73742e73..13dab191a23e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -403,7 +403,7 @@ struct address_space { unsigned long flags; /* error bits */ spinlock_t private_lock; /* for use by the address_space */ gfp_t gfp_mask; /* implicit gfp mask for allocations */ - struct list_head private_list; /* ditto */ + struct list_head private_list; /* for use by the address_space */ void *private_data; /* ditto */ errseq_t wb_err; } __attribute__((aligned(sizeof(long)))) __randomize_layout; -- cgit v1.2.3 From de3ee99b097dd51938276e3af388cd4ad0f2750a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 20 Sep 2017 10:56:14 +0200 Subject: mmc: Delete bounce buffer handling In may, Steven sent a patch deleting the bounce buffer handling and the CONFIG_MMC_BLOCK_BOUNCE option. I chose the less invasive path of making it a runtime config option, and we merged that successfully for kernel v4.12. The code is however just standing in the way and taking up space for seemingly no gain on any systems in wide use today. Pierre says the code was there to improve speed on TI SDHCI controllers on certain HP laptops and possibly some Ricoh controllers as well. Early SDHCI controllers lacked the scatter-gather feature, which made software bounce buffers a significant speed boost. We are clearly talking about the list of SDHCI PCI-based MMC/SD card readers found in the pci_ids[] list in drivers/mmc/host/sdhci-pci-core.c. The TI SDHCI derivative is not supported by the upstream kernel. This leaves the Ricoh. What we can however notice is that the x86 defconfigs in the kernel did not enable CONFIG_MMC_BLOCK_BOUNCE option, which means that any such laptop would have to have a custom configured kernel to actually take advantage of this bounce buffer speed-up. It simply seems like there was a speed optimization for the Ricoh controllers that noone was using. (I have not checked the distro defconfigs but I am pretty sure the situation is the same there.) Bounce buffers increased performance on the OMAP HSMMC at one point, and was part of the original submission in commit a45c6cb81647 ("[ARM] 5369/1: omap mmc: Add new omap hsmmc controller for 2430 and 34xx, v3") This optimization was removed in commit 0ccd76d4c236 ("omap_hsmmc: Implement scatter-gather emulation") which found that scatter-gather emulation provided even better performance. The same was introduced for SDHCI in commit 2134a922c6e7 ("sdhci: scatter-gather (ADMA) support") I am pretty positively convinced that software scatter-gather emulation will do for any host controller what the bounce buffers were doing. Essentially, the bounce buffer was a reimplementation of software scatter-gather-emulation in the MMC subsystem, and it should be done away with. Cc: Pierre Ossman Cc: Juha Yrjola Cc: Steven J. Hill Cc: Shawn Lin Cc: Adrian Hunter Suggested-by: Steven J. Hill Suggested-by: Shawn Lin Signed-off-by: Linus Walleij Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f3f2d07feb2a..9a43763a68ad 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -316,7 +316,7 @@ struct mmc_host { #define MMC_CAP_UHS_SDR50 (1 << 18) /* Host supports UHS SDR50 mode */ #define MMC_CAP_UHS_SDR104 (1 << 19) /* Host supports UHS SDR104 mode */ #define MMC_CAP_UHS_DDR50 (1 << 20) /* Host supports UHS DDR50 mode */ -#define MMC_CAP_NO_BOUNCE_BUFF (1 << 21) /* Disable bounce buffers on host */ +/* (1 << 21) is free for reuse */ #define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ #define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ #define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ -- cgit v1.2.3 From 6b9dc4806b28214a4a260517e59439e0ac12a15e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 2 Oct 2017 12:34:50 +0200 Subject: watchdog/core, powerpc: Replace watchdog_nmi_reconfigure() The recent cleanup of the watchdog code split watchdog_nmi_reconfigure() into two stages. One to stop the NMI and one to restart it after reconfiguration. That was done by adding a boolean 'run' argument to the code, which is functionally correct but not necessarily a piece of art. Replace it by two explicit functions: watchdog_nmi_stop() and watchdog_nmi_start(). Fixes: 6592ad2fcc8f ("watchdog/core, powerpc: Make watchdog_nmi_reconfigure() two stage") Requested-by: Linus 'Nursing his pet-peeve' Torvalds Signed-off-by: Thomas 'Mopping up garbage' Gleixner Acked-by: Michael Ellerman Cc: Peter Zijlstra Cc: Don Zickus Cc: Benjamin Herrenschmidt Cc: Nicholas Piggin Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1710021957480.2114@nanos --- include/linux/nmi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 89ba8b23c6fe..0c9ed49fb21a 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -109,7 +109,8 @@ static inline int hardlockup_detector_perf_init(void) { return 0; } # endif #endif -void watchdog_nmi_reconfigure(bool run); +void watchdog_nmi_stop(void); +void watchdog_nmi_start(void); /** * touch_nmi_watchdog - restart NMI watchdog timeout. -- cgit v1.2.3 From 34ddaa3e5c0096fef52485186c7eb6cf56ddc686 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 3 Oct 2017 16:39:02 +0200 Subject: powerpc/watchdog: Make use of watchdog_nmi_probe() The rework of the core hotplug code triggers the WARN_ON in start_wd_cpu() on powerpc because it is called multiple times for the boot CPU. The first call is via: start_wd_on_cpu+0x80/0x2f0 watchdog_nmi_reconfigure+0x124/0x170 softlockup_reconfigure_threads+0x110/0x130 lockup_detector_init+0xbc/0xe0 kernel_init_freeable+0x18c/0x37c kernel_init+0x2c/0x160 ret_from_kernel_thread+0x5c/0xbc And then again via the CPU hotplug registration: start_wd_on_cpu+0x80/0x2f0 cpuhp_invoke_callback+0x194/0x620 cpuhp_thread_fun+0x7c/0x1b0 smpboot_thread_fn+0x290/0x2a0 kthread+0x168/0x1b0 ret_from_kernel_thread+0x5c/0xbc This can be avoided by setting up the cpu hotplug state with nocalls and move the initialization to the watchdog_nmi_probe() function. That initializes the hotplug callbacks without invoking the callback and the following core initialization function then configures the watchdog for the online CPUs (in this case CPU0) via softlockup_reconfigure_threads(). Reported-and-tested-by: Michael Ellerman Signed-off-by: Thomas Gleixner Acked-by: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Nicholas Piggin Cc: linuxppc-dev@lists.ozlabs.org --- include/linux/nmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 0c9ed49fb21a..27e249ed7c5c 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -111,6 +111,7 @@ static inline int hardlockup_detector_perf_init(void) { return 0; } void watchdog_nmi_stop(void); void watchdog_nmi_start(void); +int watchdog_nmi_probe(void); /** * touch_nmi_watchdog - restart NMI watchdog timeout. -- cgit v1.2.3 From 192b2d78722ffea188e5ec6ae5d55010dce05a4b Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 29 Sep 2017 21:09:36 -0700 Subject: Drivers: hv: vmbus: Fix bugs in rescind handling This patch addresses the following bugs in the current rescind handling code: 1. Fixes a race condition where we may be invoking hv_process_channel_removal() on an already freed channel. 2. Prevents indefinite wait when rescinding sub-channels by correctly setting the probe_complete state. I would like to thank Dexuan for patiently reviewing earlier versions of this patch and identifying many of the issues fixed here. Greg, please apply this to 4.14-final. Fixes: '54a66265d675 ("Drivers: hv: vmbus: Fix rescind handling")' Signed-off-by: K. Y. Srinivasan Reviewed-by: Dexuan Cui Cc: stable@vger.kernel.org # (4.13 and above) Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c458d7b7ad19..6431087816ba 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1403,7 +1403,7 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf, const int *srv_version, int srv_vercnt, int *nego_fw_version, int *nego_srv_version); -void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); +void hv_process_channel_removal(u32 relid); void vmbus_setevent(struct vmbus_channel *channel); /* -- cgit v1.2.3 From 98589a0998b8b13c4a8fa1ccb0e62751a019faa5 Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Mon, 9 Oct 2017 15:27:15 +0300 Subject: netfilter: xt_bpf: Fix XT_BPF_MODE_FD_PINNED mode of 'xt_bpf_info_v1' Commit 2c16d6033264 ("netfilter: xt_bpf: support ebpf") introduced support for attaching an eBPF object by an fd, with the 'bpf_mt_check_v1' ABI expecting the '.fd' to be specified upon each IPT_SO_SET_REPLACE call. However this breaks subsequent iptables calls: # iptables -A INPUT -m bpf --object-pinned /sys/fs/bpf/xxx -j ACCEPT # iptables -A INPUT -s 5.6.7.8 -j ACCEPT iptables: Invalid argument. Run `dmesg' for more information. That's because iptables works by loading existing rules using IPT_SO_GET_ENTRIES to userspace, then issuing IPT_SO_SET_REPLACE with the replacement set. However, the loaded 'xt_bpf_info_v1' has an arbitrary '.fd' number (from the initial "iptables -m bpf" invocation) - so when 2nd invocation occurs, userspace passes a bogus fd number, which leads to 'bpf_mt_check_v1' to fail. One suggested solution [1] was to hack iptables userspace, to perform a "entries fixup" immediatley after IPT_SO_GET_ENTRIES, by opening a new, process-local fd per every 'xt_bpf_info_v1' entry seen. However, in [2] both Pablo Neira Ayuso and Willem de Bruijn suggested to depricate the xt_bpf_info_v1 ABI dealing with pinned ebpf objects. This fix changes the XT_BPF_MODE_FD_PINNED behavior to ignore the given '.fd' and instead perform an in-kernel lookup for the bpf object given the provided '.path'. It also defines an alias for the XT_BPF_MODE_FD_PINNED mode, named XT_BPF_MODE_PATH_PINNED, to better reflect the fact that the user is expected to provide the path of the pinned object. Existing XT_BPF_MODE_FD_ELF behavior (non-pinned fd mode) is preserved. References: [1] https://marc.info/?l=netfilter-devel&m=150564724607440&w=2 [2] https://marc.info/?l=netfilter-devel&m=150575727129880&w=2 Reported-by: Rafael Buchbinder Signed-off-by: Shmulik Ladkani Acked-by: Willem de Bruijn Acked-by: Daniel Borkmann Signed-off-by: Pablo Neira Ayuso --- include/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8390859e79e7..f1af7d63d678 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -368,6 +368,11 @@ static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) { } +static inline int bpf_obj_get_user(const char __user *pathname) +{ + return -EOPNOTSUPP; +} + static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key) { -- cgit v1.2.3 From d153b153446f7d8832bb2ebd92309c8a6003b3bb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Sep 2017 11:35:30 +0200 Subject: sched/core: Fix wake_affine() performance regression Eric reported a sysbench regression against commit: 3fed382b46ba ("sched/numa: Implement NUMA node level wake_affine()") Similarly, Rik was looking at the NAS-lu.C benchmark, which regressed against his v3.10 enterprise kernel. PRE (current tip/master): ivb-ep sysbench: 2: [30 secs] transactions: 64110 (2136.94 per sec.) 5: [30 secs] transactions: 143644 (4787.99 per sec.) 10: [30 secs] transactions: 274298 (9142.93 per sec.) 20: [30 secs] transactions: 418683 (13955.45 per sec.) 40: [30 secs] transactions: 320731 (10690.15 per sec.) 80: [30 secs] transactions: 355096 (11834.28 per sec.) hsw-ex NAS: OMP_PROC_BIND/lu.C.x_threads_144_run_1.log: Time in seconds = 18.01 OMP_PROC_BIND/lu.C.x_threads_144_run_2.log: Time in seconds = 17.89 OMP_PROC_BIND/lu.C.x_threads_144_run_3.log: Time in seconds = 17.93 lu.C.x_threads_144_run_1.log: Time in seconds = 434.68 lu.C.x_threads_144_run_2.log: Time in seconds = 405.36 lu.C.x_threads_144_run_3.log: Time in seconds = 433.83 POST (+patch): ivb-ep sysbench: 2: [30 secs] transactions: 64494 (2149.75 per sec.) 5: [30 secs] transactions: 145114 (4836.99 per sec.) 10: [30 secs] transactions: 278311 (9276.69 per sec.) 20: [30 secs] transactions: 437169 (14571.60 per sec.) 40: [30 secs] transactions: 669837 (22326.73 per sec.) 80: [30 secs] transactions: 631739 (21055.88 per sec.) hsw-ex NAS: lu.C.x_threads_144_run_1.log: Time in seconds = 23.36 lu.C.x_threads_144_run_2.log: Time in seconds = 22.96 lu.C.x_threads_144_run_3.log: Time in seconds = 22.52 This patch takes out all the shiny wake_affine() stuff and goes back to utter basics. Between the two CPUs involved with the wakeup (the CPU doing the wakeup and the CPU we ran on previously) pick the CPU we can run on _now_. This restores much of the regressions against the older kernels, but leaves some ground in the overloaded case. The default-enabled WA_WEIGHT (which will be introduced in the next patch) is an attempt to address the overloaded situation. Reported-by: Eric Farman Signed-off-by: Peter Zijlstra (Intel) Cc: Christian Borntraeger Cc: Linus Torvalds Cc: Matthew Rosato Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: jinpuwang@gmail.com Cc: vcaputo@pengaru.com Fixes: 3fed382b46ba ("sched/numa: Implement NUMA node level wake_affine()") Signed-off-by: Ingo Molnar --- include/linux/sched/topology.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index d7b6dab956ec..7d065abc7a47 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -71,14 +71,6 @@ struct sched_domain_shared { atomic_t ref; atomic_t nr_busy_cpus; int has_idle_cores; - - /* - * Some variables from the most recent sd_lb_stats for this domain, - * used by wake_affine(). - */ - unsigned long nr_running; - unsigned long load; - unsigned long capacity; }; struct sched_domain { -- cgit v1.2.3 From 2bbbd96357ce76cc45ec722c00f654aa7b189112 Mon Sep 17 00:00:00 2001 From: Jan Luebbe Date: Mon, 28 Aug 2017 17:25:16 +0200 Subject: bus: mbus: fix window size calculation for 4GB windows At least the Armada XP SoC supports 4GB on a single DRAM window. Because the size register values contain the actual size - 1, the MSB is set in that case. For example, the SDRAM window's control register's value is 0xffffffe1 for 4GB (bits 31 to 24 contain the size). The MBUS driver reads back each window's size from registers and calculates the actual size as (control_reg | ~DDR_SIZE_MASK) + 1, which overflows for 32 bit values, resulting in other miscalculations further on (a bad RAM window for the CESA crypto engine calculated by mvebu_mbus_setup_cpu_target_nooverlap() in my case). This patch changes the type in 'struct mbus_dram_window' from u32 to u64, which allows us to keep using the same register calculation code in most MBUS-using drivers (which calculate ->size - 1 again). Fixes: fddddb52a6c4 ("bus: introduce an Marvell EBU MBus driver") CC: stable@vger.kernel.org Signed-off-by: Jan Luebbe Signed-off-by: Gregory CLEMENT --- include/linux/mbus.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mbus.h b/include/linux/mbus.h index 0d3f14fd2621..4773145246ed 100644 --- a/include/linux/mbus.h +++ b/include/linux/mbus.h @@ -31,8 +31,8 @@ struct mbus_dram_target_info struct mbus_dram_window { u8 cs_index; u8 mbus_attr; - u32 base; - u32 size; + u64 base; + u64 size; } cs[4]; }; -- cgit v1.2.3 From 30ae9610d275f8f03f5bf7612ce71d8af6fc400b Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Mon, 9 Oct 2017 11:46:55 -0500 Subject: irqchip/gic-v3-its: Add missing changes to support 52bit physical address The current ITS driver works fine as long as normal memory and GICR regions are located within the lower 48bit (>=0 && <2^48) physical address space. Some of the registers GICR_PEND/PROP, GICR_VPEND/VPROP and GITS_CBASER are handled properly but not all when configuring the hardware with 52bit physical address. This patch does the following changes to support 52bit PA. -Handle 52bit PA in GITS_BASERn. -Fix ITT_addr width to 52bits, bits[51:8]. -Fix RDbase width to 52bits, bits[51:16]. -Fix VPT_addr width to 52bits, bits[51:16]. Definition of the GITS_BASERn register when ITS PageSize is 64KB: -Bits[47:16] of the register provide bits[47:16] of the table PA. -Bits[15:12] of the register provide bits[51:48] of the table PA. -Bits[15:00] of the base physical address are 0. Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 1ea576c8126f..14b74f22d43c 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -372,6 +372,8 @@ #define GITS_BASER_ENTRY_SIZE_SHIFT (48) #define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) #define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) +#define GITS_BASER_PHYS_52_to_48(phys) \ + (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12) #define GITS_BASER_SHAREABILITY_SHIFT (10) #define GITS_BASER_InnerShareable \ GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) -- cgit v1.2.3 From 20608924cc2e6bdeaf6f58ccbe9ddfe12dbfa082 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 4 Oct 2017 14:26:26 +0200 Subject: genirq: generic chip: Add irq_gc_mask_disable_and_ack_set() The irq_gc_mask_disable_reg_and_ack() function name implies that it provides the combined functions of irq_gc_mask_disable_reg() and irq_gc_ack(). However, the implementation does not actually do that since it writes the mask instead of the disable register. It also does not maintain the mask cache which makes it inappropriate to use with other masking functions. In addition, commit 659fb32d1b67 ("genirq: replace irq_gc_ack() with {set,clr}_bit variants (fwd)") effectively renamed irq_gc_ack() to irq_gc_ack_set_bit() so this function probably should have also been renamed at that time. The generic chip code currently provides three functions for use with the irq_mask member of the irq_chip structure and two functions for use with the irq_ack member of the irq_chip structure. These functions could be combined into six functions for use with the irq_mask_ack member of the irq_chip structure. However, since only one of the combinations is currently used, only the function irq_gc_mask_disable_and_ack_set() is added by this commit. The '_reg' and '_bit' portions of the base function name were left out of the new combined function name in an attempt to keep the function name length manageable with the 80 character source code line length while still allowing the distinct aspects of each combination to be captured by the name. If other combinations are desired in the future please add them to the irq generic chip library at that time. Signed-off-by: Doug Berger Signed-off-by: Marc Zyngier --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index d4728bf6a537..494d328f7051 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1010,6 +1010,7 @@ void irq_gc_unmask_enable_reg(struct irq_data *d); void irq_gc_ack_set_bit(struct irq_data *d); void irq_gc_ack_clr_bit(struct irq_data *d); void irq_gc_mask_disable_reg_and_ack(struct irq_data *d); +void irq_gc_mask_disable_and_ack_set(struct irq_data *d); void irq_gc_eoi(struct irq_data *d); int irq_gc_set_wake(struct irq_data *d, unsigned int on); -- cgit v1.2.3 From 0d08af35f16a0cc418ad2afde3bc5f70ace82705 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Wed, 4 Oct 2017 14:28:17 +0200 Subject: genirq: generic chip: remove irq_gc_mask_disable_reg_and_ack() Any usage of the irq_gc_mask_disable_reg_and_ack() function has been replaced with the desired functionality. The incorrect and ambiguously named function is removed here to prevent accidental misuse. Signed-off-by: Doug Berger Signed-off-by: Marc Zyngier --- include/linux/irq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 494d328f7051..5ad10948ea95 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1009,7 +1009,6 @@ void irq_gc_mask_clr_bit(struct irq_data *d); void irq_gc_unmask_enable_reg(struct irq_data *d); void irq_gc_ack_set_bit(struct irq_data *d); void irq_gc_ack_clr_bit(struct irq_data *d); -void irq_gc_mask_disable_reg_and_ack(struct irq_data *d); void irq_gc_mask_disable_and_ack_set(struct irq_data *d); void irq_gc_eoi(struct irq_data *d); int irq_gc_set_wake(struct irq_data *d, unsigned int on); -- cgit v1.2.3 From 8a1ac5dc7be09883051b1bf89a5e57d7ad850fa5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Oct 2017 15:57:40 -0700 Subject: include/linux/of.h: provide of_n_{addr,size}_cells wrappers for !CONFIG_OF The pci-rcar driver is enabled for compile tests, and this has shown that the driver cannot build without CONFIG_OF, following the inclusion of commit f8f2fe7355fb ("PCI: rcar: Use new OF interrupt mapping when possible"): drivers/pci/host/pcie-rcar.c: In function 'pci_dma_range_parser_init': drivers/pci/host/pcie-rcar.c:1039:2: error: implicit declaration of function 'of_n_addr_cells' [-Werror=implicit-function-declaration] parser->pna = of_n_addr_cells(node); ^ As pointed out by Ben Dooks and Geert Uytterhoeven, this is actually supposed to build fine, which we can achieve if we make the declaration of of_irq_parse_and_map_pci conditional on CONFIG_OF and provide an empty inline function otherwise, as we do for a lot of other of interfaces. This lets us build the rcar_pci driver again without CONFIG_OF for build testing. All platforms using this driver select OF, so this doesn't change anything for the users. [akpm@linux-foundation.org: be consistent with surrounding code] Link: http://lkml.kernel.org/r/20170911200805.3363318-1-arnd@arndb.de Fixes: c25da4778803 ("PCI: rcar: Add Renesas R-Car PCIe driver") Signed-off-by: Arnd Bergmann Reviewed-by: Frank Rowand Acked-by: Geert Uytterhoeven Cc: Bjorn Helgaas Cc: Magnus Damm Cc: Ben Dooks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/of.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index cfc34117fc92..b240ed69dc96 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -734,6 +734,16 @@ static inline struct device_node *of_get_cpu_node(int cpu, return NULL; } +static inline int of_n_addr_cells(struct device_node *np) +{ + return 0; + +} +static inline int of_n_size_cells(struct device_node *np) +{ + return 0; +} + static inline int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value) { -- cgit v1.2.3 From e8c97af0c1f23d6ffedcaa3918861f2595e1db62 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 13 Oct 2017 15:58:11 -0700 Subject: linux/kernel.h: add/correct kernel-doc notation Add kernel-doc notation for some macros. Correct kernel-doc comments & typos for a few macros. Link: http://lkml.kernel.org/r/76fa1403-1511-be4c-e9c4-456b43edfad3@infradead.org Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 90 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 74 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 0ad4c3044cf9..91189bb0c818 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -44,6 +44,12 @@ #define STACK_MAGIC 0xdeadbeef +/** + * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value + * @x: value to repeat + * + * NOTE: @x is not checked for > 0xff; larger values produce odd results. + */ #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) /* @a is a power of 2 value */ @@ -57,6 +63,10 @@ #define READ 0 #define WRITE 1 +/** + * ARRAY_SIZE - get the number of elements in array @arr + * @arr: array to be sized + */ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) #define u64_to_user_ptr(x) ( \ @@ -76,7 +86,15 @@ #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) #define round_down(x, y) ((x) & ~__round_mask(x, y)) +/** + * FIELD_SIZEOF - get the size of a struct's field + * @t: the target struct + * @f: the target struct's field + * Return: the size of @f in the struct definition without having a + * declared instance of @t. + */ #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) + #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP #define DIV_ROUND_DOWN_ULL(ll, d) \ @@ -107,7 +125,7 @@ /* * Divide positive or negative dividend by positive or negative divisor * and round to closest integer. Result is undefined for negative - * divisors if he dividend variable type is unsigned and for negative + * divisors if the dividend variable type is unsigned and for negative * dividends if the divisor variable type is unsigned. */ #define DIV_ROUND_CLOSEST(x, divisor)( \ @@ -247,13 +265,13 @@ extern int _cond_resched(void); * @ep_ro: right open interval endpoint * * Perform a "reciprocal multiplication" in order to "scale" a value into - * range [0, ep_ro), where the upper interval endpoint is right-open. + * range [0, @ep_ro), where the upper interval endpoint is right-open. * This is useful, e.g. for accessing a index of an array containing - * ep_ro elements, for example. Think of it as sort of modulus, only that + * @ep_ro elements, for example. Think of it as sort of modulus, only that * the result isn't that of modulo. ;) Note that if initial input is a * small value, then result will return 0. * - * Return: a result based on val in interval [0, ep_ro). + * Return: a result based on @val in interval [0, @ep_ro). */ static inline u32 reciprocal_scale(u32 val, u32 ep_ro) { @@ -618,8 +636,8 @@ do { \ * trace_printk - printf formatting in the ftrace buffer * @fmt: the printf format for printing * - * Note: __trace_printk is an internal function for trace_printk and - * the @ip is passed in via the trace_printk macro. + * Note: __trace_printk is an internal function for trace_printk() and + * the @ip is passed in via the trace_printk() macro. * * This function allows a kernel developer to debug fast path sections * that printk is not appropriate for. By scattering in various @@ -629,7 +647,7 @@ do { \ * This is intended as a debugging tool for the developer only. * Please refrain from leaving trace_printks scattered around in * your code. (Extra memory is used for special buffers that are - * allocated when trace_printk() is used) + * allocated when trace_printk() is used.) * * A little optization trick is done here. If there's only one * argument, there's no need to scan the string for printf formats. @@ -681,7 +699,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...); * the @ip is passed in via the trace_puts macro. * * This is similar to trace_printk() but is made for those really fast - * paths that a developer wants the least amount of "Heisenbug" affects, + * paths that a developer wants the least amount of "Heisenbug" effects, * where the processing of the print format is still too much. * * This function allows a kernel developer to debug fast path sections @@ -692,7 +710,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...); * This is intended as a debugging tool for the developer only. * Please refrain from leaving trace_puts scattered around in * your code. (Extra memory is used for special buffers that are - * allocated when trace_puts() is used) + * allocated when trace_puts() is used.) * * Returns: 0 if nothing was written, positive # if string was. * (1 when __trace_bputs is used, strlen(str) when __trace_puts is used) @@ -771,6 +789,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } t2 min2 = (y); \ (void) (&min1 == &min2); \ min1 < min2 ? min1 : min2; }) + +/** + * min - return minimum of two values of the same or compatible types + * @x: first value + * @y: second value + */ #define min(x, y) \ __min(typeof(x), typeof(y), \ __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ @@ -781,12 +805,31 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } t2 max2 = (y); \ (void) (&max1 == &max2); \ max1 > max2 ? max1 : max2; }) + +/** + * max - return maximum of two values of the same or compatible types + * @x: first value + * @y: second value + */ #define max(x, y) \ __max(typeof(x), typeof(y), \ __UNIQUE_ID(max1_), __UNIQUE_ID(max2_), \ x, y) +/** + * min3 - return minimum of three values + * @x: first value + * @y: second value + * @z: third value + */ #define min3(x, y, z) min((typeof(x))min(x, y), z) + +/** + * max3 - return maximum of three values + * @x: first value + * @y: second value + * @z: third value + */ #define max3(x, y, z) max((typeof(x))max(x, y), z) /** @@ -805,8 +848,8 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @lo: lowest allowable value * @hi: highest allowable value * - * This macro does strict typechecking of lo/hi to make sure they are of the - * same type as val. See the unnecessary pointer comparisons. + * This macro does strict typechecking of @lo/@hi to make sure they are of the + * same type as @val. See the unnecessary pointer comparisons. */ #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) @@ -816,11 +859,24 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * * Or not use min/max/clamp at all, of course. */ + +/** + * min_t - return minimum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ #define min_t(type, x, y) \ __min(type, type, \ __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ x, y) +/** + * max_t - return maximum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ #define max_t(type, x, y) \ __max(type, type, \ __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ @@ -834,7 +890,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @hi: maximum allowable value * * This macro does no typechecking and uses temporary variables of type - * 'type' to make all the comparisons. + * @type to make all the comparisons. */ #define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) @@ -845,15 +901,17 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } * @hi: maximum allowable value * * This macro does no typechecking and uses temporary variables of whatever - * type the input argument 'val' is. This is useful when val is an unsigned - * type and min and max are literals that will otherwise be assigned a signed + * type the input argument @val is. This is useful when @val is an unsigned + * type and @lo and @hi are literals that will otherwise be assigned a signed * integer type. */ #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) -/* - * swap - swap value of @a and @b +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value */ #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) -- cgit v1.2.3 From f892760aa66a2d657deaf59538fb69433036767c Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 13 Oct 2017 15:58:15 -0700 Subject: fs/mpage.c: fix mpage_writepage() for pages with buffers When using FAT on a block device which supports rw_page, we can hit BUG_ON(!PageLocked(page)) in try_to_free_buffers(). This is because we call clean_buffers() after unlocking the page we've written. Introduce a new clean_page_buffers() which cleans all buffers associated with a page and call it from within bdev_write_page(). [akpm@linux-foundation.org: s/PAGE_SIZE/~0U/ per Linus and Matthew] Link: http://lkml.kernel.org/r/20171006211541.GA7409@bombadil.infradead.org Signed-off-by: Matthew Wilcox Reported-by: Toshi Kani Reported-by: OGAWA Hirofumi Tested-by: Toshi Kani Acked-by: Johannes Thumshirn Cc: Ross Zwisler Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c8dae555eccf..446b24cac67d 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -232,6 +232,7 @@ int generic_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); +void clean_page_buffers(struct page *page); int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t *, loff_t *); -- cgit v1.2.3 From ca182551857cc2c1e6a2b7f1e72090a137a15008 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 13 Oct 2017 15:58:22 -0700 Subject: kmemleak: clear stale pointers from task stacks Kmemleak considers any pointers on task stacks as references. This patch clears newly allocated and reused vmap stacks. Link: http://lkml.kernel.org/r/150728990124.744199.8403409836394318684.stgit@buzz Signed-off-by: Konstantin Khlebnikov Acked-by: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 905d769d8ddc..5f7eeab990fe 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -42,7 +42,7 @@ enum { #define THREAD_ALIGN THREAD_SIZE #endif -#ifdef CONFIG_DEBUG_STACK_USAGE +#if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK) # define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \ __GFP_ZERO) #else -- cgit v1.2.3 From 0ad646c81b2182f7fa67ec0c8c825e0ee165696d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 13 Oct 2017 11:58:53 -0700 Subject: tun: call dev_get_valid_name() before register_netdevice() register_netdevice() could fail early when we have an invalid dev name, in which case ->ndo_uninit() is not called. For tun device, this is a problem because a timer etc. are already initialized and it expects ->ndo_uninit() to clean them up. We could move these initializations into a ->ndo_init() so that register_netdevice() knows better, however this is still complicated due to the logic in tun_detach(). Therefore, I choose to just call dev_get_valid_name() before register_netdevice(), which is quicker and much easier to audit. And for this specific case, it is already enough. Fixes: 96442e42429e ("tuntap: choose the txq based on rxq") Reported-by: Dmitry Alexeev Cc: Jason Wang Cc: "Michael S. Tsirkin" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f535779d9dc1..2eaac7d75af4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3694,6 +3694,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs); +int dev_get_valid_name(struct net *net, struct net_device *dev, + const char *name); + #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1) -- cgit v1.2.3 From 363b02dab09b3226f3bd1420dad9c72b79a42a76 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Oct 2017 16:43:25 +0100 Subject: KEYS: Fix race between updating and finding a negative key Consolidate KEY_FLAG_INSTANTIATED, KEY_FLAG_NEGATIVE and the rejection error into one field such that: (1) The instantiation state can be modified/read atomically. (2) The error can be accessed atomically with the state. (3) The error isn't stored unioned with the payload pointers. This deals with the problem that the state is spread over three different objects (two bits and a separate variable) and reading or updating them atomically isn't practical, given that not only can uninstantiated keys change into instantiated or rejected keys, but rejected keys can also turn into instantiated keys - and someone accessing the key might not be using any locking. The main side effect of this problem is that what was held in the payload may change, depending on the state. For instance, you might observe the key to be in the rejected state. You then read the cached error, but if the key semaphore wasn't locked, the key might've become instantiated between the two reads - and you might now have something in hand that isn't actually an error code. The state is now KEY_IS_UNINSTANTIATED, KEY_IS_POSITIVE or a negative error code if the key is negatively instantiated. The key_is_instantiated() function is replaced with key_is_positive() to avoid confusion as negative keys are also 'instantiated'. Additionally, barriering is included: (1) Order payload-set before state-set during instantiation. (2) Order state-read before payload-read when using the key. Further separate barriering is necessary if RCU is being used to access the payload content after reading the payload pointers. Fixes: 146aa8b1453b ("KEYS: Merge the type-specific data with the payload data") Cc: stable@vger.kernel.org # v4.4+ Reported-by: Eric Biggers Signed-off-by: David Howells Reviewed-by: Eric Biggers --- include/linux/key.h | 47 ++++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index e315e16b6ff8..8a15cabe928d 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -138,6 +138,11 @@ struct key_restriction { struct key_type *keytype; }; +enum key_state { + KEY_IS_UNINSTANTIATED, + KEY_IS_POSITIVE, /* Positively instantiated */ +}; + /*****************************************************************************/ /* * authentication token / access credential / keyring @@ -169,6 +174,7 @@ struct key { * - may not match RCU dereferenced payload * - payload should contain own length */ + short state; /* Key state (+) or rejection error (-) */ #ifdef KEY_DEBUGGING unsigned magic; @@ -176,18 +182,16 @@ struct key { #endif unsigned long flags; /* status flags (change with bitops) */ -#define KEY_FLAG_INSTANTIATED 0 /* set if key has been instantiated */ -#define KEY_FLAG_DEAD 1 /* set if key type has been deleted */ -#define KEY_FLAG_REVOKED 2 /* set if key had been revoked */ -#define KEY_FLAG_IN_QUOTA 3 /* set if key consumes quota */ -#define KEY_FLAG_USER_CONSTRUCT 4 /* set if key is being constructed in userspace */ -#define KEY_FLAG_NEGATIVE 5 /* set if key is negative */ -#define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */ -#define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ -#define KEY_FLAG_BUILTIN 8 /* set if key is built in to the kernel */ -#define KEY_FLAG_ROOT_CAN_INVAL 9 /* set if key can be invalidated by root without permission */ -#define KEY_FLAG_KEEP 10 /* set if key should not be removed */ -#define KEY_FLAG_UID_KEYRING 11 /* set if key is a user or user session keyring */ +#define KEY_FLAG_DEAD 0 /* set if key type has been deleted */ +#define KEY_FLAG_REVOKED 1 /* set if key had been revoked */ +#define KEY_FLAG_IN_QUOTA 2 /* set if key consumes quota */ +#define KEY_FLAG_USER_CONSTRUCT 3 /* set if key is being constructed in userspace */ +#define KEY_FLAG_ROOT_CAN_CLEAR 4 /* set if key can be cleared by root without permission */ +#define KEY_FLAG_INVALIDATED 5 /* set if key has been invalidated */ +#define KEY_FLAG_BUILTIN 6 /* set if key is built in to the kernel */ +#define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */ +#define KEY_FLAG_KEEP 8 /* set if key should not be removed */ +#define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */ /* the key type and key description string * - the desc is used to match a key against search criteria @@ -213,7 +217,6 @@ struct key { struct list_head name_link; struct assoc_array keys; }; - int reject_error; }; /* This is set on a keyring to restrict the addition of a link to a key @@ -353,17 +356,27 @@ extern void key_set_timeout(struct key *, unsigned); #define KEY_NEED_SETATTR 0x20 /* Require permission to change attributes */ #define KEY_NEED_ALL 0x3f /* All the above permissions */ +static inline short key_read_state(const struct key *key) +{ + /* Barrier versus mark_key_instantiated(). */ + return smp_load_acquire(&key->state); +} + /** - * key_is_instantiated - Determine if a key has been positively instantiated + * key_is_positive - Determine if a key has been positively instantiated * @key: The key to check. * * Return true if the specified key has been positively instantiated, false * otherwise. */ -static inline bool key_is_instantiated(const struct key *key) +static inline bool key_is_positive(const struct key *key) +{ + return key_read_state(key) == KEY_IS_POSITIVE; +} + +static inline bool key_is_negative(const struct key *key) { - return test_bit(KEY_FLAG_INSTANTIATED, &key->flags) && - !test_bit(KEY_FLAG_NEGATIVE, &key->flags); + return key_read_state(key) < 0; } #define dereference_key_rcu(KEY) \ -- cgit v1.2.3 From 55dfce873dca46df00304c44a568d7933bffff89 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 9 Oct 2017 11:09:33 -0700 Subject: Input: factor out and export input_device_id matching code Factor out and export input_match_device_id() so that modules may use it. It will be needed by joydev to blacklist accelerometers in composite devices. Tested-by: Roderick Colenbrander Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index fb5e23c7ed98..2a44650e449d 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -469,6 +469,9 @@ int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke); int input_set_keycode(struct input_dev *dev, const struct input_keymap_entry *ke); +bool input_match_device_id(const struct input_dev *dev, + const struct input_device_id *id); + void input_enable_softrepeat(struct input_dev *dev, int delay, int period); extern struct class input_class; -- cgit v1.2.3 From 8724ecb072293f109a6f5dc93be8a98bf61fe14f Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 9 Oct 2017 12:01:14 -0700 Subject: Input: allow matching device IDs on property bits Let's allow matching input devices on their property bits, both in-kernel and when generating module aliases. Tested-by: Roderick Colenbrander Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 4 ++++ include/linux/mod_devicetable.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 2a44650e449d..7c7516eb7d76 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -234,6 +234,10 @@ struct input_dev { #error "SW_MAX and INPUT_DEVICE_ID_SW_MAX do not match" #endif +#if INPUT_PROP_MAX != INPUT_DEVICE_ID_PROP_MAX +#error "INPUT_PROP_MAX and INPUT_DEVICE_ID_PROP_MAX do not match" +#endif + #define INPUT_DEVICE_ID_MATCH_DEVICE \ (INPUT_DEVICE_ID_MATCH_BUS | INPUT_DEVICE_ID_MATCH_VENDOR | INPUT_DEVICE_ID_MATCH_PRODUCT) #define INPUT_DEVICE_ID_MATCH_DEVICE_AND_VERSION \ diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 3f74ef2281e8..72f0b7f19c59 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -293,6 +293,7 @@ struct pcmcia_device_id { #define INPUT_DEVICE_ID_SND_MAX 0x07 #define INPUT_DEVICE_ID_FF_MAX 0x7f #define INPUT_DEVICE_ID_SW_MAX 0x0f +#define INPUT_DEVICE_ID_PROP_MAX 0x1f #define INPUT_DEVICE_ID_MATCH_BUS 1 #define INPUT_DEVICE_ID_MATCH_VENDOR 2 @@ -308,6 +309,7 @@ struct pcmcia_device_id { #define INPUT_DEVICE_ID_MATCH_SNDBIT 0x0400 #define INPUT_DEVICE_ID_MATCH_FFBIT 0x0800 #define INPUT_DEVICE_ID_MATCH_SWBIT 0x1000 +#define INPUT_DEVICE_ID_MATCH_PROPBIT 0x2000 struct input_device_id { @@ -327,6 +329,7 @@ struct input_device_id { kernel_ulong_t sndbit[INPUT_DEVICE_ID_SND_MAX / BITS_PER_LONG + 1]; kernel_ulong_t ffbit[INPUT_DEVICE_ID_FF_MAX / BITS_PER_LONG + 1]; kernel_ulong_t swbit[INPUT_DEVICE_ID_SW_MAX / BITS_PER_LONG + 1]; + kernel_ulong_t propbit[INPUT_DEVICE_ID_PROP_MAX / BITS_PER_LONG + 1]; kernel_ulong_t driver_info; }; -- cgit v1.2.3 From a961e40917fb14614d368d8bc9782ca4d6a8cd11 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 19 Oct 2017 13:30:15 -0400 Subject: membarrier: Provide register expedited private command This introduces a "register private expedited" membarrier command which allows eventual removal of important memory barrier constraints on the scheduler fast-paths. It changes how the "private expedited" membarrier command (new to 4.14) is used from user-space. This new command allows processes to register their intent to use the private expedited command. This affects how the expedited private command introduced in 4.14-rc is meant to be used, and should be merged before 4.14 final. Processes are now required to register before using MEMBARRIER_CMD_PRIVATE_EXPEDITED, otherwise that command returns EPERM. This fixes a problem that arose when designing requested extensions to sys_membarrier() to allow JITs to efficiently flush old code from instruction caches. Several potential algorithms are much less painful if the user register intent to use this functionality early on, for example, before the process spawns the second thread. Registering at this time removes the need to interrupt each and every thread in that process at the first expedited sys_membarrier() system call. Signed-off-by: Mathieu Desnoyers Acked-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Alexander Viro Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 3 +++ include/linux/sched/mm.h | 16 ++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 46f4ecf5479a..1861ea8dba77 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -445,6 +445,9 @@ struct mm_struct { unsigned long flags; /* Must use atomic bitops to access the bits */ struct core_state *core_state; /* coredumping support */ +#ifdef CONFIG_MEMBARRIER + atomic_t membarrier_state; +#endif #ifdef CONFIG_AIO spinlock_t ioctx_lock; struct kioctx_table __rcu *ioctx_table; diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index ae53e413fb13..ab9bf7b73954 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -211,4 +211,20 @@ static inline void memalloc_noreclaim_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC) | flags; } +#ifdef CONFIG_MEMBARRIER +enum { + MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY = (1U << 0), + MEMBARRIER_STATE_SWITCH_MM = (1U << 1), +}; + +static inline void membarrier_execve(struct task_struct *t) +{ + atomic_set(&t->mm->membarrier_state, 0); +} +#else +static inline void membarrier_execve(struct task_struct *t) +{ +} +#endif + #endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From 27fdb35fe99011d86bcc54f62fe84712c53f4d05 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 19 Oct 2017 14:26:21 -0700 Subject: doc: Fix various RCU docbook comment-header problems Because many of RCU's files have not been included into docbook, a number of errors have accumulated. This commit fixes them. Signed-off-by: Paul E. McKenney Signed-off-by: Linus Torvalds --- include/linux/rculist.h | 2 +- include/linux/rcupdate.h | 22 ++++++++++++++-------- include/linux/srcu.h | 1 + 3 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index b1fd8bf85fdc..2bea1d5e9930 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -276,7 +276,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, #define list_entry_rcu(ptr, type, member) \ container_of(lockless_dereference(ptr), type, member) -/** +/* * Where are list_empty_rcu() and list_first_entry_rcu()? * * Implementing those functions following their counterparts list_empty() and diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index de50d8a4cf41..1a9f70d44af9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -523,7 +523,7 @@ static inline void rcu_preempt_sleep_check(void) { } * Return the value of the specified RCU-protected pointer, but omit * both the smp_read_barrier_depends() and the READ_ONCE(). This * is useful in cases where update-side locks prevent the value of the - * pointer from changing. Please note that this primitive does -not- + * pointer from changing. Please note that this primitive does *not* * prevent the compiler from repeating this reference or combining it * with other references, so it should not be used without protection * of appropriate locks. @@ -568,7 +568,7 @@ static inline void rcu_preempt_sleep_check(void) { } * is handed off from RCU to some other synchronization mechanism, for * example, reference counting or locking. In C11, it would map to * kill_dependency(). It could be used as follows: - * + * `` * rcu_read_lock(); * p = rcu_dereference(gp); * long_lived = is_long_lived(p); @@ -579,6 +579,7 @@ static inline void rcu_preempt_sleep_check(void) { } * p = rcu_pointer_handoff(p); * } * rcu_read_unlock(); + *`` */ #define rcu_pointer_handoff(p) (p) @@ -778,18 +779,21 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /** * RCU_INIT_POINTER() - initialize an RCU protected pointer + * @p: The pointer to be initialized. + * @v: The value to initialized the pointer to. * * Initialize an RCU-protected pointer in special cases where readers * do not need ordering constraints on the CPU or the compiler. These * special cases are: * - * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or- + * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer *or* * 2. The caller has taken whatever steps are required to prevent - * RCU readers from concurrently accessing this pointer -or- + * RCU readers from concurrently accessing this pointer *or* * 3. The referenced data structure has already been exposed to - * readers either at compile time or via rcu_assign_pointer() -and- - * a. You have not made -any- reader-visible changes to - * this structure since then -or- + * readers either at compile time or via rcu_assign_pointer() *and* + * + * a. You have not made *any* reader-visible changes to + * this structure since then *or* * b. It is OK for readers accessing this structure from its * new location to see the old state of the structure. (For * example, the changes were to statistical counters or to @@ -805,7 +809,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * by a single external-to-structure RCU-protected pointer, then you may * use RCU_INIT_POINTER() to initialize the internal RCU-protected * pointers, but you must use rcu_assign_pointer() to initialize the - * external-to-structure pointer -after- you have completely initialized + * external-to-structure pointer *after* you have completely initialized * the reader-accessible portions of the linked structure. * * Note that unlike rcu_assign_pointer(), RCU_INIT_POINTER() provides no @@ -819,6 +823,8 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /** * RCU_POINTER_INITIALIZER() - statically initialize an RCU protected pointer + * @p: The pointer to be initialized. + * @v: The value to initialized the pointer to. * * GCC-style initialization for an RCU-protected pointer in a structure field. */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 39af9bc0f653..62be8966e837 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -78,6 +78,7 @@ void synchronize_srcu(struct srcu_struct *sp); /** * srcu_read_lock_held - might we be in SRCU read-side critical section? + * @sp: The srcu_struct structure to check * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, -- cgit v1.2.3 From 34f79502bbcfab659b8729da68b5e387f96eb4c1 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 18 Oct 2017 07:10:36 -0700 Subject: bpf: avoid preempt enable/disable in sockmap using tcp_skb_cb region SK_SKB BPF programs are run from the socket/tcp context but early in the stack before much of the TCP metadata is needed in tcp_skb_cb. So we can use some unused fields to place BPF metadata needed for SK_SKB programs when implementing the redirect function. This allows us to drop the preempt disable logic. It does however require an API change so sk_redirect_map() has been updated to additionally provide ctx_ptr to skb. Note, we do however continue to disable/enable preemption around actual BPF program running to account for map updates. Signed-off-by: John Fastabend Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index d29e58fde364..818a0b26249e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -728,7 +728,7 @@ void xdp_do_flush_map(void); void bpf_warn_invalid_xdp_action(u32 act); void bpf_warn_invalid_xdp_redirect(u32 ifindex); -struct sock *do_sk_redirect_map(void); +struct sock *do_sk_redirect_map(struct sk_buff *skb); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; -- cgit v1.2.3 From 88796e7e5c457cae72833196cb98e6895dd107e2 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 20 Oct 2017 10:13:46 -0700 Subject: sched/swait: Document it clearly that the swait facilities are special and shouldn't be used We currently welcome using swait over wait whenever possible because it is a slimmer data structure. However, Linus has made it very clear that he does not want this used, unless under very specific RT scenarios (such as current users). Update the comments before kernel hipsters start thinking swait is the cool thing to do. Signed-off-by: Davidlohr Bueso Acked-by: Luis R. Rodriguez Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: wagi@monom.org Link: http://lkml.kernel.org/r/20171020171346.24445-1-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/swait.h | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swait.h b/include/linux/swait.h index 73e97a08d3d0..cf30f5022472 100644 --- a/include/linux/swait.h +++ b/include/linux/swait.h @@ -9,13 +9,16 @@ /* * Simple wait queues * - * While these are very similar to the other/complex wait queues (wait.h) the - * most important difference is that the simple waitqueue allows for - * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold - * times. + * While these are very similar to regular wait queues (wait.h) the most + * important difference is that the simple waitqueue allows for deterministic + * behaviour -- IOW it has strictly bounded IRQ and lock hold times. * - * In order to make this so, we had to drop a fair number of features of the - * other waitqueue code; notably: + * Mainly, this is accomplished by two things. Firstly not allowing swake_up_all + * from IRQ disabled, and dropping the lock upon every wakeup, giving a higher + * priority task a chance to run. + * + * Secondly, we had to drop a fair number of features of the other waitqueue + * code; notably: * * - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue; * all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right @@ -24,12 +27,14 @@ * - the exclusive mode; because this requires preserving the list order * and this is hard. * - * - custom wake functions; because you cannot give any guarantees about - * random code. - * - * As a side effect of this; the data structures are slimmer. + * - custom wake callback functions; because you cannot give any guarantees + * about random code. This also allows swait to be used in RT, such that + * raw spinlock can be used for the swait queue head. * - * One would recommend using this wait queue where possible. + * As a side effect of these; the data structures are slimmer albeit more ad-hoc. + * For all the above, note that simple wait queues should _only_ be used under + * very specific realtime constraints -- it is best to stick with the regular + * wait queues in most cases. */ struct task_struct; -- cgit v1.2.3 From 0cc2b4e5a020fc7f4d1795741c116c983e9467d7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 24 Oct 2017 15:20:45 +0200 Subject: PM / QoS: Fix device resume latency PM QoS The special value of 0 for device resume latency PM QoS means "no restriction", but there are two problems with that. First, device resume latency PM QoS requests with 0 as the value are always put in front of requests with positive values in the priority lists used internally by the PM QoS framework, causing 0 to be chosen as an effective constraint value. However, that 0 is then interpreted as "no restriction" effectively overriding the other requests with specific restrictions which is incorrect. Second, the users of device resume latency PM QoS have no way to specify that *any* resume latency at all should be avoided, which is an artificial limitation in general. To address these issues, modify device resume latency PM QoS to use S32_MAX as the "no constraint" value and 0 as the "no latency at all" one and rework its users (the cpuidle menu governor, the genpd QoS governor and the runtime PM framework) to follow these changes. Also add a special "n/a" value to the corresponding user space I/F to allow user space to indicate that it cannot accept any resume latencies at all for the given device. Fixes: 85dc0b8a4019 (PM / QoS: Make it possible to expose PM QoS latency constraints) Link: https://bugzilla.kernel.org/show_bug.cgi?id=197323 Reported-by: Reinette Chatre Tested-by: Reinette Chatre Signed-off-by: Rafael J. Wysocki Acked-by: Alex Shi Cc: All applicable --- include/linux/pm_qos.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 032b55909145..6737a8c9e8c6 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -27,16 +27,17 @@ enum pm_qos_flags_status { PM_QOS_FLAGS_ALL, }; -#define PM_QOS_DEFAULT_VALUE -1 +#define PM_QOS_DEFAULT_VALUE (-1) +#define PM_QOS_LATENCY_ANY S32_MAX #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 #define PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE 0 #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE 0 +#define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0 #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1) -#define PM_QOS_LATENCY_ANY ((s32)(~(__u32)0 >> 1)) #define PM_QOS_FLAG_NO_POWER_OFF (1 << 0) #define PM_QOS_FLAG_REMOTE_WAKEUP (1 << 1) -- cgit v1.2.3 From be0f161ef141e4df368aa3f417a1c2ab9c362e75 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 28 Sep 2017 15:33:50 -0500 Subject: net/mlx5e: DCBNL, Implement tc with ets type and zero bandwidth Previously, tc with ets type and zero bandwidth is not accepted by driver. This behavior does not follow the IEEE802.1qaz spec. If there are tcs with ets type and zero bandwidth, these tcs are assigned to the lowest priority tc_group #0. We equally distribute 100% bw of the tc_group #0 to these zero bandwidth ets tcs. Also, the non zero bandwidth ets tcs are assigned to tc_group #1. If there is no zero bandwidth ets tc, the non zero bandwidth ets tcs are assigned to tc_group #0. Fixes: cdcf11212b22 ("net/mlx5e: Validate BW weight values of ETS") Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/port.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index c57d4b7de3a8..c59af8ab753a 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -157,6 +157,8 @@ int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc); int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, u8 prio, u8 *tc); int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group); +int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, + u8 tc, u8 *tc_group); int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw); int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 tc, u8 *bw_pct); -- cgit v1.2.3 From dea6e19f4ef746aa18b4c33d1a7fed54356796ed Mon Sep 17 00:00:00 2001 From: Girish Moodalbail Date: Fri, 27 Oct 2017 00:00:16 -0700 Subject: tap: reference to KVA of an unloaded module causes kernel panic The commit 9a393b5d5988 ("tap: tap as an independent module") created a separate tap module that implements tap functionality and exports interfaces that will be used by macvtap and ipvtap modules to create create respective tap devices. However, that patch introduced a regression wherein the modules macvtap and ipvtap can be removed (through modprobe -r) while there are applications using the respective /dev/tapX devices. These applications cause kernel to hold reference to /dev/tapX through 'struct cdev macvtap_cdev' and 'struct cdev ipvtap_dev' defined in macvtap and ipvtap modules respectively. So, when the application is later closed the kernel panics because we are referencing KVA that is present in the unloaded modules. ----------8<------- Example ----------8<---------- $ sudo ip li add name mv0 link enp7s0 type macvtap $ sudo ip li show mv0 |grep mv0| awk -e '{print $1 $2}' 14:mv0@enp7s0: $ cat /dev/tap14 & $ lsmod |egrep -i 'tap|vlan' macvtap 16384 0 macvlan 24576 1 macvtap tap 24576 3 macvtap $ sudo modprobe -r macvtap $ fg cat /dev/tap14 ^C <...system panics...> BUG: unable to handle kernel paging request at ffffffffa038c500 IP: cdev_put+0xf/0x30 ----------8<-----------------8<---------- The fix is to set cdev.owner to the module that creates the tap device (either macvtap or ipvtap). With this set, the operations (in fs/char_dev.c) on char device holds and releases the module through cdev_get() and cdev_put() and will not allow the module to unload prematurely. Fixes: 9a393b5d5988ea4e (tap: tap as an independent module) Signed-off-by: Girish Moodalbail Signed-off-by: David S. Miller --- include/linux/if_tap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 4837157da0dc..9ae41cdd0d4c 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -73,8 +73,8 @@ void tap_del_queues(struct tap_dev *tap); int tap_get_minor(dev_t major, struct tap_dev *tap); void tap_free_minor(dev_t major, struct tap_dev *tap); int tap_queue_resize(struct tap_dev *tap); -int tap_create_cdev(struct cdev *tap_cdev, - dev_t *tap_major, const char *device_name); +int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, + const char *device_name, struct module *module); void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev); #endif /*_LINUX_IF_TAP_H_*/ -- cgit v1.2.3 From 1da4fc97cbf89514e417a3df46eaec864a9b8a48 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 19:43:54 +0800 Subject: sctp: fix some type cast warnings introduced by stream reconf These warnings were found by running 'make C=2 M=net/sctp/'. They are introduced by not aware of Endian when coding stream reconf patches. Since commit c0d8bab6ae51 ("sctp: add get and set sockopt for reconf_enable") enabled stream reconf feature for users, the Fixes tag below would use it. Fixes: c0d8bab6ae51 ("sctp: add get and set sockopt for reconf_enable") Reported-by: Eric Dumazet Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 82b171e1aa0b..09d7412e9cb0 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -716,28 +716,28 @@ struct sctp_reconf_chunk { struct sctp_strreset_outreq { struct sctp_paramhdr param_hdr; - __u32 request_seq; - __u32 response_seq; - __u32 send_reset_at_tsn; - __u16 list_of_streams[0]; + __be32 request_seq; + __be32 response_seq; + __be32 send_reset_at_tsn; + __be16 list_of_streams[0]; }; struct sctp_strreset_inreq { struct sctp_paramhdr param_hdr; - __u32 request_seq; - __u16 list_of_streams[0]; + __be32 request_seq; + __be16 list_of_streams[0]; }; struct sctp_strreset_tsnreq { struct sctp_paramhdr param_hdr; - __u32 request_seq; + __be32 request_seq; }; struct sctp_strreset_addstrm { struct sctp_paramhdr param_hdr; - __u32 request_seq; - __u16 number_of_streams; - __u16 reserved; + __be32 request_seq; + __be16 number_of_streams; + __be16 reserved; }; enum { @@ -752,16 +752,16 @@ enum { struct sctp_strreset_resp { struct sctp_paramhdr param_hdr; - __u32 response_seq; - __u32 result; + __be32 response_seq; + __be32 result; }; struct sctp_strreset_resptsn { struct sctp_paramhdr param_hdr; - __u32 response_seq; - __u32 result; - __u32 senders_next_tsn; - __u32 receivers_next_tsn; + __be32 response_seq; + __be32 result; + __be32 senders_next_tsn; + __be32 receivers_next_tsn; }; #endif /* __LINUX_SCTP_H__ */ -- cgit v1.2.3 From 978aa0474115f3f5848949f2efce4def0766a5cb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 28 Oct 2017 19:43:57 +0800 Subject: sctp: fix some type cast warnings introduced since very beginning These warnings were found by running 'make C=2 M=net/sctp/'. They are there since very beginning. Note after this patch, there still one warning left in sctp_outq_flush(): sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM) Since it has been moved to sctp_stream_outq_migrate on net-next, to avoid the extra job when merging net-next to net, I will post the fix for it after the merging is done. Reported-by: Eric Dumazet Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/linux/sctp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sctp.h b/include/linux/sctp.h index 09d7412e9cb0..da803dfc7a39 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -231,7 +231,7 @@ struct sctp_datahdr { __be32 tsn; __be16 stream; __be16 ssn; - __be32 ppid; + __u32 ppid; __u8 payload[0]; }; -- cgit v1.2.3