From 24d28e4f1271cb2f91613dada8f2acccd00eff56 Mon Sep 17 00:00:00 2001 From: Nick Dyer Date: Tue, 5 Jun 2018 10:17:51 -0700 Subject: Input: synaptics-rmi4 - convert irq distribution to irq_domain Convert the RMI driver to use the standard mechanism for distributing IRQs to the various functions. Tested on: * S7300 (F11, F34, F54) * S7817 (F12, F34, F54) Signed-off-by: Nick Dyer Acked-by: Christopher Heiny Signed-off-by: Dmitry Torokhov --- include/linux/rmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rmi.h b/include/linux/rmi.h index 64125443f8a6..5ef5c7c412a7 100644 --- a/include/linux/rmi.h +++ b/include/linux/rmi.h @@ -354,6 +354,8 @@ struct rmi_driver_data { struct mutex irq_mutex; struct input_dev *input; + struct irq_domain *irqdomain; + u8 pdt_props; u8 num_rx_electrodes; -- cgit v1.2.3 From bf6247a70f2b7569180304041b63b59ab14217bc Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 5 Jun 2018 10:08:44 -0700 Subject: Input: make input_report_slot_state() return boolean Let's make input_report_slot_state() return boolean representing whether the contact is active or not. This will allow writing code like: if (input_mt_report_slot_state(input, obj->mt_tool, obj->type != RMI_2D_OBJECT_NONE) { input_event(sensor->input, EV_ABS, ABS_MT_POSITION_X, obj->x); input_event(sensor->input, EV_ABS, ABS_MT_POSITION_Y, obj->y); ... } instead of: input_mt_report_slot_state(input, obj->mt_tool, obj->type != RMI_2D_OBJECT_NONE); if (obj->type != RMI_2D_OBJECT_NONE) { input_event(sensor->input, EV_ABS, ABS_MT_POSITION_X, obj->x); input_event(sensor->input, EV_ABS, ABS_MT_POSITION_Y, obj->y); ... } Reviewed-by: Henrik Rydberg Acked-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- include/linux/input/mt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index d7188de4db96..3f4bf60b0bb5 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -100,7 +100,7 @@ static inline bool input_is_mt_axis(int axis) return axis == ABS_MT_SLOT || input_is_mt_value(axis); } -void input_mt_report_slot_state(struct input_dev *dev, +bool input_mt_report_slot_state(struct input_dev *dev, unsigned int tool_type, bool active); void input_mt_report_finger_count(struct input_dev *dev, int count); -- cgit v1.2.3 From 6362f0a290023bafd7f991089e81dd9278f154b8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 Jun 2018 10:12:48 -0600 Subject: libata: add command iterator helpers Now that we have the internal tag as a special (higher) value tag, it gets a bit tricky to iterate the internal commands as some loops will exceed ATA_MAX_QUEUE. Add explicit helpers for iterating pending commands, both inflight and internal. Signed-off-by: Jens Axboe Signed-off-by: Tejun Heo --- include/linux/libata.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 8b8946dd63b9..a2257e380789 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1495,6 +1495,29 @@ static inline bool ata_tag_valid(unsigned int tag) return tag < ATA_MAX_QUEUE || ata_tag_internal(tag); } +#define __ata_qc_for_each(ap, qc, tag, max_tag, fn) \ + for ((tag) = 0; (tag) < (max_tag) && \ + ({ qc = fn((ap), (tag)); 1; }); (tag)++) \ + +/* + * Internal use only, iterate commands ignoring error handling and + * status of 'qc'. + */ +#define ata_qc_for_each_raw(ap, qc, tag) \ + __ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE, __ata_qc_from_tag) + +/* + * Iterate all potential commands that can be queued + */ +#define ata_qc_for_each(ap, qc, tag) \ + __ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE, ata_qc_from_tag) + +/* + * Like ata_qc_for_each, but with the internal tag included + */ +#define ata_qc_for_each_with_internal(ap, qc, tag) \ + __ata_qc_for_each(ap, qc, tag, ATA_MAX_QUEUE + 1, ata_qc_from_tag) + /* * device helpers */ -- cgit v1.2.3 From 9262478220eac908ae6e168c3df2c453c87e2da3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Jun 2018 17:24:09 -0700 Subject: bpf: enforce correct alignment for instructions After commit 9facc336876f ("bpf: reject any prog that failed read-only lock") offsetof(struct bpf_binary_header, image) became 3 instead of 4, breaking powerpc BPF badly, since instructions need to be word aligned. Fixes: 9facc336876f ("bpf: reject any prog that failed read-only lock") Signed-off-by: Eric Dumazet Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index b615df57b7d5..20f2659dd829 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -472,7 +472,9 @@ struct sock_fprog_kern { struct bpf_binary_header { u16 pages; u16 locked:1; - u8 image[]; + + /* Some arches need word alignment for their instructions */ + u8 image[] __aligned(4); }; struct bpf_prog { -- cgit v1.2.3 From 92397a6c38d139d50fabbe9e2dc09b61d53b2377 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Tue, 5 Jun 2018 14:15:10 +0800 Subject: iio: buffer: fix the function signature to match implementation linux/iio/buffer-dma.h was not updated to when length was changed to unsigned int. Fixes: c043ec1ca5ba ("iio:buffer: make length types match kfifo types") Signed-off-by: Phil Reid Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer-dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h index 767467d886de..67c75372b691 100644 --- a/include/linux/iio/buffer-dma.h +++ b/include/linux/iio/buffer-dma.h @@ -141,7 +141,7 @@ int iio_dma_buffer_read(struct iio_buffer *buffer, size_t n, char __user *user_buffer); size_t iio_dma_buffer_data_available(struct iio_buffer *buffer); int iio_dma_buffer_set_bytes_per_datum(struct iio_buffer *buffer, size_t bpd); -int iio_dma_buffer_set_length(struct iio_buffer *buffer, int length); +int iio_dma_buffer_set_length(struct iio_buffer *buffer, unsigned int length); int iio_dma_buffer_request_update(struct iio_buffer *buffer); int iio_dma_buffer_init(struct iio_dma_buffer_queue *queue, -- cgit v1.2.3 From 5e03aa61a7f3cb99852bc3ad6925f5fa3c0dcc93 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 14 Jun 2018 10:53:34 +0530 Subject: PM / Domains: Fix return value of of_genpd_opp_to_performance_state() of_genpd_opp_to_performance_state() should return 0 for errors, but the dummy routine isn't doing that. Fix it. Signed-off-by: Viresh Kumar Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 9206a4fef9ac..139f79c8477a 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -276,7 +276,7 @@ static inline unsigned int of_genpd_opp_to_performance_state(struct device *dev, struct device_node *opp_node) { - return -ENODEV; + return 0; } static inline int genpd_dev_pm_attach(struct device *dev) -- cgit v1.2.3 From ad6384ba3ac98ad524194e37de379c1fe503870b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 20 Jun 2018 11:45:37 +0530 Subject: PM / Domains: Rename opp_node to np The DT node passed here isn't necessarily an OPP node, as this routine can also be used for cases where the "required-opps" property is present directly in the device's node. Rename it. This also removes a stale comment. Acked-by: Ulf Hansson Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 139f79c8477a..cb8d84090cfb 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -234,7 +234,7 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); int of_genpd_parse_idle_states(struct device_node *dn, struct genpd_power_state **states, int *n); unsigned int of_genpd_opp_to_performance_state(struct device *dev, - struct device_node *opp_node); + struct device_node *np); int genpd_dev_pm_attach(struct device *dev); struct device *genpd_dev_pm_attach_by_id(struct device *dev, @@ -274,7 +274,7 @@ static inline int of_genpd_parse_idle_states(struct device_node *dn, static inline unsigned int of_genpd_opp_to_performance_state(struct device *dev, - struct device_node *opp_node) + struct device_node *np) { return 0; } -- cgit v1.2.3 From 8f732850df1b2b4d8d719f7e606dfb3050e7ea11 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Thu, 31 May 2018 13:49:29 +0200 Subject: HID: core: allow concurrent registration of drivers Detected on the Dell XPS 9365. The laptop has 2 devices that benefit from the hid-generic auto-unbinding. When those 2 devices are presented to the userspace, udev loads both wacom and hid-multitouch. When this happens, the code in __hid_bus_reprobe_drivers() is called concurrently and the second device gets reprobed twice. An other bug in the power_supply subsystem prevent to remove the wacom driver if it just finished its initialization, which basically kills the wacom node. [jkosina@suse.cz: reformat changelog a bit] Fixes c17a7476e4c4 ("HID: core: rewrite the hid-generic automatic unbind") Cc: stable@vger.kernel.org # v4.17 Tested-by: Mario Limonciello Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 41a3d5775394..773bcb1d4044 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -511,6 +511,7 @@ struct hid_output_fifo { #define HID_STAT_ADDED BIT(0) #define HID_STAT_PARSED BIT(1) #define HID_STAT_DUP_DETECTED BIT(2) +#define HID_STAT_REPROBED BIT(3) struct hid_input { struct list_head list; @@ -579,7 +580,7 @@ struct hid_device { /* device report descriptor */ bool battery_avoid_query; #endif - unsigned int status; /* see STAT flags above */ + unsigned long status; /* see STAT flags above */ unsigned claimed; /* Claimed by hidinput, hiddev? */ unsigned quirks; /* Various quirks the device can pull on us */ bool io_started; /* If IO has started */ -- cgit v1.2.3 From d2d2e3c46be5d6dd8001d0eebdf7cafb9bc7006b Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 21 Jun 2018 16:43:17 +0300 Subject: acpi: Add helper for deactivating memory region Sometimes memory resource may be overlapping with SystemMemory Operation Region by design, for example if the memory region is used as a mailbox for communication with a firmware in the system. One occasion of such mailboxes is USB Type-C Connector System Software Interface (UCSI). With regions like that, it is important that the driver is able to map the memory with the requirements it has. For example, the driver should be allowed to map the memory as non-cached memory. However, if the operation region has been accessed before the driver has mapped the memory, the memory has been marked as write-back by the time the driver is loaded. That means the driver will fail to map the memory if it expects non-cached memory. To work around the problem, introducing helper that the drivers can use to temporarily deactivate (unmap) SystemMemory Operation Regions that overlap with their IO memory. Fixes: 8243edf44152 ("usb: typec: ucsi: Add ACPI driver") Cc: stable@vger.kernel.org Reviewed-by: Rafael J. Wysocki Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- include/linux/acpi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4b35a66383f9..e54f40974eb0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -443,6 +443,9 @@ int acpi_check_resource_conflict(const struct resource *res); int acpi_check_region(resource_size_t start, resource_size_t n, const char *name); +acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, + u32 level); + int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION -- cgit v1.2.3 From 8793bb7f4a9dd1396575d2e9337d331662cb7555 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Jun 2018 13:14:56 -0700 Subject: kbuild: add macro for controlling warnings to linux/compiler.h I have occasionally run into a situation where it would make sense to control a compiler warning from a source file rather than doing so from a Makefile using the $(cc-disable-warning, ...) or $(cc-option, ...) helpers. The approach here is similar to what glibc uses, using __diag() and related macros to encapsulate a _Pragma("GCC diagnostic ...") statement that gets turned into the respective "#pragma GCC diagnostic ..." by the preprocessor when the macro gets expanded. Like glibc, I also have an argument to pass the affected compiler version, but decided to actually evaluate that one. For now, this supports GCC_4_6, GCC_4_7, GCC_4_8, GCC_4_9, GCC_5, GCC_6, GCC_7, GCC_8 and GCC_9. Adding support for CLANG_5 and other interesting versions is straightforward here. GNU compilers starting with gcc-4.2 could support it in principle, but "#pragma GCC diagnostic push" was only added in gcc-4.6, so it seems simpler to not deal with those at all. The same versions show a large number of warnings already, so it seems easier to just leave it at that and not do a more fine-grained control for them. The use cases I found so far include: - turning off the gcc-8 -Wattribute-alias warning inside of the SYSCALL_DEFINEx() macro without having to do it globally. - Reducing the build time for a simple re-make after a change, once we move the warnings from ./Makefile and ./scripts/Makefile.extrawarn into linux/compiler.h - More control over the warnings based on other configurations, using preprocessor syntax instead of Makefile syntax. This should make it easier for the average developer to understand and change things. - Adding an easy way to turn the W=1 option on unconditionally for a subdirectory or a specific file. This has been requested by several developers in the past that want to have their subsystems W=1 clean. - Integrating clang better into the build systems. Clang supports more warnings than GCC, and we probably want to classify them as default, W=1, W=2 etc, but there are cases in which the warnings should be classified differently due to excessive false positives from one or the other compiler. - Adding a way to turn the default warnings into errors (e.g. using a new "make E=0" tag) while not also turning the W=1 warnings into errors. This patch for now just adds the minimal infrastructure in order to do the first of the list above. As the #pragma GCC diagnostic takes precedence over command line options, the next step would be to convert a lot of the individual Makefiles that set nonstandard options to use __diag() instead. [paul.burton@mips.com: - Rebase atop current master. - Add __diag_GCC, or more generally __diag_, abstraction to avoid code outside of linux/compiler-gcc.h needing to duplicate knowledge about different GCC versions. - Add a comment argument to __diag_{ignore,warn,error} which isn't used in the expansion of the macros but serves to push people to document the reason for using them - per feedback from Kees Cook. - Translate severity to GCC-specific pragmas in linux/compiler-gcc.h rather than using GCC-specific in linux/compiler_types.h. - Drop all but GCC 8 macros, since we only need to define macros for versions that we need to introduce pragmas for, and as of this series that's just GCC 8. - Capitalize comments in linux/compiler-gcc.h to match the style of the rest of the file. - Line up macro definitions with tabs in linux/compiler-gcc.h.] Signed-off-by: Arnd Bergmann Signed-off-by: Paul Burton Tested-by: Christophe Leroy Tested-by: Stafford Horne Signed-off-by: Masahiro Yamada --- include/linux/compiler-gcc.h | 25 +++++++++++++++++++++++++ include/linux/compiler_types.h | 18 ++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index f1a7492a5cc8..fd282c7d3e5e 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -347,3 +347,28 @@ #if GCC_VERSION >= 50100 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif + +/* + * Turn individual warnings and errors on and off locally, depending + * on version. + */ +#define __diag_GCC(version, severity, s) \ + __diag_GCC_ ## version(__diag_GCC_ ## severity s) + +/* Severity used in pragma directives */ +#define __diag_GCC_ignore ignored +#define __diag_GCC_warn warning +#define __diag_GCC_error error + +/* Compilers before gcc-4.6 do not understand "#pragma GCC diagnostic push" */ +#if GCC_VERSION >= 40600 +#define __diag_str1(s) #s +#define __diag_str(s) __diag_str1(s) +#define __diag(s) _Pragma(__diag_str(GCC diagnostic s)) +#endif + +#if GCC_VERSION >= 80000 +#define __diag_GCC_8(s) __diag(s) +#else +#define __diag_GCC_8(s) +#endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6b79a9bba9a7..a8ba6b04152c 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -271,4 +271,22 @@ struct ftrace_likely_data { # define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) #endif +#ifndef __diag +#define __diag(string) +#endif + +#ifndef __diag_GCC +#define __diag_GCC(version, severity, string) +#endif + +#define __diag_push() __diag(push) +#define __diag_pop() __diag(pop) + +#define __diag_ignore(compiler, version, option, comment) \ + __diag_ ## compiler(version, ignore, option) +#define __diag_warn(compiler, version, option, comment) \ + __diag_ ## compiler(version, warn, option) +#define __diag_error(compiler, version, option, comment) \ + __diag_ ## compiler(version, error, option) + #endif /* __LINUX_COMPILER_TYPES_H */ -- cgit v1.2.3 From bee20031772af3debe8cbaa234528f24c7892e8f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Jun 2018 13:14:57 -0700 Subject: disable -Wattribute-alias warning for SYSCALL_DEFINEx() gcc-8 warns for every single definition of a system call entry point, e.g.: include/linux/compat.h:56:18: error: 'compat_sys_rt_sigprocmask' alias between functions of incompatible types 'long int(int, compat_sigset_t *, compat_sigset_t *, compat_size_t)' {aka 'long int(int, struct *, struct *, unsigned int)'} and 'long int(long int, long int, long int, long int)' [-Werror=attribute-alias] asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\ ^~~~~~~~~~ include/linux/compat.h:45:2: note: in expansion of macro 'COMPAT_SYSCALL_DEFINEx' COMPAT_SYSCALL_DEFINEx(4, _##name, __VA_ARGS__) ^~~~~~~~~~~~~~~~~~~~~~ kernel/signal.c:2601:1: note: in expansion of macro 'COMPAT_SYSCALL_DEFINE4' COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset, ^~~~~~~~~~~~~~~~~~~~~~ include/linux/compat.h:60:18: note: aliased declaration here asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ ^~~~~~~~~~ The new warning seems reasonable in principle, but it doesn't help us here, since we rely on the type mismatch to sanitize the system call arguments. After I reported this as GCC PR82435, a new -Wno-attribute-alias option was added that could be used to turn the warning off globally on the command line, but I'd prefer to do it a little more fine-grained. Interestingly, turning a warning off and on again inside of a single macro doesn't always work, in this case I had to add an extra statement inbetween and decided to copy the __SC_TEST one from the native syscall to the compat syscall macro. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83256 for more details about this. [paul.burton@mips.com: - Rebase atop current master. - Split GCC & version arguments to __diag_ignore() in order to match changes to the preceding patch. - Add the comment argument to match the preceding patch.] Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82435 Signed-off-by: Arnd Bergmann Signed-off-by: Paul Burton Tested-by: Christophe Leroy Tested-by: Stafford Horne Signed-off-by: Masahiro Yamada --- include/linux/compat.h | 8 +++++++- include/linux/syscalls.h | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index b1a5562b3215..c68acc47da57 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -72,6 +72,9 @@ */ #ifndef COMPAT_SYSCALL_DEFINEx #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments");\ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_compat_sys##name)))); \ @@ -80,8 +83,11 @@ asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ - return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ + long ret = __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ } \ + __diag_pop(); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* COMPAT_SYSCALL_DEFINEx */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 73810808cdf2..a368a68cb667 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -231,6 +231,9 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) */ #ifndef __SYSCALL_DEFINEx #define __SYSCALL_DEFINEx(x, name, ...) \ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments");\ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_sys##name)))); \ ALLOW_ERROR_INJECTION(sys##name, ERRNO); \ @@ -243,6 +246,7 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ return ret; \ } \ + __diag_pop(); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* __SYSCALL_DEFINEx */ -- cgit v1.2.3 From fdb5c4531c1e0e50e609df83f736b6f3a02896e2 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Tue, 19 Jun 2018 00:04:24 +0100 Subject: bpf: fix attach type BPF_LIRC_MODE2 dependency wrt CONFIG_CGROUP_BPF If the kernel is compiled with CONFIG_CGROUP_BPF not enabled, it is not possible to attach, detach or query IR BPF programs to /dev/lircN devices, making them impossible to use. For embedded devices, it should be possible to use IR decoding without cgroups or CONFIG_CGROUP_BPF enabled. This change requires some refactoring, since bpf_prog_{attach,detach,query} functions are now always compiled, but their code paths for cgroups need moving out. Rather than a #ifdef CONFIG_CGROUP_BPF in kernel/bpf/syscall.c, moving them to kernel/bpf/cgroup.c and kernel/bpf/sockmap.c does not require #ifdefs since that is already conditionally compiled. Fixes: f4364dcfc86d ("media: rc: introduce BPF_PROG_LIRC_MODE2") Signed-off-by: Sean Young Signed-off-by: Daniel Borkmann --- include/linux/bpf-cgroup.h | 26 ++++++++++++++++++++++++++ include/linux/bpf.h | 8 ++++++++ include/linux/bpf_lirc.h | 5 +++-- 3 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 975fb4cf1bb7..79795c5fa7c3 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -188,12 +188,38 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, \ __ret; \ }) +int cgroup_bpf_prog_attach(const union bpf_attr *attr, + enum bpf_prog_type ptype, struct bpf_prog *prog); +int cgroup_bpf_prog_detach(const union bpf_attr *attr, + enum bpf_prog_type ptype); +int cgroup_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr); #else +struct bpf_prog; struct cgroup_bpf {}; static inline void cgroup_bpf_put(struct cgroup *cgrp) {} static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } +static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr, + enum bpf_prog_type ptype, + struct bpf_prog *prog) +{ + return -EINVAL; +} + +static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr, + enum bpf_prog_type ptype) +{ + return -EINVAL; +} + +static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + return -EINVAL; +} + #define cgroup_bpf_enabled (0) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7df32a3200f7..8827e797ff97 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -696,6 +696,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key); int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); +int sockmap_get_from_fd(const union bpf_attr *attr, int type, + struct bpf_prog *prog); #else static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) { @@ -714,6 +716,12 @@ static inline int sock_map_prog(struct bpf_map *map, { return -EOPNOTSUPP; } + +static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type, + struct bpf_prog *prog) +{ + return -EINVAL; +} #endif #if defined(CONFIG_XDP_SOCKETS) diff --git a/include/linux/bpf_lirc.h b/include/linux/bpf_lirc.h index 5f8a4283092d..9d9ff755ec29 100644 --- a/include/linux/bpf_lirc.h +++ b/include/linux/bpf_lirc.h @@ -5,11 +5,12 @@ #include #ifdef CONFIG_BPF_LIRC_MODE2 -int lirc_prog_attach(const union bpf_attr *attr); +int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); int lirc_prog_detach(const union bpf_attr *attr); int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); #else -static inline int lirc_prog_attach(const union bpf_attr *attr) +static inline int lirc_prog_attach(const union bpf_attr *attr, + struct bpf_prog *prog) { return -EINVAL; } -- cgit v1.2.3 From 15bfd21fbc5d35834b9ea383dc458a1f0c9e3434 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 26 Jun 2018 09:14:58 -0600 Subject: block: Fix transfer when chunk sectors exceeds max A device may have boundary restrictions where the number of sectors between boundaries exceeds its max transfer size. In this case, we need to cap the max size to the smaller of the two limits. Reported-by: Jitendra Bhivare Tested-by: Jitendra Bhivare Cc: Reviewed-by: Martin K. Petersen Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9154570edf29..79226ca8f80f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1119,8 +1119,8 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, if (!q->limits.chunk_sectors) return q->limits.max_sectors; - return q->limits.chunk_sectors - - (offset & (q->limits.chunk_sectors - 1)); + return min(q->limits.max_sectors, (unsigned int)(q->limits.chunk_sectors - + (offset & (q->limits.chunk_sectors - 1)))); } static inline unsigned int blk_rq_get_max_sectors(struct request *rq, -- cgit v1.2.3 From 0efc8562491b7d36f6bbc4fbc8f3348cb6641e9c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 31 May 2018 11:16:18 +0300 Subject: net/mlx5: E-Switch, Avoid setup attempt if not being e-switch manager In smartnic env, the host (PF) driver might not be an e-switch manager, hence the FW will err on driver attempts to deal with setting/unsetting the eswitch and as a result the overall setup of sriov will fail. Fix that by avoiding the operation if e-switch management is not allowed for this driver instance. While here, move to use the correct name for the esw manager capability name. Fixes: 81848731ff40 ('net/mlx5: E-Switch, Add SR-IOV (FDB) support') Signed-off-by: Or Gerlitz Reported-by: Guy Kushnir Reviewed-by: Eli Cohen Tested-by: Eli Cohen Signed-off-by: Saeed Mahameed --- include/linux/mlx5/eswitch.h | 2 ++ include/linux/mlx5/mlx5_ifc.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index d3c9db492b30..fab5121ffb8f 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -8,6 +8,8 @@ #include +#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager) + enum { SRIOV_NONE, SRIOV_LEGACY, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 27134c4fcb76..ac281f5ec9b8 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -922,7 +922,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 vnic_env_queue_counters[0x1]; u8 ets[0x1]; u8 nic_flow_table[0x1]; - u8 eswitch_flow_table[0x1]; + u8 eswitch_manager[0x1]; u8 device_memory[0x1]; u8 mcam_reg[0x1]; u8 pcam_reg[0x1]; -- cgit v1.2.3 From a11e1d432b51f63ba698d044441284a661f01144 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 Jun 2018 09:43:44 -0700 Subject: Revert changes to convert to ->poll_mask() and aio IOCB_CMD_POLL The poll() changes were not well thought out, and completely unexplained. They also caused a huge performance regression, because "->poll()" was no longer a trivial file operation that just called down to the underlying file operations, but instead did at least two indirect calls. Indirect calls are sadly slow now with the Spectre mitigation, but the performance problem could at least be largely mitigated by changing the "->get_poll_head()" operation to just have a per-file-descriptor pointer to the poll head instead. That gets rid of one of the new indirections. But that doesn't fix the new complexity that is completely unwarranted for the regular case. The (undocumented) reason for the poll() changes was some alleged AIO poll race fixing, but we don't make the common case slower and more complex for some uncommon special case, so this all really needs way more explanations and most likely a fundamental redesign. [ This revert is a revert of about 30 different commits, not reverted individually because that would just be unnecessarily messy - Linus ] Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 -- include/linux/net.h | 1 - include/linux/poll.h | 12 ++++++------ include/linux/skbuff.h | 3 ++- 4 files changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5c91108846db..d78d146a98da 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1720,8 +1720,6 @@ struct file_operations { int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); - struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t); - __poll_t (*poll_mask) (struct file *, __poll_t); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); diff --git a/include/linux/net.h b/include/linux/net.h index 08b6eb964dd6..6554d3ba4396 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -147,7 +147,6 @@ struct proto_ops { int (*getname) (struct socket *sock, struct sockaddr *addr, int peer); - __poll_t (*poll_mask) (struct socket *sock, __poll_t events); __poll_t (*poll) (struct file *file, struct socket *sock, struct poll_table_struct *wait); int (*ioctl) (struct socket *sock, unsigned int cmd, diff --git a/include/linux/poll.h b/include/linux/poll.h index fdf86b4cbc71..7e0fdcf905d2 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -74,18 +74,18 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) pt->_key = ~(__poll_t)0; /* all events enabled */ } -static inline bool file_has_poll_mask(struct file *file) +static inline bool file_can_poll(struct file *file) { - return file->f_op->get_poll_head && file->f_op->poll_mask; + return file->f_op->poll; } -static inline bool file_can_poll(struct file *file) +static inline __poll_t vfs_poll(struct file *file, struct poll_table_struct *pt) { - return file->f_op->poll || file_has_poll_mask(file); + if (unlikely(!file->f_op->poll)) + return DEFAULT_POLLMASK; + return file->f_op->poll(file, pt); } -__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt); - struct poll_table_entry { struct file *filp; __poll_t key; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c86885954994..164cdedf6012 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3252,7 +3252,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); -__poll_t datagram_poll_mask(struct socket *sock, __poll_t events); +__poll_t datagram_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, struct iov_iter *to, int size); static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, -- cgit v1.2.3 From d50d82faa0c964e31f7a946ba8aba7c715ca7ab0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 27 Jun 2018 23:26:09 -0700 Subject: slub: fix failure when we delete and create a slab cache In kernel 4.17 I removed some code from dm-bufio that did slab cache merging (commit 21bb13276768: "dm bufio: remove code that merges slab caches") - both slab and slub support merging caches with identical attributes, so dm-bufio now just calls kmem_cache_create and relies on implicit merging. This uncovered a bug in the slub subsystem - if we delete a cache and immediatelly create another cache with the same attributes, it fails because of duplicate filename in /sys/kernel/slab/. The slub subsystem offloads freeing the cache to a workqueue - and if we create the new cache before the workqueue runs, it complains because of duplicate filename in sysfs. This patch fixes the bug by moving the call of kobject_del from sysfs_slab_remove_workfn to shutdown_cache. kobject_del must be called while we hold slab_mutex - so that the sysfs entry is deleted before a cache with the same attributes could be created. Running device-mapper-test-suite with: dmtest run --suite thin-provisioning -n /commit_failure_causes_fallback/ triggered: Buffer I/O error on dev dm-0, logical block 1572848, async page read device-mapper: thin: 253:1: metadata operation 'dm_pool_alloc_data_block' failed: error = -5 device-mapper: thin: 253:1: aborting current metadata transaction sysfs: cannot create duplicate filename '/kernel/slab/:a-0000144' CPU: 2 PID: 1037 Comm: kworker/u48:1 Not tainted 4.17.0.snitm+ #25 Hardware name: Supermicro SYS-1029P-WTR/X11DDW-L, BIOS 2.0a 12/06/2017 Workqueue: dm-thin do_worker [dm_thin_pool] Call Trace: dump_stack+0x5a/0x73 sysfs_warn_dup+0x58/0x70 sysfs_create_dir_ns+0x77/0x80 kobject_add_internal+0xba/0x2e0 kobject_init_and_add+0x70/0xb0 sysfs_slab_add+0xb1/0x250 __kmem_cache_create+0x116/0x150 create_cache+0xd9/0x1f0 kmem_cache_create_usercopy+0x1c1/0x250 kmem_cache_create+0x18/0x20 dm_bufio_client_create+0x1ae/0x410 [dm_bufio] dm_block_manager_create+0x5e/0x90 [dm_persistent_data] __create_persistent_data_objects+0x38/0x940 [dm_thin_pool] dm_pool_abort_metadata+0x64/0x90 [dm_thin_pool] metadata_operation_failed+0x59/0x100 [dm_thin_pool] alloc_data_block.isra.53+0x86/0x180 [dm_thin_pool] process_cell+0x2a3/0x550 [dm_thin_pool] do_worker+0x28d/0x8f0 [dm_thin_pool] process_one_work+0x171/0x370 worker_thread+0x49/0x3f0 kthread+0xf8/0x130 ret_from_fork+0x35/0x40 kobject_add_internal failed for :a-0000144 with -EEXIST, don't try to register things with the same name in the same directory. kmem_cache_create(dm_bufio_buffer-16) failed with error -17 Link: http://lkml.kernel.org/r/alpine.LRH.2.02.1806151817130.6333@file01.intranet.prod.int.rdu2.redhat.com Signed-off-by: Mikulas Patocka Reported-by: Mike Snitzer Tested-by: Mike Snitzer Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 09fa2c6f0e68..3a1a1dbc6f49 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -155,8 +155,12 @@ struct kmem_cache { #ifdef CONFIG_SYSFS #define SLAB_SUPPORTS_SYSFS +void sysfs_slab_unlink(struct kmem_cache *); void sysfs_slab_release(struct kmem_cache *); #else +static inline void sysfs_slab_unlink(struct kmem_cache *s) +{ +} static inline void sysfs_slab_release(struct kmem_cache *s) { } -- cgit v1.2.3 From f77bc3a82ce58fe7977a11f28e0b6cac8a9e087c Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Wed, 27 Jun 2018 23:26:17 -0700 Subject: include/linux/dax.h: dax_iomap_fault() returns vm_fault_t Commit 1c8f422059ae ("mm: change return type to vm_fault_t") missed a conversion. It's not a big problem at present because mainline is still using typedef int vm_fault_t; Fixes: 1c8f422059ae ("mm: change return type to vm_fault_t") Link: http://lkml.kernel.org/r/20180620172046.GA27894@jordon-HP-15-Notebook-PC Signed-off-by: Souptick Joarder Reviewed-by: Andrew Morton Cc: Matthew Wilcox Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dax.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 3855e3800f48..deb0f663252f 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -135,7 +135,7 @@ void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); -int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, +vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, pfn_t *pfnp, int *errp, const struct iomap_ops *ops); vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, pfn_t pfn); -- cgit v1.2.3 From 9544bc5347207a68eb308cc8aaaed6c3a687cabd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 29 Jun 2018 08:48:06 -0600 Subject: sg: remove ->sg_magic member This was introduced more than a decade ago when sg chaining was added, but we never really caught anything with it. The scatterlist entry size can be critical, since drivers allocate it, so remove the magic member. Recently it's been triggering allocation stalls and failures in NVMe. Tested-by: Jordan Glover Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/scatterlist.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 51f52020ad5f..093aa57120b0 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -9,9 +9,6 @@ #include struct scatterlist { -#ifdef CONFIG_DEBUG_SG - unsigned long sg_magic; -#endif unsigned long page_link; unsigned int offset; unsigned int length; @@ -64,7 +61,6 @@ struct sg_table { * */ -#define SG_MAGIC 0x87654321 #define SG_CHAIN 0x01UL #define SG_END 0x02UL @@ -98,7 +94,6 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) */ BUG_ON((unsigned long) page & (SG_CHAIN | SG_END)); #ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif sg->page_link = page_link | (unsigned long) page; @@ -129,7 +124,6 @@ static inline void sg_set_page(struct scatterlist *sg, struct page *page, static inline struct page *sg_page(struct scatterlist *sg) { #ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif return (struct page *)((sg)->page_link & ~(SG_CHAIN | SG_END)); @@ -195,9 +189,6 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, **/ static inline void sg_mark_end(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif /* * Set termination bit, clear potential chain bit */ @@ -215,9 +206,6 @@ static inline void sg_mark_end(struct scatterlist *sg) **/ static inline void sg_unmark_end(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif sg->page_link &= ~SG_END; } @@ -260,12 +248,6 @@ static inline void *sg_virt(struct scatterlist *sg) static inline void sg_init_marker(struct scatterlist *sgl, unsigned int nents) { -#ifdef CONFIG_DEBUG_SG - unsigned int i; - - for (i = 0; i < nents; i++) - sgl[i].sg_magic = SG_MAGIC; -#endif sg_mark_end(&sgl[nents - 1]); } -- cgit v1.2.3 From 85782e037f8aba8922dadb24a1523ca0b82ab8bc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 28 Jun 2018 23:34:59 +0200 Subject: bpf: undo prog rejection on read-only lock failure Partially undo commit 9facc336876f ("bpf: reject any prog that failed read-only lock") since it caused a regression, that is, syzkaller was able to manage to cause a panic via fault injection deep in set_memory_ro() path by letting an allocation fail: In x86's __change_page_attr_set_clr() it was able to change the attributes of the primary mapping but not in the alias mapping via cpa_process_alias(), so the second, inner call to the __change_page_attr() via __change_page_attr_set_clr() had to split a larger page and failed in the alloc_pages() with the artifically triggered allocation error which is then propagated down to the call site. Thus, for set_memory_ro() this means that it returned with an error, but from debugging a probe_kernel_write() revealed EFAULT on that memory since the primary mapping succeeded to get changed. Therefore the subsequent hdr->locked = 0 reset triggered the panic as it was performed on read-only memory, so call-site assumptions were infact wrong to assume that it would either succeed /or/ not succeed at all since there's no such rollback in set_memory_*() calls from partial change of mappings, in other words, we're left in a state that is "half done". A later undo via set_memory_rw() is succeeding though due to matching permissions on that part (aka due to the try_preserve_large_page() succeeding). While reproducing locally with explicitly triggering this error, the initial splitting only happens on rare occasions and in real world it would additionally need oom conditions, but that said, it could partially fail. Therefore, it is definitely wrong to bail out on set_memory_ro() error and reject the program with the set_memory_*() semantics we have today. Shouldn't have gone the extra mile since no other user in tree today infact checks for any set_memory_*() errors, e.g. neither module_enable_ro() / module_disable_ro() for module RO/NX handling which is mostly default these days nor kprobes core with alloc_insn_page() / free_insn_page() as examples that could be invoked long after bootup and original 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit against spraying attacks") did neither when it got first introduced to BPF so "improving" with bailing out was clearly not right when set_memory_*() cannot handle it today. Kees suggested that if set_memory_*() can fail, we should annotate it with __must_check, and all callers need to deal with it gracefully given those set_memory_*() markings aren't "advisory", but they're expected to actually do what they say. This might be an option worth to move forward in future but would at the same time require that set_memory_*() calls from supporting archs are guaranteed to be "atomic" in that they provide rollback if part of the range fails, once that happened, the transition from RW -> RO could be made more robust that way, while subsequent RO -> RW transition /must/ continue guaranteeing to always succeed the undo part. Reported-by: syzbot+a4eb8c7766952a1ca872@syzkaller.appspotmail.com Reported-by: syzbot+d866d1925855328eac3b@syzkaller.appspotmail.com Fixes: 9facc336876f ("bpf: reject any prog that failed read-only lock") Cc: Laura Abbott Cc: Kees Cook Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 56 ++++++++------------------------------------------ 1 file changed, 8 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 20f2659dd829..300baad62c88 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -470,9 +470,7 @@ struct sock_fprog_kern { }; struct bpf_binary_header { - u16 pages; - u16 locked:1; - + u32 pages; /* Some arches need word alignment for their instructions */ u8 image[] __aligned(4); }; @@ -481,7 +479,7 @@ struct bpf_prog { u16 pages; /* Number of allocated pages */ u16 jited:1, /* Is our filter JIT'ed? */ jit_requested:1,/* archs need to JIT the prog */ - locked:1, /* Program image locked? */ + undo_set_mem:1, /* Passed set_memory_ro() checkpoint */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1, /* Do we need dst entry? */ @@ -677,46 +675,24 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { -#ifdef CONFIG_ARCH_HAS_SET_MEMORY - fp->locked = 1; - if (set_memory_ro((unsigned long)fp, fp->pages)) - fp->locked = 0; -#endif + fp->undo_set_mem = 1; + set_memory_ro((unsigned long)fp, fp->pages); } static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { -#ifdef CONFIG_ARCH_HAS_SET_MEMORY - if (fp->locked) { - WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages)); - /* In case set_memory_rw() fails, we want to be the first - * to crash here instead of some random place later on. - */ - fp->locked = 0; - } -#endif + if (fp->undo_set_mem) + set_memory_rw((unsigned long)fp, fp->pages); } static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { -#ifdef CONFIG_ARCH_HAS_SET_MEMORY - hdr->locked = 1; - if (set_memory_ro((unsigned long)hdr, hdr->pages)) - hdr->locked = 0; -#endif + set_memory_ro((unsigned long)hdr, hdr->pages); } static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) { -#ifdef CONFIG_ARCH_HAS_SET_MEMORY - if (hdr->locked) { - WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages)); - /* In case set_memory_rw() fails, we want to be the first - * to crash here instead of some random place later on. - */ - hdr->locked = 0; - } -#endif + set_memory_rw((unsigned long)hdr, hdr->pages); } static inline struct bpf_binary_header * @@ -728,22 +704,6 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp) return (void *)addr; } -#ifdef CONFIG_ARCH_HAS_SET_MEMORY -static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp) -{ - if (!fp->locked) - return -ENOLCK; - if (fp->jited) { - const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); - - if (!hdr->locked) - return -ENOLCK; - } - - return 0; -} -#endif - int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { -- cgit v1.2.3 From 603d4cf8fe095b1ee78f423d514427be507fb513 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Sat, 30 Jun 2018 17:38:55 +0200 Subject: net: fix use-after-free in GRO with ESP Since the addition of GRO for ESP, gro_receive can consume the skb and return -EINPROGRESS. In that case, the lower layer GRO handler cannot touch the skb anymore. Commit 5f114163f2f5 ("net: Add a skb_gro_flush_final helper.") converted some of the gro_receive handlers that can lead to ESP's gro_receive so that they wouldn't access the skb when -EINPROGRESS is returned, but missed other spots, mainly in tunneling protocols. This patch finishes the conversion to using skb_gro_flush_final(), and adds a new helper, skb_gro_flush_final_remcsum(), used in VXLAN and GUE. Fixes: 5f114163f2f5 ("net: Add a skb_gro_flush_final helper.") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- include/linux/netdevice.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3ec9850c7936..3d0cc0b5cec2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2789,11 +2789,31 @@ static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, if (PTR_ERR(pp) != -EINPROGRESS) NAPI_GRO_CB(skb)->flush |= flush; } +static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, + struct sk_buff **pp, + int flush, + struct gro_remcsum *grc) +{ + if (PTR_ERR(pp) != -EINPROGRESS) { + NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, grc); + skb->remcsum_offload = 0; + } +} #else static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) { NAPI_GRO_CB(skb)->flush |= flush; } +static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, + struct sk_buff **pp, + int flush, + struct gro_remcsum *grc) +{ + NAPI_GRO_CB(skb)->flush |= flush; + skb_gro_remcsum_cleanup(skb, grc); + skb->remcsum_offload = 0; +} #endif static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, -- cgit v1.2.3 From 240630e61870e62e39a97225048f9945848fa5f5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 1 Jul 2018 12:15:46 +0200 Subject: ahci: Disable LPM on Lenovo 50 series laptops with a too old BIOS There have been several reports of LPM related hard freezes about once a day on multiple Lenovo 50 series models. Strange enough these reports where not disk model specific as LPM issues usually are and some users with the exact same disk + laptop where seeing them while other users where not seeing these issues. It turns out that enabling LPM triggers a firmware bug somewhere, which has been fixed in later BIOS versions. This commit adds a new ahci_broken_lpm() function and a new ATA_FLAG_NO_LPM for dealing with this. The ahci_broken_lpm() function contains DMI match info for the 4 models which are known to be affected by this and the DMI BIOS date field for known good BIOS versions. If the BIOS date is older then the one in the table LPM will be disabled and a warning will be printed. Note the BIOS dates are for known good versions, some older versions may work too, but we don't know for sure, the table is using dates from BIOS versions for which users have confirmed that upgrading to that version makes the problem go away. Unfortunately I've been unable to get hold of the reporter who reported that BIOS version 2.35 fixed the problems on the W541 for him. I've been able to verify the DMI_SYS_VENDOR and DMI_PRODUCT_VERSION from an older dmidecode, but I don't know the exact BIOS date as reported in the DMI. Lenovo keeps a changelog with dates in their release notes, but the dates there are the release dates not the build dates which are in DMI. So I've chosen to set the date to which we compare to one day past the release date of the 2.34 BIOS. I plan to fix this with a follow up commit once I've the necessary info. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index a2257e380789..32f247cb5e9e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -210,6 +210,7 @@ enum { ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ /* (doesn't imply presence) */ ATA_FLAG_SATA = (1 << 1), + ATA_FLAG_NO_LPM = (1 << 2), /* host not happy with LPM */ ATA_FLAG_NO_LOG_PAGE = (1 << 5), /* do not issue log page read */ ATA_FLAG_NO_ATAPI = (1 << 6), /* No ATAPI support */ ATA_FLAG_PIO_DMA = (1 << 7), /* PIO cmds via DMA */ -- cgit v1.2.3 From 1cef1150ef40ec52f507436a14230cbc2623299c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Jun 2018 11:45:49 +0200 Subject: kthread, sched/core: Fix kthread_parkme() (again...) Gaurav reports that commit: 85f1abe0019f ("kthread, sched/wait: Fix kthread_parkme() completion issue") isn't working for him. Because of the following race: > controller Thread CPUHP Thread > takedown_cpu > kthread_park > kthread_parkme > Set KTHREAD_SHOULD_PARK > smpboot_thread_fn > set Task interruptible > > > wake_up_process > if (!(p->state & state)) > goto out; > > Kthread_parkme > SET TASK_PARKED > schedule > raw_spin_lock(&rq->lock) > ttwu_remote > waiting for __task_rq_lock > context_switch > > finish_lock_switch > > > > Case TASK_PARKED > kthread_park_complete > > > SET Running Furthermore, Oleg noticed that the whole scheduler TASK_PARKED handling is buggered because the TASK_DEAD thing is done with preemption disabled, the current code can still complete early on preemption :/ So basically revert that earlier fix and go with a variant of the alternative mentioned in the commit. Promote TASK_PARKED to special state to avoid the store-store issue on task->state leading to the WARN in kthread_unpark() -> __kthread_bind(). But in addition, add wait_task_inactive() to kthread_park() to ensure the task really is PARKED when we return from kthread_park(). This avoids the whole kthread still gets migrated nonsense -- although it would be really good to get this done differently. Reported-by: Gaurav Kohli Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 85f1abe0019f ("kthread, sched/wait: Fix kthread_parkme() completion issue") Signed-off-by: Ingo Molnar --- include/linux/kthread.h | 1 - include/linux/sched.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 2803264c512f..c1961761311d 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -62,7 +62,6 @@ void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); -void kthread_park_complete(struct task_struct *k); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/include/linux/sched.h b/include/linux/sched.h index 9256118bd40c..43731fe51c97 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -118,7 +118,7 @@ struct task_group; * the comment with set_special_state(). */ #define is_special_task_state(state) \ - ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD)) + ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD)) #define __set_current_state(state_value) \ do { \ -- cgit v1.2.3 From d03db2bc26f0e4a6849ad649a09c9c73fccdc656 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 21 Jun 2018 09:23:22 -0700 Subject: compiler-gcc.h: Add __attribute__((gnu_inline)) to all inline declarations Functions marked extern inline do not emit an externally visible function when the gnu89 C standard is used. Some KBUILD Makefiles overwrite KBUILD_CFLAGS. This is an issue for GCC 5.1+ users as without an explicit C standard specified, the default is gnu11. Since c99, the semantics of extern inline have changed such that an externally visible function is always emitted. This can lead to multiple definition errors of extern inline functions at link time of compilation units whose build files have removed an explicit C standard compiler flag for users of GCC 5.1+ or Clang. Suggested-by: Arnd Bergmann Suggested-by: H. Peter Anvin Suggested-by: Joe Perches Signed-off-by: Nick Desaulniers Acked-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@redhat.com Cc: akataria@vmware.com Cc: akpm@linux-foundation.org Cc: andrea.parri@amarulasolutions.com Cc: ard.biesheuvel@linaro.org Cc: aryabinin@virtuozzo.com Cc: astrachan@google.com Cc: boris.ostrovsky@oracle.com Cc: brijesh.singh@amd.com Cc: caoj.fnst@cn.fujitsu.com Cc: geert@linux-m68k.org Cc: ghackmann@google.com Cc: gregkh@linuxfoundation.org Cc: jan.kiszka@siemens.com Cc: jarkko.sakkinen@linux.intel.com Cc: jpoimboe@redhat.com Cc: keescook@google.com Cc: kirill.shutemov@linux.intel.com Cc: kstewart@linuxfoundation.org Cc: linux-efi@vger.kernel.org Cc: linux-kbuild@vger.kernel.org Cc: manojgupta@google.com Cc: mawilcox@microsoft.com Cc: michal.lkml@markovi.net Cc: mjg59@google.com Cc: mka@chromium.org Cc: pombredanne@nexb.com Cc: rientjes@google.com Cc: rostedt@goodmis.org Cc: sedat.dilek@gmail.com Cc: thomas.lendacky@amd.com Cc: tstellar@redhat.com Cc: tweek@google.com Cc: virtualization@lists.linux-foundation.org Cc: will.deacon@arm.com Cc: yamada.masahiro@socionext.com Link: http://lkml.kernel.org/r/20180621162324.36656-2-ndesaulniers@google.com Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index fd282c7d3e5e..573f5a7d42d4 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -65,6 +65,18 @@ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) #endif +/* + * Feature detection for gnu_inline (gnu89 extern inline semantics). Either + * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics, + * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not + * defined so the gnu89 semantics are the default. + */ +#ifdef __GNUC_STDC_INLINE__ +# define __gnu_inline __attribute__((gnu_inline)) +#else +# define __gnu_inline +#endif + /* * Force always-inline if the user requests it so via the .config, * or if gcc is too old. @@ -72,19 +84,22 @@ * -Wunused-function. This turns out to avoid the need for complex #ifdef * directives. Suppress the warning in clang as well by using "unused" * function attribute, which is redundant but not harmful for gcc. + * Prefer gnu_inline, so that extern inline functions do not emit an + * externally visible function. This makes extern inline behave as per gnu89 + * semantics rather than c99. This prevents multiple symbol definition errors + * of extern inline functions at link time. + * A lot of inline functions can cause havoc with function tracing. */ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -#define inline inline __attribute__((always_inline,unused)) notrace -#define __inline__ __inline__ __attribute__((always_inline,unused)) notrace -#define __inline __inline __attribute__((always_inline,unused)) notrace +#define inline \ + inline __attribute__((always_inline, unused)) notrace __gnu_inline #else -/* A lot of inline functions can cause havoc with function tracing */ -#define inline inline __attribute__((unused)) notrace -#define __inline__ __inline__ __attribute__((unused)) notrace -#define __inline __inline __attribute__((unused)) notrace +#define inline inline __attribute__((unused)) notrace __gnu_inline #endif +#define __inline__ inline +#define __inline inline #define __always_inline inline __attribute__((always_inline)) #define noinline __attribute__((noinline)) -- cgit v1.2.3 From 5ccba64a560fa6ca06008d4001f5d46ebeb34b41 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Fri, 2 Feb 2018 10:14:49 +0800 Subject: ftrace: Nuke clear_ftrace_function clear_ftrace_function is not used outside of ftrace.c and is not help to use a function, so nuke it per Steve's suggestion. Link: http://lkml.kernel.org/r/1517537689-34947-1-git-send-email-xieyisheng1@huawei.com Suggested-by: Steven Rostedt Signed-off-by: Yisheng Xie Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8154f4920fcb..ebb77674be90 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -223,7 +223,6 @@ extern enum ftrace_tracing_type_t ftrace_tracing_type; */ int register_ftrace_function(struct ftrace_ops *ops); int unregister_ftrace_function(struct ftrace_ops *ops); -void clear_ftrace_function(void); extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op, struct pt_regs *regs); @@ -239,7 +238,6 @@ static inline int ftrace_nr_registered_ops(void) { return 0; } -static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } static inline void ftrace_free_init_mem(void) { } static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } -- cgit v1.2.3 From 543af5861f41af0a5d2432f6fb5976af50f9cee5 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 6 Jul 2018 22:05:38 -0400 Subject: uio: change to use the mutex lock instead of the spin lock We are hitting a regression with the following commit: commit a93e7b331568227500186a465fee3c2cb5dffd1f Author: Hamish Martin Date: Mon May 14 13:32:23 2018 +1200 uio: Prevent device destruction while fds are open The problem is the addition of spin_lock_irqsave in uio_write. This leads to hitting uio_write -> copy_from_user -> _copy_from_user -> might_fault and the logs filling up with sleeping warnings. I also noticed some uio drivers allocate memory, sleep, grab mutexes from callouts like open() and release and uio is now doing spin_lock_irqsave while calling them. Reported-by: Mike Christie CC: Hamish Martin Reviewed-by: Hamish Martin Signed-off-by: Xiubo Li Signed-off-by: Greg Kroah-Hartman --- include/linux/uio_driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 6c5f2074e14f..6f8b68cd460f 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -75,7 +75,7 @@ struct uio_device { struct fasync_struct *async_queue; wait_queue_head_t wait; struct uio_info *info; - spinlock_t info_lock; + struct mutex info_lock; struct kobject *map_dir; struct kobject *portio_dir; }; -- cgit v1.2.3