From 8e742aa79780b13cd300a42198c1a4cea9c89905 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 10 Jun 2020 13:48:51 +0200 Subject: syscalls: Fix offset type of ksys_ftruncate() After the commit below, truncate() on x86 32bit uses ksys_ftruncate(). But ksys_ftruncate() truncates the offset to unsigned long. Switch the type of offset to loff_t which is what do_sys_ftruncate() expects. Fixes: 121b32a58a3a (x86/entry/32: Use IA32-specific wrappers for syscalls taking 64-bit arguments) Signed-off-by: Jiri Slaby Signed-off-by: Thomas Gleixner Reviewed-by: Brian Gerst Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200610114851.28549-1-jslaby@suse.cz --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7c354c2955f5..b951a87da987 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -1360,7 +1360,7 @@ static inline long ksys_lchown(const char __user *filename, uid_t user, extern long do_sys_ftruncate(unsigned int fd, loff_t length, int small); -static inline long ksys_ftruncate(unsigned int fd, unsigned long length) +static inline long ksys_ftruncate(unsigned int fd, loff_t length) { return do_sys_ftruncate(fd, length, 1); } -- cgit v1.2.3 From e79302ae8c8cceb51cf642d5ace9da02668cb7b4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2020 17:04:03 +0200 Subject: kcsan: Remove __no_kcsan_or_inline There are no more user of this function attribute, also, with us now actively supporting '__no_kcsan inline' it doesn't make sense to have in any case. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/compiler_types.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 21aed0981edf..938249809511 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -193,10 +193,7 @@ struct ftrace_likely_data { #define __no_kcsan __no_sanitize_thread #ifdef __SANITIZE_THREAD__ -# define __no_kcsan_or_inline __no_kcsan notrace __maybe_unused -# define __no_sanitize_or_inline __no_kcsan_or_inline -#else -# define __no_kcsan_or_inline __always_inline +# define __no_sanitize_or_inline __no_kcsan notrace __maybe_unused #endif #ifndef __no_sanitize_or_inline -- cgit v1.2.3 From 5ddbc4082e1072eeeae52ff561a88620a05be08f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2020 18:47:11 +0200 Subject: x86, kcsan: Add __no_kcsan to noinstr The 'noinstr' function attribute means no-instrumentation, this should very much include *SAN. Because lots of that is broken at present, only include KCSAN for now, as that is limited to clang11, which has sane function attribute behaviour. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/compiler_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 938249809511..a8b4266084a1 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -118,10 +118,6 @@ struct ftrace_likely_data { #define notrace __attribute__((__no_instrument_function__)) #endif -/* Section for code which can't be instrumented at all */ -#define noinstr \ - noinline notrace __attribute((__section__(".noinstr.text"))) - /* * it doesn't make sense on ARM (currently the only user of __naked) * to trace naked functions because then mcount is called without @@ -200,6 +196,10 @@ struct ftrace_likely_data { #define __no_sanitize_or_inline __always_inline #endif +/* Section for code which can't be instrumented at all */ +#define noinstr \ + noinline notrace __attribute((__section__(".noinstr.text"))) __no_kcsan + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 5144f8a8dfd7b3681f0a2b5bf599a210b2315018 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 4 Jun 2020 07:58:11 +0200 Subject: compiler_types.h: Add __no_sanitize_{address,undefined} to noinstr Adds the portable definitions for __no_sanitize_address, and __no_sanitize_undefined, and subsequently changes noinstr to use the attributes to disable instrumentation via KASAN or UBSAN. Reported-by: syzbot+dc1fa714cb070b184db5@syzkaller.appspotmail.com Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Acked-by: Miguel Ojeda Link: https://lore.kernel.org/lkml/000000000000d2474c05a6c938fe@google.com/ --- include/linux/compiler-clang.h | 8 ++++++++ include/linux/compiler-gcc.h | 6 ++++++ include/linux/compiler_types.h | 3 ++- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index ee37256ec8bd..5e55302e3bf6 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -33,6 +33,14 @@ #define __no_sanitize_thread #endif +#if __has_feature(undefined_behavior_sanitizer) +/* GCC does not have __SANITIZE_UNDEFINED__ */ +#define __no_sanitize_undefined \ + __attribute__((no_sanitize("undefined"))) +#else +#define __no_sanitize_undefined +#endif + /* * Not all versions of clang implement the the type-generic versions * of the builtin overflow checkers. Fortunately, clang implements diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7dd4e0349ef3..1c74464c80c6 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -150,6 +150,12 @@ #define __no_sanitize_thread #endif +#if __has_attribute(__no_sanitize_undefined__) +#define __no_sanitize_undefined __attribute__((no_sanitize_undefined)) +#else +#define __no_sanitize_undefined +#endif + #if GCC_VERSION >= 50100 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index a8b4266084a1..85b8d2370c24 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -198,7 +198,8 @@ struct ftrace_likely_data { /* Section for code which can't be instrumented at all */ #define noinstr \ - noinline notrace __attribute((__section__(".noinstr.text"))) __no_kcsan + noinline notrace __attribute((__section__(".noinstr.text"))) \ + __no_kcsan __no_sanitize_address __no_sanitize_undefined #endif /* __KERNEL__ */ -- cgit v1.2.3 From 6b643a07a7e41f9e11cfbb9bba4c5c9791ac2997 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Jun 2020 20:09:06 +0200 Subject: x86/entry, ubsan, objtool: Whitelist __ubsan_handle_*() The UBSAN instrumentation only inserts external CALLs when things go 'BAD', much like WARN(). So treat them similar to WARN()s for noinstr, that is: allow them, at the risk of taking the machine down, to get their message out. Suggested-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Acked-by: Marco Elver --- include/linux/compiler_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 85b8d2370c24..14513e88b7e0 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -199,7 +199,7 @@ struct ftrace_likely_data { /* Section for code which can't be instrumented at all */ #define noinstr \ noinline notrace __attribute((__section__(".noinstr.text"))) \ - __no_kcsan __no_sanitize_address __no_sanitize_undefined + __no_kcsan __no_sanitize_address #endif /* __KERNEL__ */ -- cgit v1.2.3 From 7dfc06a0f25b593a9f51992f540c0f80a57f3629 Mon Sep 17 00:00:00 2001 From: Fabian Vogt Date: Mon, 15 Jun 2020 09:16:36 +0200 Subject: efi/tpm: Verify event log header before parsing It is possible that the first event in the event log is not actually a log header at all, but rather a normal event. This leads to the cast in __calc_tpm2_event_size being an invalid conversion, which means that the values read are effectively garbage. Depending on the first event's contents, this leads either to apparently normal behaviour, a crash or a freeze. While this behaviour of the firmware is not in accordance with the TCG Client EFI Specification, this happens on a Dell Precision 5510 with the TPM enabled but hidden from the OS ("TPM On" disabled, state otherwise untouched). The EFI firmware claims that the TPM is present and active and that it supports the TCG 2.0 event log format. Fortunately, this can be worked around by simply checking the header of the first event and the event log header signature itself. Commit b4f1874c6216 ("tpm: check event log version before reading final events") addressed a similar issue also found on Dell models. Fixes: 6b0326190205 ("efi: Attempt to get the TCG2 event log in the boot stub") Signed-off-by: Fabian Vogt Link: https://lore.kernel.org/r/1927248.evlx2EsYKh@linux-e202.suse.de Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1165773 Signed-off-by: Ard Biesheuvel --- include/linux/tpm_eventlog.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 4f8c90c93c29..64356b199e94 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -81,6 +81,8 @@ struct tcg_efi_specid_event_algs { u16 digest_size; } __packed; +#define TCG_SPECID_SIG "Spec ID Event03" + struct tcg_efi_specid_event_head { u8 signature[16]; u32 platform_class; @@ -171,6 +173,7 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, int i; int j; u32 count, event_type; + const u8 zero_digest[sizeof(event_header->digest)] = {0}; marker = event; marker_start = marker; @@ -198,10 +201,19 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, count = READ_ONCE(event->count); event_type = READ_ONCE(event->event_type); + /* Verify that it's the log header */ + if (event_header->pcr_idx != 0 || + event_header->event_type != NO_ACTION || + memcmp(event_header->digest, zero_digest, sizeof(zero_digest))) { + size = 0; + goto out; + } + efispecid = (struct tcg_efi_specid_event_head *)event_header->event; /* Check if event is malformed. */ - if (count > efispecid->num_algs) { + if (memcmp(efispecid->signature, TCG_SPECID_SIG, + sizeof(TCG_SPECID_SIG)) || count > efispecid->num_algs) { size = 0; goto out; } -- cgit v1.2.3 From 2963795122f50b36ed16e3ba880c3ed2de1bda6e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 27 May 2020 12:14:25 -0500 Subject: efi: Replace zero-length array and use struct_size() helper The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. Lastly, make use of the sizeof_field() helper instead of an open-coded version. This issue was found with the help of Coccinelle and audited _manually_. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20200527171425.GA4053@embeddedor Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 2c6495f72f79..c3449c9699d0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1236,14 +1236,11 @@ struct linux_efi_memreserve { struct { phys_addr_t base; phys_addr_t size; - } entry[0]; + } entry[]; }; -#define EFI_MEMRESERVE_SIZE(count) (sizeof(struct linux_efi_memreserve) + \ - (count) * sizeof(((struct linux_efi_memreserve *)0)->entry[0])) - #define EFI_MEMRESERVE_COUNT(size) (((size) - sizeof(struct linux_efi_memreserve)) \ - / sizeof(((struct linux_efi_memreserve *)0)->entry[0])) + / sizeof_field(struct linux_efi_memreserve, entry[0])) void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size); -- cgit v1.2.3 From 33aea07f30c261eff7ba229f19fd1b161e0fb851 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 16 Jun 2020 01:15:29 +0200 Subject: compiler_attributes.h: Support no_sanitize_undefined check with GCC 4 UBSAN is supported since GCC 4.9, which unfortunately did not yet have __has_attribute(). To work around, the __GCC4_has_attribute workaround requires defining which compiler version supports the given attribute. In the case of no_sanitize_undefined, it is the first version that supports UBSAN, which is GCC 4.9. Reported-by: kernel test robot Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miguel Ojeda Link: https://lkml.kernel.org/r/20200615231529.GA119644@google.com --- include/linux/compiler_attributes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index cdf016596659..c8f03d2969df 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -40,6 +40,7 @@ # define __GCC4_has_attribute___noclone__ 1 # define __GCC4_has_attribute___nonstring__ 0 # define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8) +# define __GCC4_has_attribute___no_sanitize_undefined__ (__GNUC_MINOR__ >= 9) # define __GCC4_has_attribute___fallthrough__ 0 #endif -- cgit v1.2.3 From 2fd2bc7f49324e7b439e3c5584762abd0a9a13bc Mon Sep 17 00:00:00 2001 From: Colton Lewis Date: Sun, 14 Jun 2020 04:08:04 +0000 Subject: gpu: host1x: Correct trivial kernel-doc inconsistencies Silence documentation build warnings by adding kernel-doc fields. ./include/linux/host1x.h:69: warning: Function parameter or member 'parent' not described in 'host1x_client' ./include/linux/host1x.h:69: warning: Function parameter or member 'usecount' not described in 'host1x_client' ./include/linux/host1x.h:69: warning: Function parameter or member 'lock' not described in 'host1x_client' Signed-off-by: Colton Lewis Signed-off-by: Thierry Reding --- include/linux/host1x.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index c230b4e70d75..a3a568bf9686 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -48,6 +48,9 @@ struct host1x_client_ops { * @channel: host1x channel associated with this client * @syncpts: array of syncpoints requested for this client * @num_syncpts: number of syncpoints requested for this client + * @parent: pointer to parent structure + * @usecount: reference count for this structure + * @lock: mutex for mutually exclusive concurrency */ struct host1x_client { struct list_head list; -- cgit v1.2.3 From 26749b3201ab05e288fbf78fbc8585dfa2da3218 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 15 Jun 2020 08:52:31 +0200 Subject: dma-direct: mark __dma_direct_alloc_pages static Signed-off-by: Christoph Hellwig --- include/linux/dma-direct.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 136f984df0d9..cdfa400f89b3 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -77,8 +77,6 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); -struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, - gfp_t gfp, unsigned long attrs); int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); -- cgit v1.2.3 From 2a55280a3675203496d302463b941834228b9875 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 7 Jun 2020 15:41:35 +0200 Subject: efi/libstub: arm: Print CPU boot mode and MMU state at boot On 32-bit ARM, we may boot at HYP mode, or with the MMU and caches off (or both), even though the EFI spec does not actually support this. While booting at HYP mode is something we might tolerate, fiddling with the caches is a more serious issue, as disabling the caches is tricky to do safely from C code, and running without the Dcache makes it impossible to support unaligned memory accesses, which is another explicit requirement imposed by the EFI spec. So take note of the CPU mode and MMU state in the EFI stub diagnostic output so that we can easily diagnose any issues that may arise from this. E.g., EFI stub: Entering in SVC mode with MMU enabled Also, capture the CPSR and SCTLR system register values at EFI stub entry, and after ExitBootServices() returns, and check whether the MMU and Dcache were disabled at any point. If this is the case, a diagnostic message like the following will be emitted: efi: [Firmware Bug]: EFI stub was entered with MMU and Dcache disabled, please fix your firmware! efi: CPSR at EFI stub entry : 0x600001d3 efi: SCTLR at EFI stub entry : 0x00c51838 efi: CPSR after ExitBootServices() : 0x600001d3 efi: SCTLR after ExitBootServices(): 0x00c50838 Signed-off-by: Ard Biesheuvel Reviewed-by: Leif Lindholm --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index c3449c9699d0..bb35f3305e55 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -350,6 +350,7 @@ void efi_native_runtime_setup(void); * associated with ConOut */ #define LINUX_EFI_ARM_SCREEN_INFO_TABLE_GUID EFI_GUID(0xe03fc20a, 0x85dc, 0x406e, 0xb9, 0x0e, 0x4a, 0xb5, 0x02, 0x37, 0x1d, 0x95) +#define LINUX_EFI_ARM_CPU_STATE_TABLE_GUID EFI_GUID(0xef79e4aa, 0x3c3d, 0x4989, 0xb9, 0x02, 0x07, 0xa9, 0x43, 0xe5, 0x50, 0xd2) #define LINUX_EFI_LOADER_ENTRY_GUID EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf, 0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f) #define LINUX_EFI_RANDOM_SEED_TABLE_GUID EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2, 0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b) #define LINUX_EFI_TPM_EVENT_LOG_GUID EFI_GUID(0xb7799cb0, 0xeca2, 0x4943, 0x96, 0x67, 0x1f, 0xae, 0x07, 0xb7, 0x47, 0xfa) -- cgit v1.2.3 From f097eb38f71391ff2cf078788bad5a00eb3bd96a Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Tue, 9 Jun 2020 10:17:26 +0200 Subject: timekeeping: Fix kerneldoc system_device_crosststamp & al Make kernel doc comments actually work and fix the syncronized typo. [ tglx: Added the missing /** and fixed up formatting ] Signed-off-by: Kurt Kanzenbach Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200609081726.5657-1-kurt@linutronix.de --- include/linux/timekeeping.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index b27e2ffa96c1..d5471d6fa778 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -222,9 +222,9 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(const struct timespec64 *delta); -/* +/** * struct system_time_snapshot - simultaneous raw/real time capture with - * counter value + * counter value * @cycles: Clocksource counter value to produce the system times * @real: Realtime system time * @raw: Monotonic raw system time @@ -239,9 +239,9 @@ struct system_time_snapshot { u8 cs_was_changed_seq; }; -/* +/** * struct system_device_crosststamp - system/device cross-timestamp - * (syncronized capture) + * (synchronized capture) * @device: Device time * @sys_realtime: Realtime simultaneous with device time * @sys_monoraw: Monotonic raw simultaneous with device time @@ -252,12 +252,12 @@ struct system_device_crosststamp { ktime_t sys_monoraw; }; -/* +/** * struct system_counterval_t - system counter value with the pointer to the - * corresponding clocksource + * corresponding clocksource * @cycles: System counter value * @cs: Clocksource corresponding to system counter value. Used by - * timekeeping code to verify comparibility of two cycle values + * timekeeping code to verify comparibility of two cycle values */ struct system_counterval_t { u64 cycles; -- cgit v1.2.3 From ab183d460daac6292cb0cfd989d88b37b2437844 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 16 Jun 2020 13:45:36 +0300 Subject: RDMA/mlx5: Add missed RST2INIT and INIT2INIT steps during ECE handshake Missed steps during ECE handshake left userspace application with less options for the ECE handshake. Pass ECE options in the additional transitions. Fixes: 50aec2c3135e ("RDMA/mlx5: Return ECE data after modify QP") Link: https://lore.kernel.org/r/20200616104536.2426384-1-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/mlx5_ifc.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 116bd9bb347f..ca1887dd0423 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4283,7 +4283,8 @@ struct mlx5_ifc_rst2init_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_rst2init_qp_in_bits { @@ -4300,7 +4301,7 @@ struct mlx5_ifc_rst2init_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; @@ -6619,7 +6620,8 @@ struct mlx5_ifc_init2init_qp_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; + u8 reserved_at_40[0x20]; + u8 ece[0x20]; }; struct mlx5_ifc_init2init_qp_in_bits { @@ -6636,7 +6638,7 @@ struct mlx5_ifc_init2init_qp_in_bits { u8 opt_param_mask[0x20]; - u8 reserved_at_a0[0x20]; + u8 ece[0x20]; struct mlx5_ifc_qpc_bits qpc; -- cgit v1.2.3 From fb7861d14c8d7edac65b2fcb6e8031cb138457b2 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 16 Jun 2020 15:52:05 +0000 Subject: net: core: reduce recursion limit value In the current code, ->ndo_start_xmit() can be executed recursively only 10 times because of stack memory. But, in the case of the vxlan, 10 recursion limit value results in a stack overflow. In the current code, the nested interface is limited by 8 depth. There is no critical reason that the recursion limitation value should be 10. So, it would be good to be the same value with the limitation value of nesting interface depth. Test commands: ip link add vxlan10 type vxlan vni 10 dstport 4789 srcport 4789 4789 ip link set vxlan10 up ip a a 192.168.10.1/24 dev vxlan10 ip n a 192.168.10.2 dev vxlan10 lladdr fc:22:33:44:55:66 nud permanent for i in {9..0} do let A=$i+1 ip link add vxlan$i type vxlan vni $i dstport 4789 srcport 4789 4789 ip link set vxlan$i up ip a a 192.168.$i.1/24 dev vxlan$i ip n a 192.168.$i.2 dev vxlan$i lladdr fc:22:33:44:55:66 nud permanent bridge fdb add fc:22:33:44:55:66 dev vxlan$A dst 192.168.$i.2 self done hping3 192.168.10.2 -2 -d 60000 Splat looks like: [ 103.814237][ T1127] ============================================================================= [ 103.871955][ T1127] BUG kmalloc-2k (Tainted: G B ): Padding overwritten. 0x00000000897a2e4f-0x000 [ 103.873187][ T1127] ----------------------------------------------------------------------------- [ 103.873187][ T1127] [ 103.874252][ T1127] INFO: Slab 0x000000005cccc724 objects=5 used=5 fp=0x0000000000000000 flags=0x10000000001020 [ 103.881323][ T1127] CPU: 3 PID: 1127 Comm: hping3 Tainted: G B 5.7.0+ #575 [ 103.882131][ T1127] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 103.883006][ T1127] Call Trace: [ 103.883324][ T1127] dump_stack+0x96/0xdb [ 103.883716][ T1127] slab_err+0xad/0xd0 [ 103.884106][ T1127] ? _raw_spin_unlock+0x1f/0x30 [ 103.884620][ T1127] ? get_partial_node.isra.78+0x140/0x360 [ 103.885214][ T1127] slab_pad_check.part.53+0xf7/0x160 [ 103.885769][ T1127] ? pskb_expand_head+0x110/0xe10 [ 103.886316][ T1127] check_slab+0x97/0xb0 [ 103.886763][ T1127] alloc_debug_processing+0x84/0x1a0 [ 103.887308][ T1127] ___slab_alloc+0x5a5/0x630 [ 103.887765][ T1127] ? pskb_expand_head+0x110/0xe10 [ 103.888265][ T1127] ? lock_downgrade+0x730/0x730 [ 103.888762][ T1127] ? pskb_expand_head+0x110/0xe10 [ 103.889244][ T1127] ? __slab_alloc+0x3e/0x80 [ 103.889675][ T1127] __slab_alloc+0x3e/0x80 [ 103.890108][ T1127] __kmalloc_node_track_caller+0xc7/0x420 [ ... ] Fixes: 11a766ce915f ("net: Increase xmit RECURSION_LIMIT to 10.") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6fc613ed8eae..39e28e11863c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3157,7 +3157,7 @@ static inline int dev_recursion_level(void) return this_cpu_read(softnet_data.xmit.recursion); } -#define XMIT_RECURSION_LIMIT 10 +#define XMIT_RECURSION_LIMIT 8 static inline bool dev_xmit_recursion(void) { return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > -- cgit v1.2.3 From 4bc799dcb67066e0531004d5bdbe755bb02b5488 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 15 Jun 2020 11:12:32 -0700 Subject: security: fix the key_permission LSM hook function type Commit 8c0637e950d6 ("keys: Make the KEY_NEED_* perms an enum rather than a mask") changed the type of the key_permission callback functions, but didn't change the type of the hook, which trips indirect call checking with Control-Flow Integrity (CFI). This change fixes the issue by changing the hook type to match the functions. Fixes: 8c0637e950d6 ("keys: Make the KEY_NEED_* perms an enum rather than a mask") Signed-off-by: Sami Tolvanen Acked-by: Kees Cook Signed-off-by: James Morris --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 6791813cd439..24f6683f1cfc 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -360,7 +360,7 @@ LSM_HOOK(int, 0, key_alloc, struct key *key, const struct cred *cred, unsigned long flags) LSM_HOOK(void, LSM_RET_VOID, key_free, struct key *key) LSM_HOOK(int, 0, key_permission, key_ref_t key_ref, const struct cred *cred, - unsigned perm) + enum key_need_perm need_perm) LSM_HOOK(int, 0, key_getsecurity, struct key *key, char **_buffer) #endif /* CONFIG_KEYS */ -- cgit v1.2.3 From 16ecf10e815d70d11d2300243f4a3b4c7c5acac7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 23 Jun 2020 07:13:41 +0800 Subject: iommu/vt-d: Set U/S bit in first level page table by default When using first-level translation for IOVA, currently the U/S bit in the page table is cleared which implies DMA requests with user privilege are blocked. As the result, following error messages might be observed when passing through a device to user level: DMAR: DRHD: handling fault status reg 3 DMAR: [DMA Read] Request device [41:00.0] PASID 1 fault addr 7ecdcd000 [fault reason 129] SM: U/S set 0 for first-level translation with user privilege This fixes it by setting U/S bit in the first level page table and makes IOVA over first level compatible with previous second-level translation. Fixes: b802d070a52a1 ("iommu/vt-d: Use iova over first level") Reported-by: Xin Zeng Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20200622231345.29722-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4100bd224f5c..3e8fa1c7a1e6 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -41,6 +41,7 @@ #define DMA_PTE_SNP BIT_ULL(11) #define DMA_FL_PTE_PRESENT BIT_ULL(0) +#define DMA_FL_PTE_US BIT_ULL(2) #define DMA_FL_PTE_XD BIT_ULL(63) #define ADDR_WIDTH_5LEVEL (57) -- cgit v1.2.3 From 97dd1abd026ae4e6a82fa68645928404ad483409 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 23 Jun 2020 16:51:29 +0300 Subject: net: qed: fix left elements count calculation qed_chain_get_element_left{,_u32} returned 0 when the difference between producer and consumer page count was equal to the total page count. Fix this by conditional expanding of producer value (vs unconditional). This allowed to eliminate normalizaton against total page count, which was the cause of this bug. Misc: replace open-coded constants with common defines. Fixes: a91eb52abb50 ("qed: Revisit chain implementation") Signed-off-by: Alexander Lobakin Signed-off-by: Igor Russkikh Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- include/linux/qed/qed_chain.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 733fad7dfbed..6d15040c642c 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -207,28 +207,34 @@ static inline u32 qed_chain_get_cons_idx_u32(struct qed_chain *p_chain) static inline u16 qed_chain_get_elem_left(struct qed_chain *p_chain) { + u16 elem_per_page = p_chain->elem_per_page; + u32 prod = p_chain->u.chain16.prod_idx; + u32 cons = p_chain->u.chain16.cons_idx; u16 used; - used = (u16) (((u32)0x10000 + - (u32)p_chain->u.chain16.prod_idx) - - (u32)p_chain->u.chain16.cons_idx); + if (prod < cons) + prod += (u32)U16_MAX + 1; + + used = (u16)(prod - cons); if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) - used -= p_chain->u.chain16.prod_idx / p_chain->elem_per_page - - p_chain->u.chain16.cons_idx / p_chain->elem_per_page; + used -= prod / elem_per_page - cons / elem_per_page; return (u16)(p_chain->capacity - used); } static inline u32 qed_chain_get_elem_left_u32(struct qed_chain *p_chain) { + u16 elem_per_page = p_chain->elem_per_page; + u64 prod = p_chain->u.chain32.prod_idx; + u64 cons = p_chain->u.chain32.cons_idx; u32 used; - used = (u32) (((u64)0x100000000ULL + - (u64)p_chain->u.chain32.prod_idx) - - (u64)p_chain->u.chain32.cons_idx); + if (prod < cons) + prod += (u64)U32_MAX + 1; + + used = (u32)(prod - cons); if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) - used -= p_chain->u.chain32.prod_idx / p_chain->elem_per_page - - p_chain->u.chain32.cons_idx / p_chain->elem_per_page; + used -= (u32)(prod / elem_per_page - cons / elem_per_page); return p_chain->capacity - used; } -- cgit v1.2.3 From 23e390cdbe6f85827a43d38f9288dcd3066fa376 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Mon, 22 Jun 2020 00:21:35 +0200 Subject: security: Fix hook iteration and default value for inode_copy_up_xattr inode_copy_up_xattr returns 0 to indicate the acceptance of the xattr and 1 to reject it. If the LSM does not know about the xattr, it's expected to return -EOPNOTSUPP, which is the correct default value for this hook. BPF LSM, currently, uses 0 as the default value and thereby falsely allows all overlay fs xattributes to be copied up. The iteration logic is also updated from the "bail-on-fail" call_int_hook to continue on the non-decisive -EOPNOTSUPP and bail out on other values. Fixes: 98e828a0650f ("security: Refactor declaration of LSM hooks") Signed-off-by: KP Singh Signed-off-by: James Morris --- include/linux/lsm_hook_defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 24f6683f1cfc..af998f93d256 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -150,7 +150,7 @@ LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer, size_t buffer_size) LSM_HOOK(void, LSM_RET_VOID, inode_getsecid, struct inode *inode, u32 *secid) LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new) -LSM_HOOK(int, 0, inode_copy_up_xattr, const char *name) +LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, const char *name) LSM_HOOK(int, 0, kernfs_init_security, struct kernfs_node *kn_dir, struct kernfs_node *kn) LSM_HOOK(int, 0, file_permission, struct file *file, int mask) -- cgit v1.2.3 From aad4b4d15f30de087c5972cfb767fadb5dbc3c52 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 Jun 2020 09:13:02 +0200 Subject: scsi: libata: Fix the ata_scsi_dma_need_drain stub We not only need the stub when libata is disabled, but also if it is modular and there are built-in SAS drivers (which can happen when SCSI_SAS_ATA is disabled). Link: https://lore.kernel.org/r/20200620071302.462974-2-hch@lst.de Fixes: b8f1d1e05817 ("scsi: Wire up ata_scsi_dma_need_drain for SAS HBA drivers") Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 042e584daca7..c57bf6749681 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1092,7 +1092,7 @@ extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd, #define ATA_SCSI_COMPAT_IOCTL /* empty */ #endif extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd); -#if IS_ENABLED(CONFIG_ATA) +#if IS_REACHABLE(CONFIG_ATA) bool ata_scsi_dma_need_drain(struct request *rq); #else #define ata_scsi_dma_need_drain NULL -- cgit v1.2.3 From 3dd4ef1bdbac959bb20faec93937720ddd9917c6 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Wed, 24 Jun 2020 15:58:24 +0800 Subject: net: phy: make phy_disable_interrupts() non-static We face an issue with rtl8211f, a pin is shared between INTB and PMEB, and the PHY Register Accessible Interrupt is enabled by default, so the INTB/PMEB pin is always active in polling mode case. As Heiner pointed out "I was thinking about calling phy_disable_interrupts() in phy_init_hw(), to have a defined init state as we don't know in which state the PHY is if the PHY driver is loaded. We shouldn't assume that it's the chip power-on defaults, BIOS or boot loader could have changed this. Or in case of dual-boot systems the other OS could leave the PHY in whatever state." Make phy_disable_interrupts() non-static so that it could be used in phy_init_hw() to have a defined init state. Suggested-by: Heiner Kallweit Signed-off-by: Jisheng Zhang Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 8c05d0fb5c00..b693b609b2f5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1416,6 +1416,7 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); int phy_do_ioctl_running(struct net_device *dev, struct ifreq *ifr, int cmd); +int phy_disable_interrupts(struct phy_device *phydev); void phy_request_interrupt(struct phy_device *phydev); void phy_free_interrupt(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); -- cgit v1.2.3 From 1cbf90985f7448f1b0dd630e17ee1070f7d58665 Mon Sep 17 00:00:00 2001 From: David Wilder Date: Mon, 22 Jun 2020 10:10:11 -0700 Subject: netfilter: iptables: Split ipt_unregister_table() into pre_exit and exit helpers. The pre_exit will un-register the underlying hook and .exit will do the table freeing. The netns core does an unconditional synchronize_rcu after the pre_exit hooks insuring no packets are in flight that have picked up the pointer before completing the un-register. Fixes: b9e69e127397 ("netfilter: xtables: don't hook tables by default") Signed-off-by: David Wilder Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4/ip_tables.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index b394bd4f68a3..c4676d6feeff 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -25,6 +25,12 @@ int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *ops, struct xt_table **res); + +void ipt_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); + +void ipt_unregister_table_exit(struct net *net, struct xt_table *table); + void ipt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops); -- cgit v1.2.3 From 57ea5f18882a3d7cf6135fa8c949a37c89395837 Mon Sep 17 00:00:00 2001 From: David Wilder Date: Mon, 22 Jun 2020 10:10:13 -0700 Subject: netfilter: ip6tables: Split ip6t_unregister_table() into pre_exit and exit helpers. The pre_exit will un-register the underlying hook and .exit will do the table freeing. The netns core does an unconditional synchronize_rcu after the pre_exit hooks insuring no packets are in flight that have picked up the pointer before completing the un-register. Fixes: b9e69e127397 ("netfilter: xtables: don't hook tables by default") Signed-off-by: David Wilder Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6/ip6_tables.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 8225f7821a29..1547d5f9ae06 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -29,6 +29,9 @@ int ip6t_register_table(struct net *net, const struct xt_table *table, const struct nf_hook_ops *ops, struct xt_table **res); void ip6t_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops); +void ip6t_unregister_table_pre_exit(struct net *net, struct xt_table *table, + const struct nf_hook_ops *ops); +void ip6t_unregister_table_exit(struct net *net, struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct xt_table *table); -- cgit v1.2.3 From 0c1a7f13c9ec1ceb18d97ef4b1dd20ec71ffba31 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Wed, 17 Jun 2020 17:01:32 +0530 Subject: ieee80211: Add missing and new AKM suite selector definitions Add the definitions for missing AKM selectors defined in IEEE P802.11-REVmd/D3.0, table 9-151. These definitions will be used by various drivers that support these new AKM suites. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20200617113132.13477-1-vjakkam@codeaurora.org Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index fe15f831841b..9f732499ea88 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -3333,13 +3333,17 @@ struct ieee80211_multiple_bssid_configuration { #define WLAN_AKM_SUITE_TDLS SUITE(0x000FAC, 7) #define WLAN_AKM_SUITE_SAE SUITE(0x000FAC, 8) #define WLAN_AKM_SUITE_FT_OVER_SAE SUITE(0x000FAC, 9) +#define WLAN_AKM_SUITE_AP_PEER_KEY SUITE(0x000FAC, 10) #define WLAN_AKM_SUITE_8021X_SUITE_B SUITE(0x000FAC, 11) #define WLAN_AKM_SUITE_8021X_SUITE_B_192 SUITE(0x000FAC, 12) +#define WLAN_AKM_SUITE_FT_8021X_SHA384 SUITE(0x000FAC, 13) #define WLAN_AKM_SUITE_FILS_SHA256 SUITE(0x000FAC, 14) #define WLAN_AKM_SUITE_FILS_SHA384 SUITE(0x000FAC, 15) #define WLAN_AKM_SUITE_FT_FILS_SHA256 SUITE(0x000FAC, 16) #define WLAN_AKM_SUITE_FT_FILS_SHA384 SUITE(0x000FAC, 17) #define WLAN_AKM_SUITE_OWE SUITE(0x000FAC, 18) +#define WLAN_AKM_SUITE_FT_PSK_SHA384 SUITE(0x000FAC, 19) +#define WLAN_AKM_SUITE_PSK_SHA384 SUITE(0x000FAC, 20) #define WLAN_MAX_KEY_LEN 32 -- cgit v1.2.3 From a9b59159d338d414acaa8e2f569d129d51c76452 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 24 Jun 2020 15:20:39 -0700 Subject: bpf: Do not allow btf_ctx_access with __int128 types To ensure btf_ctx_access() is safe the verifier checks that the BTF arg type is an int, enum, or pointer. When the function does the BTF arg lookup it uses the calculation 'arg = off / 8' using the fact that registers are 8B. This requires that the first arg is in the first reg, the second in the second, and so on. However, for __int128 the arg will consume two registers by default LLVM implementation. So this will cause the arg layout assumed by the 'arg = off / 8' calculation to be incorrect. Because __int128 is uncommon this patch applies the easiest fix and will force int types to be sizeof(u64) or smaller so that they will fit in a single register. v2: remove unneeded parens per Andrii's feedback Fixes: 9e15db66136a1 ("bpf: Implement accurate raw_tp context access via BTF") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/159303723962.11287.13309537171132420717.stgit@john-Precision-5820-Tower --- include/linux/btf.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/btf.h b/include/linux/btf.h index 5c1ea99b480f..8b81fbb4497c 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -82,6 +82,11 @@ static inline bool btf_type_is_int(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_INT; } +static inline bool btf_type_is_small_int(const struct btf_type *t) +{ + return btf_type_is_int(t) && t->size <= sizeof(u64); +} + static inline bool btf_type_is_enum(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; -- cgit v1.2.3 From 5faafd5685764e4d75376aceac91fdf75b3b16f8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jun 2020 15:55:14 +0200 Subject: locking/atomics: Provide the arch_atomic_ interface to generic code Architectures with instrumented (KASAN/KCSAN) atomic operations natively provide arch_atomic_ variants that are not instrumented. It turns out that some generic code also requires arch_atomic_ in order to avoid instrumentation, so provide the arch_atomic_ interface as a direct map into the regular atomic_ interface for non-instrumented architectures. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney --- include/linux/atomic-fallback.h | 236 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 235 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h index 2c4927bf7b8d..fd525c71d676 100644 --- a/include/linux/atomic-fallback.h +++ b/include/linux/atomic-fallback.h @@ -77,6 +77,9 @@ #endif /* cmpxchg64_relaxed */ +#define arch_atomic_read atomic_read +#define arch_atomic_read_acquire atomic_read_acquire + #ifndef atomic_read_acquire static __always_inline int atomic_read_acquire(const atomic_t *v) @@ -86,6 +89,9 @@ atomic_read_acquire(const atomic_t *v) #define atomic_read_acquire atomic_read_acquire #endif +#define arch_atomic_set atomic_set +#define arch_atomic_set_release atomic_set_release + #ifndef atomic_set_release static __always_inline void atomic_set_release(atomic_t *v, int i) @@ -95,6 +101,13 @@ atomic_set_release(atomic_t *v, int i) #define atomic_set_release atomic_set_release #endif +#define arch_atomic_add atomic_add + +#define arch_atomic_add_return atomic_add_return +#define arch_atomic_add_return_acquire atomic_add_return_acquire +#define arch_atomic_add_return_release atomic_add_return_release +#define arch_atomic_add_return_relaxed atomic_add_return_relaxed + #ifndef atomic_add_return_relaxed #define atomic_add_return_acquire atomic_add_return #define atomic_add_return_release atomic_add_return @@ -137,6 +150,11 @@ atomic_add_return(int i, atomic_t *v) #endif /* atomic_add_return_relaxed */ +#define arch_atomic_fetch_add atomic_fetch_add +#define arch_atomic_fetch_add_acquire atomic_fetch_add_acquire +#define arch_atomic_fetch_add_release atomic_fetch_add_release +#define arch_atomic_fetch_add_relaxed atomic_fetch_add_relaxed + #ifndef atomic_fetch_add_relaxed #define atomic_fetch_add_acquire atomic_fetch_add #define atomic_fetch_add_release atomic_fetch_add @@ -179,6 +197,13 @@ atomic_fetch_add(int i, atomic_t *v) #endif /* atomic_fetch_add_relaxed */ +#define arch_atomic_sub atomic_sub + +#define arch_atomic_sub_return atomic_sub_return +#define arch_atomic_sub_return_acquire atomic_sub_return_acquire +#define arch_atomic_sub_return_release atomic_sub_return_release +#define arch_atomic_sub_return_relaxed atomic_sub_return_relaxed + #ifndef atomic_sub_return_relaxed #define atomic_sub_return_acquire atomic_sub_return #define atomic_sub_return_release atomic_sub_return @@ -221,6 +246,11 @@ atomic_sub_return(int i, atomic_t *v) #endif /* atomic_sub_return_relaxed */ +#define arch_atomic_fetch_sub atomic_fetch_sub +#define arch_atomic_fetch_sub_acquire atomic_fetch_sub_acquire +#define arch_atomic_fetch_sub_release atomic_fetch_sub_release +#define arch_atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed + #ifndef atomic_fetch_sub_relaxed #define atomic_fetch_sub_acquire atomic_fetch_sub #define atomic_fetch_sub_release atomic_fetch_sub @@ -263,6 +293,8 @@ atomic_fetch_sub(int i, atomic_t *v) #endif /* atomic_fetch_sub_relaxed */ +#define arch_atomic_inc atomic_inc + #ifndef atomic_inc static __always_inline void atomic_inc(atomic_t *v) @@ -272,6 +304,11 @@ atomic_inc(atomic_t *v) #define atomic_inc atomic_inc #endif +#define arch_atomic_inc_return atomic_inc_return +#define arch_atomic_inc_return_acquire atomic_inc_return_acquire +#define arch_atomic_inc_return_release atomic_inc_return_release +#define arch_atomic_inc_return_relaxed atomic_inc_return_relaxed + #ifndef atomic_inc_return_relaxed #ifdef atomic_inc_return #define atomic_inc_return_acquire atomic_inc_return @@ -353,6 +390,11 @@ atomic_inc_return(atomic_t *v) #endif /* atomic_inc_return_relaxed */ +#define arch_atomic_fetch_inc atomic_fetch_inc +#define arch_atomic_fetch_inc_acquire atomic_fetch_inc_acquire +#define arch_atomic_fetch_inc_release atomic_fetch_inc_release +#define arch_atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed + #ifndef atomic_fetch_inc_relaxed #ifdef atomic_fetch_inc #define atomic_fetch_inc_acquire atomic_fetch_inc @@ -434,6 +476,8 @@ atomic_fetch_inc(atomic_t *v) #endif /* atomic_fetch_inc_relaxed */ +#define arch_atomic_dec atomic_dec + #ifndef atomic_dec static __always_inline void atomic_dec(atomic_t *v) @@ -443,6 +487,11 @@ atomic_dec(atomic_t *v) #define atomic_dec atomic_dec #endif +#define arch_atomic_dec_return atomic_dec_return +#define arch_atomic_dec_return_acquire atomic_dec_return_acquire +#define arch_atomic_dec_return_release atomic_dec_return_release +#define arch_atomic_dec_return_relaxed atomic_dec_return_relaxed + #ifndef atomic_dec_return_relaxed #ifdef atomic_dec_return #define atomic_dec_return_acquire atomic_dec_return @@ -524,6 +573,11 @@ atomic_dec_return(atomic_t *v) #endif /* atomic_dec_return_relaxed */ +#define arch_atomic_fetch_dec atomic_fetch_dec +#define arch_atomic_fetch_dec_acquire atomic_fetch_dec_acquire +#define arch_atomic_fetch_dec_release atomic_fetch_dec_release +#define arch_atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed + #ifndef atomic_fetch_dec_relaxed #ifdef atomic_fetch_dec #define atomic_fetch_dec_acquire atomic_fetch_dec @@ -605,6 +659,13 @@ atomic_fetch_dec(atomic_t *v) #endif /* atomic_fetch_dec_relaxed */ +#define arch_atomic_and atomic_and + +#define arch_atomic_fetch_and atomic_fetch_and +#define arch_atomic_fetch_and_acquire atomic_fetch_and_acquire +#define arch_atomic_fetch_and_release atomic_fetch_and_release +#define arch_atomic_fetch_and_relaxed atomic_fetch_and_relaxed + #ifndef atomic_fetch_and_relaxed #define atomic_fetch_and_acquire atomic_fetch_and #define atomic_fetch_and_release atomic_fetch_and @@ -647,6 +708,8 @@ atomic_fetch_and(int i, atomic_t *v) #endif /* atomic_fetch_and_relaxed */ +#define arch_atomic_andnot atomic_andnot + #ifndef atomic_andnot static __always_inline void atomic_andnot(int i, atomic_t *v) @@ -656,6 +719,11 @@ atomic_andnot(int i, atomic_t *v) #define atomic_andnot atomic_andnot #endif +#define arch_atomic_fetch_andnot atomic_fetch_andnot +#define arch_atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire +#define arch_atomic_fetch_andnot_release atomic_fetch_andnot_release +#define arch_atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed + #ifndef atomic_fetch_andnot_relaxed #ifdef atomic_fetch_andnot #define atomic_fetch_andnot_acquire atomic_fetch_andnot @@ -737,6 +805,13 @@ atomic_fetch_andnot(int i, atomic_t *v) #endif /* atomic_fetch_andnot_relaxed */ +#define arch_atomic_or atomic_or + +#define arch_atomic_fetch_or atomic_fetch_or +#define arch_atomic_fetch_or_acquire atomic_fetch_or_acquire +#define arch_atomic_fetch_or_release atomic_fetch_or_release +#define arch_atomic_fetch_or_relaxed atomic_fetch_or_relaxed + #ifndef atomic_fetch_or_relaxed #define atomic_fetch_or_acquire atomic_fetch_or #define atomic_fetch_or_release atomic_fetch_or @@ -779,6 +854,13 @@ atomic_fetch_or(int i, atomic_t *v) #endif /* atomic_fetch_or_relaxed */ +#define arch_atomic_xor atomic_xor + +#define arch_atomic_fetch_xor atomic_fetch_xor +#define arch_atomic_fetch_xor_acquire atomic_fetch_xor_acquire +#define arch_atomic_fetch_xor_release atomic_fetch_xor_release +#define arch_atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed + #ifndef atomic_fetch_xor_relaxed #define atomic_fetch_xor_acquire atomic_fetch_xor #define atomic_fetch_xor_release atomic_fetch_xor @@ -821,6 +903,11 @@ atomic_fetch_xor(int i, atomic_t *v) #endif /* atomic_fetch_xor_relaxed */ +#define arch_atomic_xchg atomic_xchg +#define arch_atomic_xchg_acquire atomic_xchg_acquire +#define arch_atomic_xchg_release atomic_xchg_release +#define arch_atomic_xchg_relaxed atomic_xchg_relaxed + #ifndef atomic_xchg_relaxed #define atomic_xchg_acquire atomic_xchg #define atomic_xchg_release atomic_xchg @@ -863,6 +950,11 @@ atomic_xchg(atomic_t *v, int i) #endif /* atomic_xchg_relaxed */ +#define arch_atomic_cmpxchg atomic_cmpxchg +#define arch_atomic_cmpxchg_acquire atomic_cmpxchg_acquire +#define arch_atomic_cmpxchg_release atomic_cmpxchg_release +#define arch_atomic_cmpxchg_relaxed atomic_cmpxchg_relaxed + #ifndef atomic_cmpxchg_relaxed #define atomic_cmpxchg_acquire atomic_cmpxchg #define atomic_cmpxchg_release atomic_cmpxchg @@ -905,6 +997,11 @@ atomic_cmpxchg(atomic_t *v, int old, int new) #endif /* atomic_cmpxchg_relaxed */ +#define arch_atomic_try_cmpxchg atomic_try_cmpxchg +#define arch_atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire +#define arch_atomic_try_cmpxchg_release atomic_try_cmpxchg_release +#define arch_atomic_try_cmpxchg_relaxed atomic_try_cmpxchg_relaxed + #ifndef atomic_try_cmpxchg_relaxed #ifdef atomic_try_cmpxchg #define atomic_try_cmpxchg_acquire atomic_try_cmpxchg @@ -1002,6 +1099,8 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new) #endif /* atomic_try_cmpxchg_relaxed */ +#define arch_atomic_sub_and_test atomic_sub_and_test + #ifndef atomic_sub_and_test /** * atomic_sub_and_test - subtract value from variable and test result @@ -1020,6 +1119,8 @@ atomic_sub_and_test(int i, atomic_t *v) #define atomic_sub_and_test atomic_sub_and_test #endif +#define arch_atomic_dec_and_test atomic_dec_and_test + #ifndef atomic_dec_and_test /** * atomic_dec_and_test - decrement and test @@ -1037,6 +1138,8 @@ atomic_dec_and_test(atomic_t *v) #define atomic_dec_and_test atomic_dec_and_test #endif +#define arch_atomic_inc_and_test atomic_inc_and_test + #ifndef atomic_inc_and_test /** * atomic_inc_and_test - increment and test @@ -1054,6 +1157,8 @@ atomic_inc_and_test(atomic_t *v) #define atomic_inc_and_test atomic_inc_and_test #endif +#define arch_atomic_add_negative atomic_add_negative + #ifndef atomic_add_negative /** * atomic_add_negative - add and test if negative @@ -1072,6 +1177,8 @@ atomic_add_negative(int i, atomic_t *v) #define atomic_add_negative atomic_add_negative #endif +#define arch_atomic_fetch_add_unless atomic_fetch_add_unless + #ifndef atomic_fetch_add_unless /** * atomic_fetch_add_unless - add unless the number is already a given value @@ -1097,6 +1204,8 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u) #define atomic_fetch_add_unless atomic_fetch_add_unless #endif +#define arch_atomic_add_unless atomic_add_unless + #ifndef atomic_add_unless /** * atomic_add_unless - add unless the number is already a given value @@ -1115,6 +1224,8 @@ atomic_add_unless(atomic_t *v, int a, int u) #define atomic_add_unless atomic_add_unless #endif +#define arch_atomic_inc_not_zero atomic_inc_not_zero + #ifndef atomic_inc_not_zero /** * atomic_inc_not_zero - increment unless the number is zero @@ -1131,6 +1242,8 @@ atomic_inc_not_zero(atomic_t *v) #define atomic_inc_not_zero atomic_inc_not_zero #endif +#define arch_atomic_inc_unless_negative atomic_inc_unless_negative + #ifndef atomic_inc_unless_negative static __always_inline bool atomic_inc_unless_negative(atomic_t *v) @@ -1147,6 +1260,8 @@ atomic_inc_unless_negative(atomic_t *v) #define atomic_inc_unless_negative atomic_inc_unless_negative #endif +#define arch_atomic_dec_unless_positive atomic_dec_unless_positive + #ifndef atomic_dec_unless_positive static __always_inline bool atomic_dec_unless_positive(atomic_t *v) @@ -1163,6 +1278,8 @@ atomic_dec_unless_positive(atomic_t *v) #define atomic_dec_unless_positive atomic_dec_unless_positive #endif +#define arch_atomic_dec_if_positive atomic_dec_if_positive + #ifndef atomic_dec_if_positive static __always_inline int atomic_dec_if_positive(atomic_t *v) @@ -1184,6 +1301,9 @@ atomic_dec_if_positive(atomic_t *v) #include #endif +#define arch_atomic64_read atomic64_read +#define arch_atomic64_read_acquire atomic64_read_acquire + #ifndef atomic64_read_acquire static __always_inline s64 atomic64_read_acquire(const atomic64_t *v) @@ -1193,6 +1313,9 @@ atomic64_read_acquire(const atomic64_t *v) #define atomic64_read_acquire atomic64_read_acquire #endif +#define arch_atomic64_set atomic64_set +#define arch_atomic64_set_release atomic64_set_release + #ifndef atomic64_set_release static __always_inline void atomic64_set_release(atomic64_t *v, s64 i) @@ -1202,6 +1325,13 @@ atomic64_set_release(atomic64_t *v, s64 i) #define atomic64_set_release atomic64_set_release #endif +#define arch_atomic64_add atomic64_add + +#define arch_atomic64_add_return atomic64_add_return +#define arch_atomic64_add_return_acquire atomic64_add_return_acquire +#define arch_atomic64_add_return_release atomic64_add_return_release +#define arch_atomic64_add_return_relaxed atomic64_add_return_relaxed + #ifndef atomic64_add_return_relaxed #define atomic64_add_return_acquire atomic64_add_return #define atomic64_add_return_release atomic64_add_return @@ -1244,6 +1374,11 @@ atomic64_add_return(s64 i, atomic64_t *v) #endif /* atomic64_add_return_relaxed */ +#define arch_atomic64_fetch_add atomic64_fetch_add +#define arch_atomic64_fetch_add_acquire atomic64_fetch_add_acquire +#define arch_atomic64_fetch_add_release atomic64_fetch_add_release +#define arch_atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed + #ifndef atomic64_fetch_add_relaxed #define atomic64_fetch_add_acquire atomic64_fetch_add #define atomic64_fetch_add_release atomic64_fetch_add @@ -1286,6 +1421,13 @@ atomic64_fetch_add(s64 i, atomic64_t *v) #endif /* atomic64_fetch_add_relaxed */ +#define arch_atomic64_sub atomic64_sub + +#define arch_atomic64_sub_return atomic64_sub_return +#define arch_atomic64_sub_return_acquire atomic64_sub_return_acquire +#define arch_atomic64_sub_return_release atomic64_sub_return_release +#define arch_atomic64_sub_return_relaxed atomic64_sub_return_relaxed + #ifndef atomic64_sub_return_relaxed #define atomic64_sub_return_acquire atomic64_sub_return #define atomic64_sub_return_release atomic64_sub_return @@ -1328,6 +1470,11 @@ atomic64_sub_return(s64 i, atomic64_t *v) #endif /* atomic64_sub_return_relaxed */ +#define arch_atomic64_fetch_sub atomic64_fetch_sub +#define arch_atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire +#define arch_atomic64_fetch_sub_release atomic64_fetch_sub_release +#define arch_atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed + #ifndef atomic64_fetch_sub_relaxed #define atomic64_fetch_sub_acquire atomic64_fetch_sub #define atomic64_fetch_sub_release atomic64_fetch_sub @@ -1370,6 +1517,8 @@ atomic64_fetch_sub(s64 i, atomic64_t *v) #endif /* atomic64_fetch_sub_relaxed */ +#define arch_atomic64_inc atomic64_inc + #ifndef atomic64_inc static __always_inline void atomic64_inc(atomic64_t *v) @@ -1379,6 +1528,11 @@ atomic64_inc(atomic64_t *v) #define atomic64_inc atomic64_inc #endif +#define arch_atomic64_inc_return atomic64_inc_return +#define arch_atomic64_inc_return_acquire atomic64_inc_return_acquire +#define arch_atomic64_inc_return_release atomic64_inc_return_release +#define arch_atomic64_inc_return_relaxed atomic64_inc_return_relaxed + #ifndef atomic64_inc_return_relaxed #ifdef atomic64_inc_return #define atomic64_inc_return_acquire atomic64_inc_return @@ -1460,6 +1614,11 @@ atomic64_inc_return(atomic64_t *v) #endif /* atomic64_inc_return_relaxed */ +#define arch_atomic64_fetch_inc atomic64_fetch_inc +#define arch_atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire +#define arch_atomic64_fetch_inc_release atomic64_fetch_inc_release +#define arch_atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed + #ifndef atomic64_fetch_inc_relaxed #ifdef atomic64_fetch_inc #define atomic64_fetch_inc_acquire atomic64_fetch_inc @@ -1541,6 +1700,8 @@ atomic64_fetch_inc(atomic64_t *v) #endif /* atomic64_fetch_inc_relaxed */ +#define arch_atomic64_dec atomic64_dec + #ifndef atomic64_dec static __always_inline void atomic64_dec(atomic64_t *v) @@ -1550,6 +1711,11 @@ atomic64_dec(atomic64_t *v) #define atomic64_dec atomic64_dec #endif +#define arch_atomic64_dec_return atomic64_dec_return +#define arch_atomic64_dec_return_acquire atomic64_dec_return_acquire +#define arch_atomic64_dec_return_release atomic64_dec_return_release +#define arch_atomic64_dec_return_relaxed atomic64_dec_return_relaxed + #ifndef atomic64_dec_return_relaxed #ifdef atomic64_dec_return #define atomic64_dec_return_acquire atomic64_dec_return @@ -1631,6 +1797,11 @@ atomic64_dec_return(atomic64_t *v) #endif /* atomic64_dec_return_relaxed */ +#define arch_atomic64_fetch_dec atomic64_fetch_dec +#define arch_atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire +#define arch_atomic64_fetch_dec_release atomic64_fetch_dec_release +#define arch_atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed + #ifndef atomic64_fetch_dec_relaxed #ifdef atomic64_fetch_dec #define atomic64_fetch_dec_acquire atomic64_fetch_dec @@ -1712,6 +1883,13 @@ atomic64_fetch_dec(atomic64_t *v) #endif /* atomic64_fetch_dec_relaxed */ +#define arch_atomic64_and atomic64_and + +#define arch_atomic64_fetch_and atomic64_fetch_and +#define arch_atomic64_fetch_and_acquire atomic64_fetch_and_acquire +#define arch_atomic64_fetch_and_release atomic64_fetch_and_release +#define arch_atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed + #ifndef atomic64_fetch_and_relaxed #define atomic64_fetch_and_acquire atomic64_fetch_and #define atomic64_fetch_and_release atomic64_fetch_and @@ -1754,6 +1932,8 @@ atomic64_fetch_and(s64 i, atomic64_t *v) #endif /* atomic64_fetch_and_relaxed */ +#define arch_atomic64_andnot atomic64_andnot + #ifndef atomic64_andnot static __always_inline void atomic64_andnot(s64 i, atomic64_t *v) @@ -1763,6 +1943,11 @@ atomic64_andnot(s64 i, atomic64_t *v) #define atomic64_andnot atomic64_andnot #endif +#define arch_atomic64_fetch_andnot atomic64_fetch_andnot +#define arch_atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire +#define arch_atomic64_fetch_andnot_release atomic64_fetch_andnot_release +#define arch_atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed + #ifndef atomic64_fetch_andnot_relaxed #ifdef atomic64_fetch_andnot #define atomic64_fetch_andnot_acquire atomic64_fetch_andnot @@ -1844,6 +2029,13 @@ atomic64_fetch_andnot(s64 i, atomic64_t *v) #endif /* atomic64_fetch_andnot_relaxed */ +#define arch_atomic64_or atomic64_or + +#define arch_atomic64_fetch_or atomic64_fetch_or +#define arch_atomic64_fetch_or_acquire atomic64_fetch_or_acquire +#define arch_atomic64_fetch_or_release atomic64_fetch_or_release +#define arch_atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed + #ifndef atomic64_fetch_or_relaxed #define atomic64_fetch_or_acquire atomic64_fetch_or #define atomic64_fetch_or_release atomic64_fetch_or @@ -1886,6 +2078,13 @@ atomic64_fetch_or(s64 i, atomic64_t *v) #endif /* atomic64_fetch_or_relaxed */ +#define arch_atomic64_xor atomic64_xor + +#define arch_atomic64_fetch_xor atomic64_fetch_xor +#define arch_atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire +#define arch_atomic64_fetch_xor_release atomic64_fetch_xor_release +#define arch_atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed + #ifndef atomic64_fetch_xor_relaxed #define atomic64_fetch_xor_acquire atomic64_fetch_xor #define atomic64_fetch_xor_release atomic64_fetch_xor @@ -1928,6 +2127,11 @@ atomic64_fetch_xor(s64 i, atomic64_t *v) #endif /* atomic64_fetch_xor_relaxed */ +#define arch_atomic64_xchg atomic64_xchg +#define arch_atomic64_xchg_acquire atomic64_xchg_acquire +#define arch_atomic64_xchg_release atomic64_xchg_release +#define arch_atomic64_xchg_relaxed atomic64_xchg_relaxed + #ifndef atomic64_xchg_relaxed #define atomic64_xchg_acquire atomic64_xchg #define atomic64_xchg_release atomic64_xchg @@ -1970,6 +2174,11 @@ atomic64_xchg(atomic64_t *v, s64 i) #endif /* atomic64_xchg_relaxed */ +#define arch_atomic64_cmpxchg atomic64_cmpxchg +#define arch_atomic64_cmpxchg_acquire atomic64_cmpxchg_acquire +#define arch_atomic64_cmpxchg_release atomic64_cmpxchg_release +#define arch_atomic64_cmpxchg_relaxed atomic64_cmpxchg_relaxed + #ifndef atomic64_cmpxchg_relaxed #define atomic64_cmpxchg_acquire atomic64_cmpxchg #define atomic64_cmpxchg_release atomic64_cmpxchg @@ -2012,6 +2221,11 @@ atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) #endif /* atomic64_cmpxchg_relaxed */ +#define arch_atomic64_try_cmpxchg atomic64_try_cmpxchg +#define arch_atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire +#define arch_atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release +#define arch_atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg_relaxed + #ifndef atomic64_try_cmpxchg_relaxed #ifdef atomic64_try_cmpxchg #define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg @@ -2109,6 +2323,8 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) #endif /* atomic64_try_cmpxchg_relaxed */ +#define arch_atomic64_sub_and_test atomic64_sub_and_test + #ifndef atomic64_sub_and_test /** * atomic64_sub_and_test - subtract value from variable and test result @@ -2127,6 +2343,8 @@ atomic64_sub_and_test(s64 i, atomic64_t *v) #define atomic64_sub_and_test atomic64_sub_and_test #endif +#define arch_atomic64_dec_and_test atomic64_dec_and_test + #ifndef atomic64_dec_and_test /** * atomic64_dec_and_test - decrement and test @@ -2144,6 +2362,8 @@ atomic64_dec_and_test(atomic64_t *v) #define atomic64_dec_and_test atomic64_dec_and_test #endif +#define arch_atomic64_inc_and_test atomic64_inc_and_test + #ifndef atomic64_inc_and_test /** * atomic64_inc_and_test - increment and test @@ -2161,6 +2381,8 @@ atomic64_inc_and_test(atomic64_t *v) #define atomic64_inc_and_test atomic64_inc_and_test #endif +#define arch_atomic64_add_negative atomic64_add_negative + #ifndef atomic64_add_negative /** * atomic64_add_negative - add and test if negative @@ -2179,6 +2401,8 @@ atomic64_add_negative(s64 i, atomic64_t *v) #define atomic64_add_negative atomic64_add_negative #endif +#define arch_atomic64_fetch_add_unless atomic64_fetch_add_unless + #ifndef atomic64_fetch_add_unless /** * atomic64_fetch_add_unless - add unless the number is already a given value @@ -2204,6 +2428,8 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) #define atomic64_fetch_add_unless atomic64_fetch_add_unless #endif +#define arch_atomic64_add_unless atomic64_add_unless + #ifndef atomic64_add_unless /** * atomic64_add_unless - add unless the number is already a given value @@ -2222,6 +2448,8 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u) #define atomic64_add_unless atomic64_add_unless #endif +#define arch_atomic64_inc_not_zero atomic64_inc_not_zero + #ifndef atomic64_inc_not_zero /** * atomic64_inc_not_zero - increment unless the number is zero @@ -2238,6 +2466,8 @@ atomic64_inc_not_zero(atomic64_t *v) #define atomic64_inc_not_zero atomic64_inc_not_zero #endif +#define arch_atomic64_inc_unless_negative atomic64_inc_unless_negative + #ifndef atomic64_inc_unless_negative static __always_inline bool atomic64_inc_unless_negative(atomic64_t *v) @@ -2254,6 +2484,8 @@ atomic64_inc_unless_negative(atomic64_t *v) #define atomic64_inc_unless_negative atomic64_inc_unless_negative #endif +#define arch_atomic64_dec_unless_positive atomic64_dec_unless_positive + #ifndef atomic64_dec_unless_positive static __always_inline bool atomic64_dec_unless_positive(atomic64_t *v) @@ -2270,6 +2502,8 @@ atomic64_dec_unless_positive(atomic64_t *v) #define atomic64_dec_unless_positive atomic64_dec_unless_positive #endif +#define arch_atomic64_dec_if_positive atomic64_dec_if_positive + #ifndef atomic64_dec_if_positive static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v) @@ -2288,4 +2522,4 @@ atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 1fac0941c79bf0ae100723cc2ac9b94061f0b67a +// 9d95b56f98d82a2a26c7b79ccdd0c47572d50a6f -- cgit v1.2.3 From b58e733fd774f3f4b49d9e7640d172a57e35200e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2020 18:24:27 +0200 Subject: rcu: Fixup noinstr warnings A KCSAN build revealed we have explicit annoations through atomic_*() usage, switch to arch_atomic_*() for the respective functions. vmlinux.o: warning: objtool: rcu_nmi_exit()+0x4d: call to __kcsan_check_access() leaves .noinstr.text section vmlinux.o: warning: objtool: rcu_dynticks_eqs_enter()+0x25: call to __kcsan_check_access() leaves .noinstr.text section vmlinux.o: warning: objtool: rcu_nmi_enter()+0x4f: call to __kcsan_check_access() leaves .noinstr.text section vmlinux.o: warning: objtool: rcu_dynticks_eqs_exit()+0x2a: call to __kcsan_check_access() leaves .noinstr.text section vmlinux.o: warning: objtool: __rcu_is_watching()+0x25: call to __kcsan_check_access() leaves .noinstr.text section Additionally, without the NOP in instrumentation_begin(), objtool would not detect the lack of the 'else instrumentation_begin();' branch in rcu_nmi_enter(). Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 30827f82ad62..204e76856435 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -123,7 +123,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #ifdef CONFIG_DEBUG_ENTRY /* Begin/end of an instrumentation safe region */ #define instrumentation_begin() ({ \ - asm volatile("%c0:\n\t" \ + asm volatile("%c0: nop\n\t" \ ".pushsection .discard.instr_begin\n\t" \ ".long %c0b - .\n\t" \ ".popsection\n\t" : : "i" (__COUNTER__)); \ -- cgit v1.2.3 From 31d8fcac00fcf4007f3921edc69ab4dcb3abcd4d Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 25 Jun 2020 20:30:31 -0700 Subject: mm: workingset: age nonresident information alongside anonymous pages Patch series "fix for "mm: balance LRU lists based on relative thrashing" patchset" This patchset fixes some problems of the patchset, "mm: balance LRU lists based on relative thrashing", which is now merged on the mainline. Patch "mm: workingset: let cache workingset challenge anon fix" is the result of discussion with Johannes. See following link. http://lkml.kernel.org/r/20200520232525.798933-6-hannes@cmpxchg.org And, the other two are minor things which are found when I try to rebase my patchset. This patch (of 3): After ("mm: workingset: let cache workingset challenge anon fix"), we compare refault distances to active_file + anon. But age of the non-resident information is only driven by the file LRU. As a result, we may overestimate the recency of any incoming refaults and activate them too eagerly, causing unnecessary LRU churn in certain situations. Make anon aging drive nonresident age as well to address that. Link: http://lkml.kernel.org/r/1592288204-27734-1-git-send-email-iamjoonsoo.kim@lge.com Link: http://lkml.kernel.org/r/1592288204-27734-2-git-send-email-iamjoonsoo.kim@lge.com Fixes: 34e58cac6d8f2a ("mm: workingset: let cache workingset challenge anon") Reported-by: Joonsoo Kim Signed-off-by: Johannes Weiner Signed-off-by: Joonsoo Kim Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 4 ++-- include/linux/swap.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c4c37fd12104..f6f884970511 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -257,8 +257,8 @@ struct lruvec { */ unsigned long anon_cost; unsigned long file_cost; - /* Evictions & activations on the inactive file list */ - atomic_long_t inactive_age; + /* Non-resident age, driven by LRU movement */ + atomic_long_t nonresident_age; /* Refaults at the time of last reclaim cycle */ unsigned long refaults; /* Various lruvec state flags (enum lruvec_flags) */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 4c5974bb9ba9..5b3216ba39a9 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -313,6 +313,7 @@ struct vma_swap_readahead { }; /* linux/mm/workingset.c */ +void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg); void workingset_refault(struct page *page, void *shadow); void workingset_activation(struct page *page); -- cgit v1.2.3 From 7a0e27b2a0ce2735e27e21ebc8b777550fe0ed81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 25 Jun 2020 20:30:47 -0700 Subject: mm: remove vmalloc_exec Merge vmalloc_exec into its only caller. Note that for !CONFIG_MMU __vmalloc_node_range maps to __vmalloc, which directly clears the __GFP_HIGHMEM added by the vmalloc_exec stub anyway. Link: http://lkml.kernel.org/r/20200618064307.32739-4-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: David Hildenbrand Acked-by: Peter Zijlstra (Intel) Cc: Catalin Marinas Cc: Dexuan Cui Cc: Jessica Yu Cc: Vitaly Kuznetsov Cc: Wei Liu Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 48bb681e6c2a..0221f852a7e1 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -106,7 +106,6 @@ extern void *vzalloc(unsigned long size); extern void *vmalloc_user(unsigned long size); extern void *vmalloc_node(unsigned long size, int node); extern void *vzalloc_node(unsigned long size, int node); -extern void *vmalloc_exec(unsigned long size); extern void *vmalloc_32(unsigned long size); extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask); -- cgit v1.2.3 From 5946d1f5b309381805bad3ddc3054c04f4ae9c24 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Thu, 4 Jun 2020 15:31:19 +0530 Subject: kdb: Switch to use safer dbg_io_ops over console APIs In kgdb context, calling console handlers aren't safe due to locks used in those handlers which could in turn lead to a deadlock. Although, using oops_in_progress increases the chance to bypass locks in most console handlers but it might not be sufficient enough in case a console uses more locks (VT/TTY is good example). Currently when a driver provides both polling I/O and a console then kdb will output using the console. We can increase robustness by using the currently active polling I/O driver (which should be lockless) instead of the corresponding console. For several common cases (e.g. an embedded system with a single serial port that is used both for console output and debugger I/O) this will result in no console handler being used. In order to achieve this we need to reverse the order of preference to use dbg_io_ops (uses polling I/O mode) over console APIs. So we just store "struct console" that represents debugger I/O in dbg_io_ops and while emitting kdb messages, skip console that matches dbg_io_ops console in order to avoid duplicate messages. After this change, "is_console" param becomes redundant and hence removed. Suggested-by: Daniel Thompson Signed-off-by: Sumit Garg Link: https://lore.kernel.org/r/1591264879-25920-5-git-send-email-sumit.garg@linaro.org Reviewed-by: Douglas Anderson Reviewed-by: Petr Mladek Acked-by: Greg Kroah-Hartman Signed-off-by: Daniel Thompson --- include/linux/kgdb.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index c62d76478adc..529116b0cabe 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -276,8 +276,7 @@ struct kgdb_arch { * the I/O driver. * @post_exception: Pointer to a function that will do any cleanup work * for the I/O driver. - * @is_console: 1 if the end device is a console 0 if the I/O device is - * not a console + * @cons: valid if the I/O device is a console; else NULL. */ struct kgdb_io { const char *name; @@ -288,7 +287,7 @@ struct kgdb_io { void (*deinit) (void); void (*pre_exception) (void); void (*post_exception) (void); - int is_console; + struct console *cons; }; extern const struct kgdb_arch arch_kgdb_ops; -- cgit v1.2.3 From 10652a9e9fe3fbcaca090f99cd3060ac3fee2913 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 Jun 2020 17:22:30 +0200 Subject: Revert "serial: core: Refactor uart_unlock_and_check_sysrq()" This reverts commit da9a5aa3402db0ff3b57216d8dbf2478e1046cae. In order to ease backporting a fix for a sysrq regression, revert this rewrite which was since added on top. The other sysrq helpers now bail out early when sysrq is not enabled; it's better to keep that pattern here as well. Note that the __releases() attribute won't be needed after the follow-on fix either. Fixes: da9a5aa3402d ("serial: core: Refactor uart_unlock_and_check_sysrq()") Cc: stable Signed-off-by: Johan Hovold Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200610152232.16925-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 9fd550e7946a..ef4921ddbe97 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -464,7 +464,8 @@ extern void uart_insert_char(struct uart_port *port, unsigned int status, extern int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch); extern int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch); -extern void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long flags); +extern void uart_unlock_and_check_sysrq(struct uart_port *port, + unsigned long irqflags); extern int uart_handle_break(struct uart_port *port); /* -- cgit v1.2.3 From 08d5470308ac3598e7709d08b8979ce6e9de8da2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 Jun 2020 17:22:31 +0200 Subject: serial: core: fix sysrq overhead regression Commit 8e20fc391711 ("serial_core: Move sysrq functions from header file") converted the inline sysrq helpers to exported functions which are now called for every received character, interrupt and break signal also on systems without CONFIG_MAGIC_SYSRQ_SERIAL instead of being optimised away by the compiler. Inlining these helpers again also avoids the function call overhead when CONFIG_MAGIC_SYSRQ_SERIAL is enabled (e.g. when the port is not used as a console). Fixes: 8e20fc391711 ("serial_core: Move sysrq functions from header file") Cc: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Johan Hovold Cc: stable Reviewed-by: Andy Shevchenko Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20200610152232.16925-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 103 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 98 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index ef4921ddbe97..03fa7b967103 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -462,11 +462,104 @@ extern void uart_handle_cts_change(struct uart_port *uport, extern void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, unsigned int ch, unsigned int flag); -extern int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch); -extern int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch); -extern void uart_unlock_and_check_sysrq(struct uart_port *port, - unsigned long irqflags); -extern int uart_handle_break(struct uart_port *port); +#ifdef CONFIG_MAGIC_SYSRQ_SERIAL +#define SYSRQ_TIMEOUT (HZ * 5) + +bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch); + +static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) +{ + if (!port->has_sysrq || !port->sysrq) + return 0; + + if (ch && time_before(jiffies, port->sysrq)) { + if (sysrq_mask()) { + handle_sysrq(ch); + port->sysrq = 0; + return 1; + } + if (uart_try_toggle_sysrq(port, ch)) + return 1; + } + port->sysrq = 0; + + return 0; +} + +static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) +{ + if (!port->has_sysrq || !port->sysrq) + return 0; + + if (ch && time_before(jiffies, port->sysrq)) { + if (sysrq_mask()) { + port->sysrq_ch = ch; + port->sysrq = 0; + return 1; + } + if (uart_try_toggle_sysrq(port, ch)) + return 1; + } + port->sysrq = 0; + + return 0; +} + +static inline void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +{ + int sysrq_ch; + + if (!port->has_sysrq) { + spin_unlock_irqrestore(&port->lock, irqflags); + return; + } + + sysrq_ch = port->sysrq_ch; + port->sysrq_ch = 0; + + spin_unlock_irqrestore(&port->lock, irqflags); + + if (sysrq_ch) + handle_sysrq(sysrq_ch); +} +#else /* CONFIG_MAGIC_SYSRQ_SERIAL */ +static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) +{ + return 0; +} +static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) +{ + return 0; +} +static inline void uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +{ + spin_unlock_irqrestore(&port->lock, irqflags); +} +#endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ + +/* + * We do the SysRQ and SAK checking like this... + */ +static inline int uart_handle_break(struct uart_port *port) +{ + struct uart_state *state = port->state; + + if (port->handle_break) + port->handle_break(port); + +#ifdef CONFIG_MAGIC_SYSRQ_SERIAL + if (port->has_sysrq && uart_console(port)) { + if (!port->sysrq) { + port->sysrq = jiffies + SYSRQ_TIMEOUT; + return 1; + } + port->sysrq = 0; + } +#endif + if (port->flags & UPF_SAK) + do_SAK(state->port.tty); + return 0; +} /* * UART_ENABLE_MS - determine if port should enable modem status irqs -- cgit v1.2.3 From 225385657b7d81a99e17e04cd01f9ed5bb3109a8 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 Jun 2020 17:22:32 +0200 Subject: serial: core: drop redundant sysrq checks The sysrq timestamp will never be set unless port->has_sysrq is set (see uart_handle_break()) so drop the redundant checks that were added by commit 1997e9dfdc84 ("serial_core: Un-ifdef sysrq SUPPORT_SYSRQ"). Signed-off-by: Johan Hovold Reviewed-by: Andy Shevchenko Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Link: https://lore.kernel.org/r/20200610152232.16925-4-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 03fa7b967103..791f4844efeb 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -469,7 +469,7 @@ bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch); static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { - if (!port->has_sysrq || !port->sysrq) + if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { @@ -488,7 +488,7 @@ static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch static inline int uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) { - if (!port->has_sysrq || !port->sysrq) + if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { -- cgit v1.2.3 From 4f311afc2035f7b33d97e69ffaa2e6cd67fca16d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Jun 2020 12:14:09 +0200 Subject: sched/core: Fix CONFIG_GCC_PLUGIN_RANDSTRUCT build fail As a temporary build fix, the proper cleanup needs more work. Reported-by: Guenter Roeck Reported-by: Eric Biggers Suggested-by: Eric Biggers Suggested-by: Kees Cook Fixes: a148866489fb ("sched: Replace rq::wake_list") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b62e6aaf28f0..224b5de568e7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -654,8 +654,10 @@ struct task_struct { unsigned int ptrace; #ifdef CONFIG_SMP - struct llist_node wake_entry; - unsigned int wake_entry_type; + struct { + struct llist_node wake_entry; + unsigned int wake_entry_type; + }; int on_cpu; #ifdef CONFIG_THREAD_INFO_IN_TASK /* Current CPU: */ -- cgit v1.2.3 From 8c4890d1c3358fb8023d46e1e554c41d54f02878 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jun 2020 12:01:25 +0200 Subject: smp, irq_work: Continue smp_call_function*() and irq_work*() integration Instead of relying on BUG_ON() to ensure the various data structures line up, use a bunch of horrible unions to make it all automatic. Much of the union magic is to ensure irq_work and smp_call_function do not (yet) see the members of their respective data structures change name. Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Frederic Weisbecker Link: https://lkml.kernel.org/r/20200622100825.844455025@infradead.org --- include/linux/irq_work.h | 26 ++++++------------- include/linux/sched.h | 5 +--- include/linux/smp.h | 23 ++++++----------- include/linux/smp_types.h | 66 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 37 deletions(-) create mode 100644 include/linux/smp_types.h (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 2735da5f839e..30823780c192 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -2,7 +2,7 @@ #ifndef _LINUX_IRQ_WORK_H #define _LINUX_IRQ_WORK_H -#include +#include /* * An entry can be in one of four states: @@ -13,24 +13,14 @@ * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed */ -/* flags share CSD_FLAG_ space */ - -#define IRQ_WORK_PENDING BIT(0) -#define IRQ_WORK_BUSY BIT(1) - -/* Doesn't want IPI, wait for tick: */ -#define IRQ_WORK_LAZY BIT(2) -/* Run hard IRQ context, even on RT */ -#define IRQ_WORK_HARD_IRQ BIT(3) - -#define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY) - -/* - * structure shares layout with single_call_data_t. - */ struct irq_work { - struct llist_node llnode; - atomic_t flags; + union { + struct __call_single_node node; + struct { + struct llist_node llnode; + atomic_t flags; + }; + }; void (*func)(struct irq_work *); }; diff --git a/include/linux/sched.h b/include/linux/sched.h index 224b5de568e7..692e327d7455 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -654,11 +654,8 @@ struct task_struct { unsigned int ptrace; #ifdef CONFIG_SMP - struct { - struct llist_node wake_entry; - unsigned int wake_entry_type; - }; int on_cpu; + struct __call_single_node wake_entry; #ifdef CONFIG_THREAD_INFO_IN_TASK /* Current CPU: */ unsigned int cpu; diff --git a/include/linux/smp.h b/include/linux/smp.h index 7ee202ad21a6..80d557ef8a11 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -12,29 +12,22 @@ #include #include #include -#include +#include typedef void (*smp_call_func_t)(void *info); typedef bool (*smp_cond_func_t)(int cpu, void *info); -enum { - CSD_FLAG_LOCK = 0x01, - - /* IRQ_WORK_flags */ - - CSD_TYPE_ASYNC = 0x00, - CSD_TYPE_SYNC = 0x10, - CSD_TYPE_IRQ_WORK = 0x20, - CSD_TYPE_TTWU = 0x30, - CSD_FLAG_TYPE_MASK = 0xF0, -}; - /* * structure shares (partial) layout with struct irq_work */ struct __call_single_data { - struct llist_node llist; - unsigned int flags; + union { + struct __call_single_node node; + struct { + struct llist_node llist; + unsigned int flags; + }; + }; smp_call_func_t func; void *info; }; diff --git a/include/linux/smp_types.h b/include/linux/smp_types.h new file mode 100644 index 000000000000..364b3ae3e41d --- /dev/null +++ b/include/linux/smp_types.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_SMP_TYPES_H +#define __LINUX_SMP_TYPES_H + +#include + +enum { + CSD_FLAG_LOCK = 0x01, + + IRQ_WORK_PENDING = 0x01, + IRQ_WORK_BUSY = 0x02, + IRQ_WORK_LAZY = 0x04, /* No IPI, wait for tick */ + IRQ_WORK_HARD_IRQ = 0x08, /* IRQ context on PREEMPT_RT */ + + IRQ_WORK_CLAIMED = (IRQ_WORK_PENDING | IRQ_WORK_BUSY), + + CSD_TYPE_ASYNC = 0x00, + CSD_TYPE_SYNC = 0x10, + CSD_TYPE_IRQ_WORK = 0x20, + CSD_TYPE_TTWU = 0x30, + + CSD_FLAG_TYPE_MASK = 0xF0, +}; + +/* + * struct __call_single_node is the primary type on + * smp.c:call_single_queue. + * + * flush_smp_call_function_queue() only reads the type from + * __call_single_node::u_flags as a regular load, the above + * (anonymous) enum defines all the bits of this word. + * + * Other bits are not modified until the type is known. + * + * CSD_TYPE_SYNC/ASYNC: + * struct { + * struct llist_node node; + * unsigned int flags; + * smp_call_func_t func; + * void *info; + * }; + * + * CSD_TYPE_IRQ_WORK: + * struct { + * struct llist_node node; + * atomic_t flags; + * void (*func)(struct irq_work *); + * }; + * + * CSD_TYPE_TTWU: + * struct { + * struct llist_node node; + * unsigned int flags; + * }; + * + */ + +struct __call_single_node { + struct llist_node llist; + union { + unsigned int u_flags; + atomic_t a_flags; + }; +}; + +#endif /* __LINUX_SMP_TYPES_H */ -- cgit v1.2.3 From bfe373f608cf81b7626dfeb904001b0e867c5110 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Tue, 28 Apr 2020 09:54:56 +0800 Subject: blk-mq-debugfs: update blk_queue_flag_name[] accordingly for new flags Else there may be magic numbers in /sys/kernel/debug/block/*/state. Signed-off-by: Hou Tao Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8fd900998b4e..57241417ff2f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -590,6 +590,7 @@ struct request_queue { u64 write_hints[BLK_MAX_WRITE_HINTS]; }; +/* Keep blk_queue_flag_name[] in sync with the definitions below */ #define QUEUE_FLAG_STOPPED 0 /* queue is stopped */ #define QUEUE_FLAG_DYING 1 /* queue being torn down */ #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ -- cgit v1.2.3 From 3aa91625007807bfca4155df1867a5c924a08662 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 29 Jun 2020 15:03:56 +0200 Subject: dma-mapping: Add a new dma_need_sync API Add a new API to check if calls to dma_sync_single_for_{device,cpu} are required for a given DMA streaming mapping. Signed-off-by: Christoph Hellwig Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200629130359.2690853-2-hch@lst.de --- include/linux/dma-direct.h | 1 + include/linux/dma-mapping.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 136f984df0d9..8b006730687b 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -87,4 +87,5 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); int dma_direct_supported(struct device *dev, u64 mask); +bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 78f677cf45ab..a33ed3954ed4 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -461,6 +461,7 @@ int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); size_t dma_max_mapping_size(struct device *dev); +bool dma_need_sync(struct device *dev, dma_addr_t dma_addr); unsigned long dma_get_merge_boundary(struct device *dev); #else /* CONFIG_HAS_DMA */ static inline dma_addr_t dma_map_page_attrs(struct device *dev, @@ -571,6 +572,10 @@ static inline size_t dma_max_mapping_size(struct device *dev) { return 0; } +static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) +{ + return false; +} static inline unsigned long dma_get_merge_boundary(struct device *dev) { return 0; -- cgit v1.2.3 From 4ac2add65974e4efafb8d4ccd8fc5660417ea312 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 29 Jun 2020 10:56:26 +0100 Subject: bpf: flow_dissector: Check value of unused flags to BPF_PROG_DETACH Using BPF_PROG_DETACH on a flow dissector program supports neither attach_flags nor attach_bpf_fd. Yet no value is enforced for them. Enforce that attach_flags are zero, and require the current program to be passed via attach_bpf_fd. This allows us to remove the check for CAP_SYS_ADMIN, since userspace can now no longer remove arbitrary flow dissector programs. Fixes: b27f7bb590ba ("flow_dissector: Move out netns_bpf prog callbacks") Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200629095630.7933-3-lmb@cloudflare.com --- include/linux/bpf-netns.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-netns.h b/include/linux/bpf-netns.h index 4052d649f36d..47d5b0c708c9 100644 --- a/include/linux/bpf-netns.h +++ b/include/linux/bpf-netns.h @@ -33,7 +33,7 @@ int netns_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); -int netns_bpf_prog_detach(const union bpf_attr *attr); +int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog); #else @@ -49,7 +49,8 @@ static inline int netns_bpf_prog_attach(const union bpf_attr *attr, return -EOPNOTSUPP; } -static inline int netns_bpf_prog_detach(const union bpf_attr *attr) +static inline int netns_bpf_prog_detach(const union bpf_attr *attr, + enum bpf_prog_type ptype) { return -EOPNOTSUPP; } -- cgit v1.2.3 From bb0de3131f4c60a9bf976681e0fe4d1e55c7a821 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Mon, 29 Jun 2020 10:56:28 +0100 Subject: bpf: sockmap: Require attach_bpf_fd when detaching a program The sockmap code currently ignores the value of attach_bpf_fd when detaching a program. This is contrary to the usual behaviour of checking that attach_bpf_fd represents the currently attached program. Ensure that attach_bpf_fd is indeed the currently attached program. It turns out that all sockmap selftests already do this, which indicates that this is unlikely to cause breakage. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Lorenz Bauer Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200629095630.7933-5-lmb@cloudflare.com --- include/linux/bpf.h | 13 +++++++++++-- include/linux/skmsg.h | 13 +++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 07052d44bca1..9750a1902ee5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1543,13 +1543,16 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map) #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ #if defined(CONFIG_BPF_STREAM_PARSER) -int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which); +int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, + struct bpf_prog *old, u32 which); int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); +int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); #else static inline int sock_map_prog_update(struct bpf_map *map, - struct bpf_prog *prog, u32 which) + struct bpf_prog *prog, + struct bpf_prog *old, u32 which) { return -EOPNOTSUPP; } @@ -1559,6 +1562,12 @@ static inline int sock_map_get_from_fd(const union bpf_attr *attr, { return -EINVAL; } + +static inline int sock_map_prog_detach(const union bpf_attr *attr, + enum bpf_prog_type ptype) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_BPF_STREAM_PARSER */ #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 08674cd14d5a..1e9ed840b9fc 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -430,6 +430,19 @@ static inline void psock_set_prog(struct bpf_prog **pprog, bpf_prog_put(prog); } +static inline int psock_replace_prog(struct bpf_prog **pprog, + struct bpf_prog *prog, + struct bpf_prog *old) +{ + if (cmpxchg(pprog, old, prog) != old) + return -ENOENT; + + if (old) + bpf_prog_put(old); + + return 0; +} + static inline void psock_progs_drop(struct sk_psock_progs *progs) { psock_set_prog(&progs->msg_parser, NULL); -- cgit v1.2.3 From e91b48162332480f5840902268108bb7fb7a44c7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Jun 2020 17:32:54 +0200 Subject: task_work: teach task_work_add() to do signal_wake_up() So that the target task will exit the wait_event_interruptible-like loop and call task_work_run() asap. The patch turns "bool notify" into 0,TWA_RESUME,TWA_SIGNAL enum, the new TWA_SIGNAL flag implies signal_wake_up(). However, it needs to avoid the race with recalc_sigpending(), so the patch also adds the new JOBCTL_TASK_WORK bit included in JOBCTL_PENDING_MASK. TODO: once this patch is merged we need to change all current users of task_work_add(notify = true) to use TWA_RESUME. Cc: stable@vger.kernel.org # v5.7 Acked-by: Peter Zijlstra (Intel) Signed-off-by: Oleg Nesterov Signed-off-by: Jens Axboe --- include/linux/sched/jobctl.h | 4 +++- include/linux/task_work.h | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/jobctl.h b/include/linux/sched/jobctl.h index fa067de9f1a9..d2b4204ba4d3 100644 --- a/include/linux/sched/jobctl.h +++ b/include/linux/sched/jobctl.h @@ -19,6 +19,7 @@ struct task_struct; #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ +#define JOBCTL_TASK_WORK_BIT 24 /* set by TWA_SIGNAL */ #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) @@ -28,9 +29,10 @@ struct task_struct; #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) +#define JOBCTL_TASK_WORK (1UL << JOBCTL_TASK_WORK_BIT) #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) -#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) +#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK | JOBCTL_TASK_WORK) extern bool task_set_jobctl_pending(struct task_struct *task, unsigned long mask); extern void task_clear_jobctl_trapping(struct task_struct *task); diff --git a/include/linux/task_work.h b/include/linux/task_work.h index bd9a6a91c097..0fb93aafa478 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -13,7 +13,10 @@ init_task_work(struct callback_head *twork, task_work_func_t func) twork->func = func; } -int task_work_add(struct task_struct *task, struct callback_head *twork, bool); +#define TWA_RESUME 1 +#define TWA_SIGNAL 2 +int task_work_add(struct task_struct *task, struct callback_head *twork, int); + struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t); void task_work_run(void); -- cgit v1.2.3 From c463bb2a8f8d7d97aa414bf7714fc77e9d3b10df Mon Sep 17 00:00:00 2001 From: Merlijn Wajer Date: Tue, 30 Jun 2020 11:47:04 -0700 Subject: Input: add `SW_MACHINE_COVER` This event code represents the state of a removable cover of a device. Value 0 means that the cover is open or removed, value 1 means that the cover is closed. Reviewed-by: Sebastian Reichel Acked-by: Tony Lindgren Signed-off-by: Merlijn Wajer Link: https://lore.kernel.org/r/20200612125402.18393-2-merlijn@wizzup.org Signed-off-by: Dmitry Torokhov --- include/linux/mod_devicetable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index e3596db077dc..04a19e30b168 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -318,7 +318,7 @@ struct pcmcia_device_id { #define INPUT_DEVICE_ID_LED_MAX 0x0f #define INPUT_DEVICE_ID_SND_MAX 0x07 #define INPUT_DEVICE_ID_FF_MAX 0x7f -#define INPUT_DEVICE_ID_SW_MAX 0x0f +#define INPUT_DEVICE_ID_SW_MAX 0x10 #define INPUT_DEVICE_ID_PROP_MAX 0x1f #define INPUT_DEVICE_ID_MATCH_BUS 1 -- cgit v1.2.3 From 5396956cc7c6874180c9bfc1ceceb02b739a6a87 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 22 Jun 2020 19:12:48 +0300 Subject: PCI: Make pcie_find_root_port() work for Root Ports Commit 6ae72bfa656e ("PCI: Unify pcie_find_root_port() and pci_find_pcie_root_port()") broke acpi_pci_bridge_d3() because calling pcie_find_root_port() on a Root Port returned NULL when it should return the Root Port, which in turn broke power management of PCIe hierarchies. Rework pcie_find_root_port() so it returns its argument when it is already a Root Port. [bhelgaas: test device only once, test for PCIe] Fixes: 6ae72bfa656e ("PCI: Unify pcie_find_root_port() and pci_find_pcie_root_port()") Link: https://lore.kernel.org/r/20200622161248.51099-1-mika.westerberg@linux.intel.com Signed-off-by: Mika Westerberg Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index c79d83304e52..34c1c4f45288 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2169,12 +2169,11 @@ static inline int pci_pcie_type(const struct pci_dev *dev) */ static inline struct pci_dev *pcie_find_root_port(struct pci_dev *dev) { - struct pci_dev *bridge = pci_upstream_bridge(dev); - - while (bridge) { - if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) - return bridge; - bridge = pci_upstream_bridge(bridge); + while (dev) { + if (pci_is_pcie(dev) && + pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) + return dev; + dev = pci_upstream_bridge(dev); } return NULL; -- cgit v1.2.3 From d7bf2ebebc2bd61ab95e2a8e33541ef282f303d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 3 Jul 2020 22:26:43 +0200 Subject: sched: consistently handle layer3 header accesses in the presence of VLANs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a couple of places in net/sched/ that check skb->protocol and act on the value there. However, in the presence of VLAN tags, the value stored in skb->protocol can be inconsistent based on whether VLAN acceleration is enabled. The commit quoted in the Fixes tag below fixed the users of skb->protocol to use a helper that will always see the VLAN ethertype. However, most of the callers don't actually handle the VLAN ethertype, but expect to find the IP header type in the protocol field. This means that things like changing the ECN field, or parsing diffserv values, stops working if there's a VLAN tag, or if there are multiple nested VLAN tags (QinQ). To fix this, change the helper to take an argument that indicates whether the caller wants to skip the VLAN tags or not. When skipping VLAN tags, we make sure to skip all of them, so behaviour is consistent even in QinQ mode. To make the helper usable from the ECN code, move it to if_vlan.h instead of pkt_sched.h. v3: - Remove empty lines - Move vlan variable definitions inside loop in skb_protocol() - Also use skb_protocol() helper in IP{,6}_ECN_decapsulate() and bpf_skb_ecn_set_ce() v2: - Use eth_type_vlan() helper in skb_protocol() - Also fix code that reads skb->protocol directly - Change a couple of 'if/else if' statements to switch constructs to avoid calling the helper twice Reported-by: Ilya Ponetayev Fixes: d8b9605d2697 ("net: sched: fix skb->protocol use in case of accelerated vlan path") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index b05e855f1ddd..427a5b8597c2 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -308,6 +308,34 @@ static inline bool eth_type_vlan(__be16 ethertype) } } +/* A getter for the SKB protocol field which will handle VLAN tags consistently + * whether VLAN acceleration is enabled or not. + */ +static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) +{ + unsigned int offset = skb_mac_offset(skb) + sizeof(struct ethhdr); + __be16 proto = skb->protocol; + + if (!skip_vlan) + /* VLAN acceleration strips the VLAN header from the skb and + * moves it to skb->vlan_proto + */ + return skb_vlan_tag_present(skb) ? skb->vlan_proto : proto; + + while (eth_type_vlan(proto)) { + struct vlan_hdr vhdr, *vh; + + vh = skb_header_pointer(skb, offset, sizeof(vhdr), &vhdr); + if (!vh) + break; + + proto = vh->h_vlan_encapsulated_proto; + offset += sizeof(vhdr); + } + + return proto; +} + static inline bool vlan_hw_offload_capable(netdev_features_t features, __be16 proto) { -- cgit v1.2.3 From 68d237056e007c88031d80900cdba0945121a287 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 30 Jun 2020 10:16:02 +0200 Subject: scatterlist: protect parameters of the sg_table related macros Add brackets to protect parameters of the recently added sg_table related macros from side-effects. Fixes: 709d6d73c756 ("scatterlist: add generic wrappers for iterating over sgtable objects") Signed-off-by: Marek Szyprowski Signed-off-by: Christoph Hellwig --- include/linux/scatterlist.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 4f922afb607a..45cf7b69d852 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -155,7 +155,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, * Loop over each sg element in the given sg_table object. */ #define for_each_sgtable_sg(sgt, sg, i) \ - for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) + for_each_sg((sgt)->sgl, sg, (sgt)->orig_nents, i) /* * Loop over each sg element in the given *DMA mapped* sg_table object. @@ -163,7 +163,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, * of the each element. */ #define for_each_sgtable_dma_sg(sgt, sg, i) \ - for_each_sg(sgt->sgl, sg, sgt->nents, i) + for_each_sg((sgt)->sgl, sg, (sgt)->nents, i) /** * sg_chain - Chain two sglists together @@ -451,7 +451,7 @@ sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter) * See also for_each_sg_page(). In each loop it operates on PAGE_SIZE unit. */ #define for_each_sgtable_page(sgt, piter, pgoffset) \ - for_each_sg_page(sgt->sgl, piter, sgt->orig_nents, pgoffset) + for_each_sg_page((sgt)->sgl, piter, (sgt)->orig_nents, pgoffset) /** * for_each_sgtable_dma_page - iterate over the DMA mapped sg_table object @@ -465,7 +465,7 @@ sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter) * unit. */ #define for_each_sgtable_dma_page(sgt, dma_iter, pgoffset) \ - for_each_sg_dma_page(sgt->sgl, dma_iter, sgt->nents, pgoffset) + for_each_sg_dma_page((sgt)->sgl, dma_iter, (sgt)->nents, pgoffset) /* -- cgit v1.2.3 From ad0f75e5f57ccbceec13274e1e242f2b5a6397ed Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 2 Jul 2020 11:52:56 -0700 Subject: cgroup: fix cgroup_sk_alloc() for sk_clone_lock() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we clone a socket in sk_clone_lock(), its sk_cgrp_data is copied, so the cgroup refcnt must be taken too. And, unlike the sk_alloc() path, sock_update_netprioidx() is not called here. Therefore, it is safe and necessary to grab the cgroup refcnt even when cgroup_sk_alloc is disabled. sk_clone_lock() is in BH context anyway, the in_interrupt() would terminate this function if called there. And for sk_alloc() skcd->val is always zero. So it's safe to factor out the code to make it more readable. The global variable 'cgroup_sk_alloc_disabled' is used to determine whether to take these reference counts. It is impossible to make the reference counting correct unless we save this bit of information in skcd->val. So, add a new bit there to record whether the socket has already taken the reference counts. This obviously relies on kmalloc() to align cgroup pointers to at least 4 bytes, ARCH_KMALLOC_MINALIGN is certainly larger than that. This bug seems to be introduced since the beginning, commit d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets") tried to fix it but not compeletely. It seems not easy to trigger until the recent commit 090e28b229af ("netprio_cgroup: Fix unlimited memory leak of v2 cgroups") was merged. Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") Reported-by: Cameron Berkenpas Reported-by: Peter Geis Reported-by: Lu Fengqi Reported-by: Daniël Sonck Reported-by: Zhang Qiang Tested-by: Cameron Berkenpas Tested-by: Peter Geis Tested-by: Thomas Lamprecht Cc: Daniel Borkmann Cc: Zefan Li Cc: Tejun Heo Cc: Roman Gushchin Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/cgroup-defs.h | 6 ++++-- include/linux/cgroup.h | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 52661155f85f..4f1cd0edc57d 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -790,7 +790,8 @@ struct sock_cgroup_data { union { #ifdef __LITTLE_ENDIAN struct { - u8 is_data; + u8 is_data : 1; + u8 no_refcnt : 1; u8 padding; u16 prioidx; u32 classid; @@ -800,7 +801,8 @@ struct sock_cgroup_data { u32 classid; u16 prioidx; u8 padding; - u8 is_data; + u8 no_refcnt : 1; + u8 is_data : 1; } __packed; #endif u64 val; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 4598e4da6b1b..618838c48313 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -822,6 +822,7 @@ extern spinlock_t cgroup_sk_update_lock; void cgroup_sk_alloc_disable(void); void cgroup_sk_alloc(struct sock_cgroup_data *skcd); +void cgroup_sk_clone(struct sock_cgroup_data *skcd); void cgroup_sk_free(struct sock_cgroup_data *skcd); static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) @@ -835,7 +836,7 @@ static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) */ v = READ_ONCE(skcd->val); - if (v & 1) + if (v & 3) return &cgrp_dfl_root.cgrp; return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp; @@ -847,6 +848,7 @@ static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd) #else /* CONFIG_CGROUP_DATA */ static inline void cgroup_sk_alloc(struct sock_cgroup_data *skcd) {} +static inline void cgroup_sk_clone(struct sock_cgroup_data *skcd) {} static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ -- cgit v1.2.3 From 41da51bce36f44eefc1e3d0f47d18841cbd065ba Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 21 Nov 2019 23:25:07 +0000 Subject: fs: Add IOCB_NOIO flag for generic_file_read_iter Add an IOCB_NOIO flag that indicates to generic_file_read_iter that it shouldn't trigger any filesystem I/O for the actual request or for readahead. This allows to do tentative reads out of the page cache as some filesystems allow, and to take the appropriate locks and retry the reads only if the requested pages are not cached. Signed-off-by: Andreas Gruenbacher --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f881a892ea7..4b7cb76e5837 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -315,6 +315,7 @@ enum rw_hint { #define IOCB_SYNC (1 << 5) #define IOCB_WRITE (1 << 6) #define IOCB_NOWAIT (1 << 7) +#define IOCB_NOIO (1 << 9) struct kiocb { struct file *ki_filp; -- cgit v1.2.3 From 469aceddfa3ed16e17ee30533fae45e90f62efd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 7 Jul 2020 13:03:25 +0200 Subject: vlan: consolidate VLAN parsing code and limit max parsing depth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Toshiaki pointed out that we now have two very similar functions to extract the L3 protocol number in the presence of VLAN tags. And Daniel pointed out that the unbounded parsing loop makes it possible for maliciously crafted packets to loop through potentially hundreds of tags. Fix both of these issues by consolidating the two parsing functions and limiting the VLAN tag parsing to a max depth of 8 tags. As part of this, switch over __vlan_get_protocol() to use skb_header_pointer() instead of pskb_may_pull(), to avoid the possible side effects of the latter and keep the skb pointer 'const' through all the parsing functions. v2: - Use limit of 8 tags instead of 32 (matching XMIT_RECURSION_LIMIT) Reported-by: Toshiaki Makita Reported-by: Daniel Borkmann Fixes: d7bf2ebebc2b ("sched: consistently handle layer3 header accesses in the presence of VLANs") Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 57 +++++++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 427a5b8597c2..41a518336673 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -25,6 +25,8 @@ #define VLAN_ETH_DATA_LEN 1500 /* Max. octets in payload */ #define VLAN_ETH_FRAME_LEN 1518 /* Max. octets in frame sans FCS */ +#define VLAN_MAX_DEPTH 8 /* Max. number of nested VLAN tags parsed */ + /* * struct vlan_hdr - vlan header * @h_vlan_TCI: priority and VLAN ID @@ -308,34 +310,6 @@ static inline bool eth_type_vlan(__be16 ethertype) } } -/* A getter for the SKB protocol field which will handle VLAN tags consistently - * whether VLAN acceleration is enabled or not. - */ -static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) -{ - unsigned int offset = skb_mac_offset(skb) + sizeof(struct ethhdr); - __be16 proto = skb->protocol; - - if (!skip_vlan) - /* VLAN acceleration strips the VLAN header from the skb and - * moves it to skb->vlan_proto - */ - return skb_vlan_tag_present(skb) ? skb->vlan_proto : proto; - - while (eth_type_vlan(proto)) { - struct vlan_hdr vhdr, *vh; - - vh = skb_header_pointer(skb, offset, sizeof(vhdr), &vhdr); - if (!vh) - break; - - proto = vh->h_vlan_encapsulated_proto; - offset += sizeof(vhdr); - } - - return proto; -} - static inline bool vlan_hw_offload_capable(netdev_features_t features, __be16 proto) { @@ -605,10 +579,10 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ -static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, +static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, int *depth) { - unsigned int vlan_depth = skb->mac_len; + unsigned int vlan_depth = skb->mac_len, parse_depth = VLAN_MAX_DEPTH; /* if type is 802.1Q/AD then the header should already be * present at mac_len - VLAN_HLEN (if mac_len > 0), or at @@ -623,13 +597,12 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, vlan_depth = ETH_HLEN; } do { - struct vlan_hdr *vh; + struct vlan_hdr vhdr, *vh; - if (unlikely(!pskb_may_pull(skb, - vlan_depth + VLAN_HLEN))) + vh = skb_header_pointer(skb, vlan_depth, sizeof(vhdr), &vhdr); + if (unlikely(!vh || !--parse_depth)) return 0; - vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; } while (eth_type_vlan(type)); @@ -648,11 +621,25 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, * Returns the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ -static inline __be16 vlan_get_protocol(struct sk_buff *skb) +static inline __be16 vlan_get_protocol(const struct sk_buff *skb) { return __vlan_get_protocol(skb, skb->protocol, NULL); } +/* A getter for the SKB protocol field which will handle VLAN tags consistently + * whether VLAN acceleration is enabled or not. + */ +static inline __be16 skb_protocol(const struct sk_buff *skb, bool skip_vlan) +{ + if (!skip_vlan) + /* VLAN acceleration strips the VLAN header from the skb and + * moves it to skb->vlan_proto + */ + return skb_vlan_tag_present(skb) ? skb->vlan_proto : skb->protocol; + + return vlan_get_protocol(skb); +} + static inline void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr) { -- cgit v1.2.3 From 61a707c543e2afe3aa7e88f87267c5dafa4b5afa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 May 2020 08:54:16 +0200 Subject: fs: add a __kernel_read helper This is the counterpart to __kernel_write, and skip the rw_verify_area call compared to kernel_read. Signed-off-by: Christoph Hellwig --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f881a892ea7..22cbe7b2e919 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3033,6 +3033,7 @@ extern int kernel_read_file_from_path_initns(const char *, void **, loff_t *, lo extern int kernel_read_file_from_fd(int, void **, loff_t *, loff_t, enum kernel_read_file_id); extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *); +ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos); extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *); extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *); extern struct file * open_exec(const char *); -- cgit v1.2.3 From 775802c0571fb438cd4f6548a323f9e4cb89f5aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 8 May 2020 11:17:46 +0200 Subject: fs: remove __vfs_read Fold it into the two callers. Signed-off-by: Christoph Hellwig --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 22cbe7b2e919..0c0ec76b600b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1917,7 +1917,6 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, struct iovec *fast_pointer, struct iovec **ret_pointer); -extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, -- cgit v1.2.3 From dbfb089d360b1cc623c51a2c7cf9b99eff78e0e7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 3 Jul 2020 12:40:33 +0200 Subject: sched: Fix loadavg accounting race The recent commit: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") moved these lines in ttwu(): p->sched_contributes_to_load = !!task_contributes_to_load(p); p->state = TASK_WAKING; up before: smp_cond_load_acquire(&p->on_cpu, !VAL); into the 'p->on_rq == 0' block, with the thinking that once we hit schedule() the current task cannot change it's ->state anymore. And while this is true, it is both incorrect and flawed. It is incorrect in that we need at least an ACQUIRE on 'p->on_rq == 0' to avoid weak hardware from re-ordering things for us. This can fairly easily be achieved by relying on the control-dependency already in place. The second problem, which makes the flaw in the original argument, is that while schedule() will not change prev->state, it will read it a number of times (arguably too many times since it's marked volatile). The previous condition 'p->on_cpu == 0' was sufficient because that indicates schedule() has completed, and will no longer read prev->state. So now the trick is to make this same true for the (much) earlier 'prev->on_rq == 0' case. Furthermore, in order to make the ordering stick, the 'prev->on_rq = 0' assignment needs to he a RELEASE, but adding additional ordering to schedule() is an unwelcome proposition at the best of times, doubly so for mere accounting. Luckily we can push the prev->state load up before rq->lock, with the only caveat that we then have to re-read the state after. However, we know that if it changed, we no longer have to worry about the blocking path. This gives us the required ordering, if we block, we did the prev->state load before an (effective) smp_mb() and the p->on_rq store needs not change. With this we end up with the effective ordering: LOAD p->state LOAD-ACQUIRE p->on_rq == 0 MB STORE p->on_rq, 0 STORE p->state, TASK_WAKING which ensures the TASK_WAKING store happens after the prev->state load, and all is well again. Fixes: c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") Reported-by: Dave Jones Reported-by: Paul Gortmaker Signed-off-by: Peter Zijlstra (Intel) Tested-by: Dave Jones Tested-by: Paul Gortmaker Link: https://lkml.kernel.org/r/20200707102957.GN117543@hirez.programming.kicks-ass.net --- include/linux/sched.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 692e327d7455..683372943093 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -114,10 +114,6 @@ struct task_group; #define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) -#define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FROZEN) == 0 && \ - (task->state & TASK_NOLOAD) == 0) - #ifdef CONFIG_DEBUG_ATOMIC_SLEEP /* -- cgit v1.2.3 From 6ec4476ac82512f09c94aff5972654b70f3772b2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Jul 2020 10:48:35 -0700 Subject: Raise gcc version requirement to 4.9 I realize that we fairly recently raised it to 4.8, but the fact is, 4.9 is a much better minimum version to target. We have a number of workarounds for actual bugs in pre-4.9 gcc versions (including things like internal compiler errors on ARM), but we also have some syntactic workarounds for lacking features. In particular, raising the minimum to 4.9 means that we can now just assume _Generic() exists, which is likely the much better replacement for a lot of very convoluted built-time magic with conditionals on sizeof and/or __builtin_choose_expr() with same_type() etc. Using _Generic also means that you will need to have a very recent version of 'sparse', but thats easy to build yourself, and much less of a hassle than some old gcc version can be. The latest (in a long string) of reasons for minimum compiler version upgrades was commit 5435f73d5c4a ("efi/x86: Fix build with gcc 4"). Ard points out that RHEL 7 uses gcc-4.8, but the people who stay back on old RHEL versions persumably also don't build their own kernels anyway. And maybe they should cross-built or just have a little side affair with a newer compiler? Acked-by: Ard Biesheuvel Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- include/linux/bits.h | 3 +-- include/linux/compiler-gcc.h | 2 +- include/linux/compiler_types.h | 27 +-------------------------- 3 files changed, 3 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bits.h b/include/linux/bits.h index 4671fbf28842..7f475d59a097 100644 --- a/include/linux/bits.h +++ b/include/linux/bits.h @@ -18,8 +18,7 @@ * position @h. For example * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ -#if !defined(__ASSEMBLY__) && \ - (!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000) +#if !defined(__ASSEMBLY__) #include #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 1c74464c80c6..0b1dc61f3955 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -11,7 +11,7 @@ + __GNUC_PATCHLEVEL__) /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */ -#if GCC_VERSION < 40800 +#if GCC_VERSION < 40900 # error Sorry, your compiler is too old - please upgrade it. #endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index c3bf7710f69a..01dd58c74d80 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -252,32 +252,8 @@ struct ftrace_likely_data { * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving * non-scalar types unchanged. */ -#if (defined(CONFIG_CC_IS_GCC) && CONFIG_GCC_VERSION < 40900) || defined(__CHECKER__) /* - * We build this out of a couple of helper macros in a vain attempt to - * help you keep your lunch down while reading it. - */ -#define __pick_scalar_type(x, type, otherwise) \ - __builtin_choose_expr(__same_type(x, type), (type)0, otherwise) - -/* - * 'char' is not type-compatible with either 'signed char' or 'unsigned char', - * so we include the naked type here as well as the signed/unsigned variants. - */ -#define __pick_integer_type(x, type, otherwise) \ - __pick_scalar_type(x, type, \ - __pick_scalar_type(x, unsigned type, \ - __pick_scalar_type(x, signed type, otherwise))) - -#define __unqual_scalar_typeof(x) typeof( \ - __pick_integer_type(x, char, \ - __pick_integer_type(x, short, \ - __pick_integer_type(x, int, \ - __pick_integer_type(x, long, \ - __pick_integer_type(x, long long, x)))))) -#else -/* - * If supported, prefer C11 _Generic for better compile-times. As above, 'char' + * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char' * is not type-compatible with 'signed char', and we define a separate case. */ #define __scalar_type_to_expr_cases(type) \ @@ -293,7 +269,6 @@ struct ftrace_likely_data { __scalar_type_to_expr_cases(long), \ __scalar_type_to_expr_cases(long long), \ default: (x))) -#endif /* Is this type a native word size -- useful for atomic operations */ #define __native_word(t) \ -- cgit v1.2.3 From 160251842cd35a75edfb0a1d76afa3eb674ff40a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Jul 2020 11:49:23 -0700 Subject: kallsyms: Refactor kallsyms_show_value() to take cred In order to perform future tests against the cred saved during open(), switch kallsyms_show_value() to operate on a cred, and have all current callers pass current_cred(). This makes it very obvious where callers are checking the wrong credential in their "read" contexts. These will be fixed in the coming patches. Additionally switch return value to bool, since it is always used as a direct permission check, not a 0-on-success, negative-on-error style function return. Cc: stable@vger.kernel.org Signed-off-by: Kees Cook --- include/linux/filter.h | 2 +- include/linux/kallsyms.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 259377723603..55104f6c78e8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -889,7 +889,7 @@ static inline bool bpf_dump_raw_ok(void) /* Reconstruction of call-sites is dependent on kallsyms, * thus make dump the same restriction. */ - return kallsyms_show_value() == 1; + return kallsyms_show_value(current_cred()); } struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 98338dc6b5d2..481273f0c72d 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -18,6 +18,7 @@ #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) +struct cred; struct module; static inline int is_kernel_inittext(unsigned long addr) @@ -98,7 +99,7 @@ int lookup_symbol_name(unsigned long addr, char *symname); int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); /* How and when do we show kallsyms values? */ -extern int kallsyms_show_value(void); +extern bool kallsyms_show_value(const struct cred *cred); #else /* !CONFIG_KALLSYMS */ @@ -158,7 +159,7 @@ static inline int lookup_symbol_attrs(unsigned long addr, unsigned long *size, u return -ERANGE; } -static inline int kallsyms_show_value(void) +static inline bool kallsyms_show_value(const struct cred *cred) { return false; } -- cgit v1.2.3 From 63960260457a02af2a6cb35d75e6bdb17299c882 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Jul 2020 15:45:23 -0700 Subject: bpf: Check correct cred for CAP_SYSLOG in bpf_dump_raw_ok() When evaluating access control over kallsyms visibility, credentials at open() time need to be used, not the "current" creds (though in BPF's case, this has likely always been the same). Plumb access to associated file->f_cred down through bpf_dump_raw_ok() and its callers now that kallsysm_show_value() has been refactored to take struct cred. Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: bpf@vger.kernel.org Cc: stable@vger.kernel.org Fixes: 7105e828c087 ("bpf: allow for correlation of maps and helpers in dump") Signed-off-by: Kees Cook --- include/linux/filter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 55104f6c78e8..0b0144752d78 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -884,12 +884,12 @@ void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); bool bpf_helper_changes_pkt_data(void *func); -static inline bool bpf_dump_raw_ok(void) +static inline bool bpf_dump_raw_ok(const struct cred *cred) { /* Reconstruction of call-sites is dependent on kallsyms, * thus make dump the same restriction. */ - return kallsyms_show_value(current_cred()); + return kallsyms_show_value(cred); } struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, -- cgit v1.2.3 From a50ca29523b18baea548bdf5df9b4b923c2bb4f6 Mon Sep 17 00:00:00 2001 From: Dave Wang Date: Wed, 8 Jul 2020 22:25:03 -0700 Subject: Input: elan_i2c - add more hardware ID for Lenovo laptops This adds more hardware IDs for Elan touchpads found in various Lenovo laptops. Signed-off-by: Dave Wang Link: https://lore.kernel.org/r/000201d5a8bd$9fead3f0$dfc07bd0$@emc.com.tw Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- include/linux/input/elan-i2c-ids.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/input/elan-i2c-ids.h b/include/linux/input/elan-i2c-ids.h index 1ecb6b45812c..520858d12680 100644 --- a/include/linux/input/elan-i2c-ids.h +++ b/include/linux/input/elan-i2c-ids.h @@ -67,8 +67,15 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN062B", 0 }, { "ELAN062C", 0 }, { "ELAN062D", 0 }, + { "ELAN062E", 0 }, /* Lenovo V340 Whiskey Lake U */ + { "ELAN062F", 0 }, /* Lenovo V340 Comet Lake U */ { "ELAN0631", 0 }, { "ELAN0632", 0 }, + { "ELAN0633", 0 }, /* Lenovo S145 */ + { "ELAN0634", 0 }, /* Lenovo V340 Ice lake */ + { "ELAN0635", 0 }, /* Lenovo V1415-IIL */ + { "ELAN0636", 0 }, /* Lenovo V1415-Dali */ + { "ELAN0637", 0 }, /* Lenovo V1415-IGLR */ { "ELAN1000", 0 }, { } }; -- cgit v1.2.3 From 14b032b8f8fce03a546dcf365454bec8c4a58d7d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 9 Jul 2020 16:28:44 -0700 Subject: cgroup: Fix sock_cgroup_data on big-endian. In order for no_refcnt and is_data to be the lowest order two bits in the 'val' we have to pad out the bitfield of the u8. Fixes: ad0f75e5f57c ("cgroup: fix cgroup_sk_alloc() for sk_clone_lock()") Reported-by: Guenter Roeck Signed-off-by: David S. Miller --- include/linux/cgroup-defs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4f1cd0edc57d..fee0b5547cd0 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -792,6 +792,7 @@ struct sock_cgroup_data { struct { u8 is_data : 1; u8 no_refcnt : 1; + u8 unused : 6; u8 padding; u16 prioidx; u32 classid; @@ -801,6 +802,7 @@ struct sock_cgroup_data { u32 classid; u16 prioidx; u8 padding; + u8 unused : 6; u8 no_refcnt : 1; u8 is_data : 1; } __packed; -- cgit v1.2.3 From 88b3d5c90e9685be54dd5bc441970044020eca76 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 22 Jun 2020 09:03:31 +0300 Subject: net/mlx5e: Fix port buffers cell size value Device unit for port buffers size, xoff_threshold and xon_threshold is cells. Fix a bug in driver where cell unit size was hard-coded to 128 bytes. This hard-coded value is buggy, as it is wrong for some hardware versions. Driver to read cell size from SBCAM register and translate bytes to cell units accordingly. In order to fix the bug, this patch exposes SBCAM (Shared buffer capabilities mask) layout and defines. If SBCAM.cap_cell_size is valid, use it for all bytes to cells calculations. If not valid, fallback to 128. Cell size do not change on the fly per device. Instead of issuing SBCAM access reg command every time such translation is needed, cache it in mlx5e_dcbx as part of mlx5e_dcbnl_initialize(). Pass dcbx.port_buff_cell_sz as a param to every function that needs bytes to cells translation. While fixing the bug, move MLX5E_BUFFER_CELL_SHIFT macro to en_dcbnl.c, as it is only used by that file. Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration") Signed-off-by: Eran Ben Elisha Reviewed-by: Huy Nguyen Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + include/linux/mlx5/mlx5_ifc.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 13c0e4556eda..1e6ca716635a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -147,6 +147,7 @@ enum { MLX5_REG_MCDA = 0x9063, MLX5_REG_MCAM = 0x907f, MLX5_REG_MIRC = 0x9162, + MLX5_REG_SBCAM = 0xB01F, MLX5_REG_RESOURCE_DUMP = 0xC000, }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ca1887dd0423..073b79eacc99 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9960,6 +9960,34 @@ struct mlx5_ifc_pptb_reg_bits { u8 untagged_buff[0x4]; }; +struct mlx5_ifc_sbcam_reg_bits { + u8 reserved_at_0[0x8]; + u8 feature_group[0x8]; + u8 reserved_at_10[0x8]; + u8 access_reg_group[0x8]; + + u8 reserved_at_20[0x20]; + + u8 sb_access_reg_cap_mask[4][0x20]; + + u8 reserved_at_c0[0x80]; + + u8 sb_feature_cap_mask[4][0x20]; + + u8 reserved_at_1c0[0x40]; + + u8 cap_total_buffer_size[0x20]; + + u8 cap_cell_size[0x10]; + u8 cap_max_pg_buffers[0x8]; + u8 cap_num_pool_supported[0x8]; + + u8 reserved_at_240[0x8]; + u8 cap_sbsr_stat_size[0x8]; + u8 cap_max_tclass_data[0x8]; + u8 cap_max_cpu_ingress_tclass_sb[0x8]; +}; + struct mlx5_ifc_pbmc_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; -- cgit v1.2.3 From 8c080d3a974ad471d8324825851044284f1886c9 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:36:42 +0800 Subject: kgdb: enable arch to support XML packet. The XML packet could be supported by required architecture if the architecture defines CONFIG_HAVE_ARCH_KGDB_QXFER_PKT and implement its own kgdb_arch_handle_qxfer_pkt(). Except for the kgdb_arch_handle_qxfer_pkt(), the architecture also needs to record the feature supported by gdb stub into the kgdb_arch_gdb_stub_feature, and these features will be reported to host gdb when gdb stub receives the qSupported packet. Signed-off-by: Vincent Chen Acked-by: Daniel Thompson Signed-off-by: Palmer Dabbelt --- include/linux/kgdb.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 529116b0cabe..0e4e3a80d58c 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -176,6 +176,17 @@ kgdb_arch_handle_exception(int vector, int signo, int err_code, char *remcom_out_buffer, struct pt_regs *regs); +/** + * kgdb_arch_handle_qxfer_pkt - Handle architecture specific GDB XML + * packets. + * @remcom_in_buffer: The buffer of the packet we have read. + * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into. + */ + +extern void +kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer, + char *remcom_out_buffer); + /** * kgdb_call_nmi_hook - Call kgdb_nmicallback() on the current CPU * @ignored: This parameter is only here to match the prototype. -- cgit v1.2.3 From def0aa218e6d42231540329e6f5741fdec9e7da4 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 23 Jun 2020 13:37:25 +0800 Subject: kgdb: Move the extern declaration kgdb_has_hit_break() to generic kgdb.h Currently, only riscv kgdb.c uses the kgdb_has_hit_break() to identify the kgdb breakpoint. It causes other architectures will encounter the "no previous prototype" warnings if the compile option has W=1. Moving the declaration of extern kgdb_has_hit_break() from risc-v kgdb.h to generic kgdb.h to avoid generating these warnings. Signed-off-by: Vincent Chen Acked-by: Daniel Thompson Signed-off-by: Palmer Dabbelt --- include/linux/kgdb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 0e4e3a80d58c..477b8b7c908f 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -325,6 +325,7 @@ extern int kgdb_hex2mem(char *buf, char *mem, int count); extern int kgdb_isremovedbreak(unsigned long addr); extern void kgdb_schedule_breakpoint(void); +extern int kgdb_has_hit_break(unsigned long addr); extern int kgdb_handle_exception(int ex_vector, int signo, int err_code, -- cgit v1.2.3 From 6348dd291e3653534a9e28e6917569bc9967b35b Mon Sep 17 00:00:00 2001 From: Charan Teja Kalla Date: Fri, 19 Jun 2020 17:27:19 +0530 Subject: dmabuf: use spinlock to access dmabuf->name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There exists a sleep-while-atomic bug while accessing the dmabuf->name under mutex in the dmabuffs_dname(). This is caused from the SELinux permissions checks on a process where it tries to validate the inherited files from fork() by traversing them through iterate_fd() (which traverse files under spin_lock) and call match_file(security/selinux/hooks.c) where the permission checks happen. This audit information is logged using dump_common_audit_data() where it calls d_path() to get the file path name. If the file check happen on the dmabuf's fd, then it ends up in ->dmabuffs_dname() and use mutex to access dmabuf->name. The flow will be like below: flush_unauthorized_files() iterate_fd() spin_lock() --> Start of the atomic section. match_file() file_has_perm() avc_has_perm() avc_audit() slow_avc_audit() common_lsm_audit() dump_common_audit_data() audit_log_d_path() d_path() dmabuffs_dname() mutex_lock()--> Sleep while atomic. Call trace captured (on 4.19 kernels) is below: ___might_sleep+0x204/0x208 __might_sleep+0x50/0x88 __mutex_lock_common+0x5c/0x1068 __mutex_lock_common+0x5c/0x1068 mutex_lock_nested+0x40/0x50 dmabuffs_dname+0xa0/0x170 d_path+0x84/0x290 audit_log_d_path+0x74/0x130 common_lsm_audit+0x334/0x6e8 slow_avc_audit+0xb8/0xf8 avc_has_perm+0x154/0x218 file_has_perm+0x70/0x180 match_file+0x60/0x78 iterate_fd+0x128/0x168 selinux_bprm_committing_creds+0x178/0x248 security_bprm_committing_creds+0x30/0x48 install_exec_creds+0x1c/0x68 load_elf_binary+0x3a4/0x14e0 search_binary_handler+0xb0/0x1e0 So, use spinlock to access dmabuf->name to avoid sleep-while-atomic. Cc: [5.3+] Signed-off-by: Charan Teja Kalla Reviewed-by: Michael J. Ruhl Acked-by: Christian König [sumits: added comment to spinlock_t definition to avoid warning] Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/a83e7f0d-4e54-9848-4b58-e1acdbe06735@codeaurora.org --- include/linux/dma-buf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index ab0c156abee6..a2ca294eaebe 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -311,6 +311,7 @@ struct dma_buf { void *vmap_ptr; const char *exp_name; const char *name; + spinlock_t name_lock; /* spinlock to protect name access */ struct module *owner; struct list_head list_node; void *priv; -- cgit v1.2.3 From ec7bd78498f29680f536451fbdf9464e851273ed Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 1 Jul 2020 12:42:58 -0700 Subject: driver core: Rename dev_links_info.defer_sync to defer_hook The defer_sync field is used as a hook to add the device to the deferred_sync list. Rename it so that it's more meaningful for the next patch that'll also use this field as a hook to a deferred_fw_devlink list. Signed-off-by: Saravana Kannan Reviewed-by: Rafael J. Wysocki Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200701194259.3337652-3-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 15460a5ac024..9bb2bc7bb8e3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -433,7 +433,7 @@ enum dl_dev_state { * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. * @needs_suppliers: Hook to global list of devices waiting for suppliers. - * @defer_sync: Hook to global list of devices that have deferred sync_state. + * @defer_hook: Hook to global list of devices that have deferred sync_state. * @need_for_probe: If needs_suppliers is on a list, this indicates if the * suppliers are needed for probe or not. * @status: Driver status information. @@ -442,7 +442,7 @@ struct dev_links_info { struct list_head suppliers; struct list_head consumers; struct list_head needs_suppliers; - struct list_head defer_sync; + struct list_head defer_hook; bool need_for_probe; enum dl_dev_state status; }; -- cgit v1.2.3 From 2451e746478a6a6e981cfa66b62b791ca93b90c8 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 1 Jul 2020 12:42:59 -0700 Subject: driver core: Avoid deferred probe due to fw_devlink_pause/resume() With the earlier patch in this series, all devices that deferred probe due to fw_devlink_pause() will have their probes delayed till the deferred probe thread is kicked off during late_initcall. This will also affect all their consumers. This delayed probing in unnecessary. So this patch just keeps track of the devices that had their probe deferred due to fw_devlink_pause() and attempts to probe them once during fw_devlink_resume(). Fixes: 716a7a259690 ("driver core: fw_devlink: Add support for batching fwnode parsing") Signed-off-by: Saravana Kannan Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200701194259.3337652-4-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 9bb2bc7bb8e3..5efed864b387 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -433,7 +433,8 @@ enum dl_dev_state { * @suppliers: List of links to supplier devices. * @consumers: List of links to consumer devices. * @needs_suppliers: Hook to global list of devices waiting for suppliers. - * @defer_hook: Hook to global list of devices that have deferred sync_state. + * @defer_hook: Hook to global list of devices that have deferred sync_state or + * deferred fw_devlink. * @need_for_probe: If needs_suppliers is on a list, this indicates if the * suppliers are needed for probe or not. * @status: Driver status information. -- cgit v1.2.3 From b330966f79fb4fdc49183f58db113303695a750f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 14 Jul 2020 14:45:41 +0200 Subject: fuse: reject options on reconfigure via fsconfig(2) Previous patch changed handling of remount/reconfigure to ignore all options, including those that are unknown to the fuse kernel fs. This was done for backward compatibility, but this likely only affects the old mount(2) API. The new fsconfig(2) based reconfiguration could possibly be improved. This would make the new API less of a drop in replacement for the old, OTOH this is a good chance to get rid of some weirdnesses in the old API. Several other behaviors might make sense: 1) unknown options are rejected, known options are ignored 2) unknown options are rejected, known options are rejected if the value is changed, allowed otherwise 3) all options are rejected Prior to the backward compatibility fix to ignore all options all known options were accepted (1), even if they change the value of a mount parameter; fuse_reconfigure() does not look at the config values set by fuse_parse_param(). To fix that we'd need to verify that the value provided is the same as set in the initial configuration (2). The major drawback is that this is much more complex than just rejecting all attempts at changing options (3); i.e. all options signify initial configuration values and don't make sense on reconfigure. This patch opts for (3) with the rationale that no mount options are reconfigurable in fuse. Signed-off-by: Miklos Szeredi --- include/linux/fs_context.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 5f24fcbfbfb4..37e1e8f7f08d 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -109,6 +109,7 @@ struct fs_context { enum fs_context_phase phase:8; /* The phase the context is in */ bool need_free:1; /* Need to call ops->free() */ bool global:1; /* Goes into &init_user_ns */ + bool oldapi:1; /* Coming from mount(2) */ }; struct fs_context_operations { -- cgit v1.2.3 From 567f6a6eba0c09e5f502e0290e57651befa8aacb Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Tue, 14 Jul 2020 14:39:25 +0200 Subject: dma-direct: provide function to check physical memory area validity dma_coherent_ok() checks if a physical memory area fits a device's DMA constraints. Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Christoph Hellwig --- include/linux/dma-direct.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 5184735a0fe8..ab2e20cba951 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -69,6 +69,7 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, u64 dma_direct_get_required_mask(struct device *dev); gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, u64 *phys_mask); +bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size); void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, -- cgit v1.2.3