From 8d50cdf8b8341770bc6367bce40c0c1bb0e1d5b3 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Thu, 19 May 2022 20:32:13 -0700 Subject: x86/speculation/mmio: Add sysfs reporting for Processor MMIO Stale Data Add the sysfs reporting file for Processor MMIO Stale Data vulnerability. It exposes the vulnerability and mitigation state similar to the existing files for the other hardware vulnerabilities. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov --- include/linux/cpu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 54dc2f9a2d56..2c7477354744 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -65,6 +65,9 @@ extern ssize_t cpu_show_tsx_async_abort(struct device *dev, extern ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_mmio_stale_data(struct device *dev, + struct device_attribute *attr, + char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.2.3 From b6d9014a3335194590abdd2a2471ef5147a67645 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 30 May 2022 18:40:06 +0200 Subject: netfilter: nf_tables: delete flowtable hooks via transaction list Remove inactive bool field in nft_hook object that was introduced in abadb2f865d7 ("netfilter: nf_tables: delete devices from flowtable"). Move stale flowtable hooks to transaction list instead. Deleting twice the same device does not result in ENOENT. Fixes: abadb2f865d7 ("netfilter: nf_tables: delete devices from flowtable") Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 20af9d3557b9..279ae0fff7ad 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1090,7 +1090,6 @@ struct nft_stats { struct nft_hook { struct list_head list; - bool inactive; struct nf_hook_ops ops; struct rcu_head rcu; }; -- cgit v1.2.3 From a734510fa8b4e61e6a37176f0da01f4c55fa52de Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 25 May 2022 13:49:42 +0200 Subject: ata: libata: drop 'sas_last_tag' Unused now. Fixes: 4f1a22ee7b57 ("libata: Improve ATA queued command allocation") Cc: John Garry Signed-off-by: Hannes Reinecke Reviewed-by: John Garry Signed-off-by: Damien Le Moal --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 732de9014626..0f2a59c9c735 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -822,7 +822,6 @@ struct ata_port { struct ata_queued_cmd qcmd[ATA_MAX_QUEUE + 1]; u64 qc_active; int nr_active_links; /* #links with active qcs */ - unsigned int sas_last_tag; /* track next tag hw expects */ struct ata_link link; /* host default link */ struct ata_link *slave_link; /* see ata_slave_link_init() */ -- cgit v1.2.3 From 2130a790ca49763f724ec45cf93b9dd765e2023e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 2 Jun 2022 15:05:26 +0200 Subject: kernel: add platform_has() infrastructure Add a simple infrastructure for setting, resetting and querying platform feature flags. Flags can be either global or architecture specific. Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Tyshchenko Tested-by: Oleksandr Tyshchenko # Arm64 only Reviewed-by: Christoph Hellwig Acked-by: Borislav Petkov Signed-off-by: Juergen Gross --- include/asm-generic/Kbuild | 1 + include/asm-generic/platform-feature.h | 8 ++++++++ include/linux/platform-feature.h | 15 +++++++++++++++ 3 files changed, 24 insertions(+) create mode 100644 include/asm-generic/platform-feature.h create mode 100644 include/linux/platform-feature.h (limited to 'include') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 302506bbc2a4..8e47d483b524 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -44,6 +44,7 @@ mandatory-y += msi.h mandatory-y += pci.h mandatory-y += percpu.h mandatory-y += pgalloc.h +mandatory-y += platform-feature.h mandatory-y += preempt.h mandatory-y += rwonce.h mandatory-y += sections.h diff --git a/include/asm-generic/platform-feature.h b/include/asm-generic/platform-feature.h new file mode 100644 index 000000000000..4b0af3d51588 --- /dev/null +++ b/include/asm-generic/platform-feature.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_PLATFORM_FEATURE_H +#define _ASM_GENERIC_PLATFORM_FEATURE_H + +/* Number of arch specific feature flags. */ +#define PLATFORM_ARCH_FEAT_N 0 + +#endif /* _ASM_GENERIC_PLATFORM_FEATURE_H */ diff --git a/include/linux/platform-feature.h b/include/linux/platform-feature.h new file mode 100644 index 000000000000..6ed859928b97 --- /dev/null +++ b/include/linux/platform-feature.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PLATFORM_FEATURE_H +#define _PLATFORM_FEATURE_H + +#include +#include + +/* The platform features are starting with the architecture specific ones. */ +#define PLATFORM_FEAT_N (0 + PLATFORM_ARCH_FEAT_N) + +void platform_set(unsigned int feature); +void platform_clear(unsigned int feature); +bool platform_has(unsigned int feature); + +#endif /* _PLATFORM_FEATURE_H */ -- cgit v1.2.3 From 3f9dfbebdc48cebfbda738f6f3d1dbf6d7232f90 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 6 Jun 2022 08:09:16 +0200 Subject: virtio: replace arch_has_restricted_virtio_memory_access() Instead of using arch_has_restricted_virtio_memory_access() together with CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS, replace those with platform_has() and a new platform feature PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS. Signed-off-by: Juergen Gross Reviewed-by: Oleksandr Tyshchenko Tested-by: Oleksandr Tyshchenko # Arm64 only Reviewed-by: Christoph Hellwig Acked-by: Borislav Petkov --- include/linux/platform-feature.h | 6 +++++- include/linux/virtio_config.h | 9 --------- 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/platform-feature.h b/include/linux/platform-feature.h index 6ed859928b97..b2f48be999fa 100644 --- a/include/linux/platform-feature.h +++ b/include/linux/platform-feature.h @@ -6,7 +6,11 @@ #include /* The platform features are starting with the architecture specific ones. */ -#define PLATFORM_FEAT_N (0 + PLATFORM_ARCH_FEAT_N) + +/* Used to enable platform specific DMA handling for virtio devices. */ +#define PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS (0 + PLATFORM_ARCH_FEAT_N) + +#define PLATFORM_FEAT_N (1 + PLATFORM_ARCH_FEAT_N) void platform_set(unsigned int feature); void platform_clear(unsigned int feature); diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 9a36051ceb76..49c7c32815f1 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -604,13 +604,4 @@ static inline void virtio_cwrite64(struct virtio_device *vdev, _r; \ }) -#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS -int arch_has_restricted_virtio_memory_access(void); -#else -static inline int arch_has_restricted_virtio_memory_access(void) -{ - return 0; -} -#endif /* CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS */ - #endif /* _LINUX_VIRTIO_CONFIG_H */ -- cgit v1.2.3 From 9bf22421dc8a69cade3c994771637e9693ff0216 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Thu, 2 Jun 2022 22:23:46 +0300 Subject: arm/xen: Introduce xen_setup_dma_ops() This patch introduces new helper and places it in new header. The helper's purpose is to assign any Xen specific DMA ops in a single place. For now, we deal with xen-swiotlb DMA ops only. The one of the subsequent commits in current series will add xen-grant DMA ops case. Also re-use the xen_swiotlb_detect() check on Arm32. Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Stefano Stabellini [For arm64] Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/1654197833-25362-2-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- include/xen/arm/xen-ops.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/xen/arm/xen-ops.h (limited to 'include') diff --git a/include/xen/arm/xen-ops.h b/include/xen/arm/xen-ops.h new file mode 100644 index 000000000000..288deb1c3ac9 --- /dev/null +++ b/include/xen/arm/xen-ops.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARM_XEN_OPS_H +#define _ASM_ARM_XEN_OPS_H + +#include + +static inline void xen_setup_dma_ops(struct device *dev) +{ +#ifdef CONFIG_XEN + if (xen_swiotlb_detect()) + dev->dma_ops = &xen_swiotlb_dma_ops; +#endif +} + +#endif /* _ASM_ARM_XEN_OPS_H */ -- cgit v1.2.3 From 02a9e681a3f7998074f39ec265080bf934871530 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 2 Jun 2022 22:23:47 +0300 Subject: xen/grants: support allocating consecutive grants For support of virtio via grant mappings in rare cases larger mappings using consecutive grants are needed. Support those by adding a bitmap of free grants. As consecutive grants will be needed only in very rare cases (e.g. when configuring a virtio device with a multi-page ring), optimize for the normal case of non-consecutive allocations. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/1654197833-25362-3-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- include/xen/grant_table.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 527c9907f99c..e279be353e3f 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -127,10 +127,14 @@ int gnttab_try_end_foreign_access(grant_ref_t ref); */ int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head); +int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first); + void gnttab_free_grant_reference(grant_ref_t ref); void gnttab_free_grant_references(grant_ref_t head); +void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count); + int gnttab_empty_grant_references(const grant_ref_t *pprivate_head); int gnttab_claim_grant_reference(grant_ref_t *pprivate_head); -- cgit v1.2.3 From d6aca3504c7ded5f4f46957e3685b9344d9743dd Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 2 Jun 2022 22:23:48 +0300 Subject: xen/grant-dma-ops: Add option to restrict memory access under Xen Introduce Xen grant DMA-mapping layer which contains special DMA-mapping routines for providing grant references as DMA addresses to be used by frontends (e.g. virtio) in Xen guests. Add the needed functionality by providing a special set of DMA ops handling the needed grant operations for the I/O pages. The subsequent commit will introduce the use case for xen-grant DMA ops layer to enable using virtio devices in Xen guests in a safe manner. Signed-off-by: Juergen Gross Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/1654197833-25362-4-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- include/xen/xen-ops.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index c7c1b46ff4cd..afd586d717a4 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -214,4 +214,12 @@ static inline void xen_preemptible_hcall_end(void) { } #endif /* CONFIG_XEN_PV && !CONFIG_PREEMPTION */ +#ifdef CONFIG_XEN_GRANT_DMA_OPS +void xen_grant_setup_dma_ops(struct device *dev); +#else +static inline void xen_grant_setup_dma_ops(struct device *dev) +{ +} +#endif /* CONFIG_XEN_GRANT_DMA_OPS */ + #endif /* INCLUDE_XEN_OPS_H */ -- cgit v1.2.3 From fa1f57421e0b1c57843902c89728f823abc32f02 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 2 Jun 2022 22:23:49 +0300 Subject: xen/virtio: Enable restricted memory access using Xen grant mappings In order to support virtio in Xen guests add a config option XEN_VIRTIO enabling the user to specify whether in all Xen guests virtio should be able to access memory via Xen grant mappings only on the host side. Also set PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS feature from the guest initialization code on Arm and x86 if CONFIG_XEN_VIRTIO is enabled. Signed-off-by: Juergen Gross Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Stefano Stabellini Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/1654197833-25362-5-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- include/xen/xen.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/xen/xen.h b/include/xen/xen.h index a99bab817523..0780a81e140d 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -52,6 +52,14 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, extern u64 xen_saved_max_mem_size; #endif +#include + +static inline void xen_set_restricted_virtio_memory_access(void) +{ + if (IS_ENABLED(CONFIG_XEN_VIRTIO) && xen_domain()) + platform_set(PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS); +} + #ifdef CONFIG_XEN_UNPOPULATED_ALLOC int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages); void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages); -- cgit v1.2.3 From 625ab90ecdf7770bda7ae21c4d5c938aa9b43bb4 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Thu, 2 Jun 2022 22:23:52 +0300 Subject: xen/grant-dma-ops: Retrieve the ID of backend's domain for DT devices Use the presence of "iommus" property pointed to the IOMMU node with recently introduced "xen,grant-dma" compatible as a clear indicator of enabling Xen grant mappings scheme for that device and read the ID of Xen domain where the corresponding backend is running. The domid (domain ID) is used as an argument to the Xen grant mapping APIs. To avoid the deferred probe timeout which takes place after reusing generic IOMMU device tree bindings (because the IOMMU device never becomes available) enable recently introduced stub IOMMU driver by selecting XEN_GRANT_DMA_IOMMU. Also introduce xen_is_grant_dma_device() to check whether xen-grant DMA ops need to be set for a passed device. Remove the hardcoded domid 0 in xen_grant_setup_dma_ops(). Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/1654197833-25362-8-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- include/xen/xen-ops.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index afd586d717a4..80546960f8b7 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -216,10 +216,15 @@ static inline void xen_preemptible_hcall_end(void) { } #ifdef CONFIG_XEN_GRANT_DMA_OPS void xen_grant_setup_dma_ops(struct device *dev); +bool xen_is_grant_dma_device(struct device *dev); #else static inline void xen_grant_setup_dma_ops(struct device *dev) { } +static inline bool xen_is_grant_dma_device(struct device *dev) +{ + return false; +} #endif /* CONFIG_XEN_GRANT_DMA_OPS */ #endif /* INCLUDE_XEN_OPS_H */ -- cgit v1.2.3 From fea981610c25173e6e5d63ccd4fce49739663ab0 Mon Sep 17 00:00:00 2001 From: Oleksandr Tyshchenko Date: Thu, 2 Jun 2022 22:23:53 +0300 Subject: arm/xen: Assign xen-grant DMA ops for xen-grant DMA devices By assigning xen-grant DMA ops we will restrict memory access for passed device using Xen grant mappings. This is needed for using any virtualized device (e.g. virtio) in Xen guests in a safe manner. Please note, for the virtio devices the XEN_VIRTIO config should be enabled (it forces ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS). Signed-off-by: Oleksandr Tyshchenko Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/1654197833-25362-9-git-send-email-olekstysh@gmail.com Signed-off-by: Juergen Gross --- include/xen/arm/xen-ops.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/xen/arm/xen-ops.h b/include/xen/arm/xen-ops.h index 288deb1c3ac9..b0766a660338 100644 --- a/include/xen/arm/xen-ops.h +++ b/include/xen/arm/xen-ops.h @@ -3,11 +3,14 @@ #define _ASM_ARM_XEN_OPS_H #include +#include static inline void xen_setup_dma_ops(struct device *dev) { #ifdef CONFIG_XEN - if (xen_swiotlb_detect()) + if (xen_is_grant_dma_device(dev)) + xen_grant_setup_dma_ops(dev); + else if (xen_swiotlb_detect()) dev->dma_ops = &xen_swiotlb_dma_ops; #endif } -- cgit v1.2.3 From 3a41c64d9c1185a2f3a184015e2a9b78bfc99c71 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 6 Jun 2022 17:31:29 +0200 Subject: netfilter: nf_tables: bail out early if hardware offload is not supported If user requests for NFT_CHAIN_HW_OFFLOAD, then check if either device provides the .ndo_setup_tc interface or there is an indirect flow block that has been registered. Otherwise, bail out early from the preparation phase. Moreover, validate that family == NFPROTO_NETDEV and hook is NF_NETDEV_INGRESS. Fixes: c9626a2cbdb2 ("netfilter: nf_tables: add hardware offload support") Signed-off-by: Pablo Neira Ayuso --- include/net/flow_offload.h | 1 + include/net/netfilter/nf_tables_offload.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 021778a7e1af..6484095a8c01 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -612,5 +612,6 @@ int flow_indr_dev_setup_offload(struct net_device *dev, struct Qdisc *sch, enum tc_setup_type type, void *data, struct flow_block_offload *bo, void (*cleanup)(struct flow_block_cb *block_cb)); +bool flow_indr_dev_exists(void); #endif /* _NET_FLOW_OFFLOAD_H */ diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index 797147843958..3568b6a2f5f0 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -92,7 +92,7 @@ int nft_flow_rule_offload_commit(struct net *net); NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \ memset(&(__reg)->mask, 0xff, (__reg)->len); -int nft_chain_offload_priority(struct nft_base_chain *basechain); +bool nft_chain_offload_support(const struct nft_base_chain *basechain); int nft_offload_init(void); void nft_offload_exit(void); -- cgit v1.2.3 From 7b6c7a877cc616bc7dc9cd39646fe454acbed48b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 3 Jun 2022 08:04:44 -0700 Subject: x86/ftrace: Remove OBJECT_FILES_NON_STANDARD usage The file-wide OBJECT_FILES_NON_STANDARD annotation is used with CONFIG_FRAME_POINTER to tell objtool to skip the entire file when frame pointers are enabled. However that annotation is now deprecated because it doesn't work with IBT, where objtool runs on vmlinux.o instead of individual translation units. Instead, use more fine-grained function-specific annotations: - The 'save_mcount_regs' macro does funny things with the frame pointer. Use STACK_FRAME_NON_STANDARD_FP to tell objtool to ignore the functions using it. - The return_to_handler() "function" isn't actually a callable function. Instead of being called, it's returned to. The real return address isn't on the stack, so unwinding is already doomed no matter which unwinder is used. So just remove the STT_FUNC annotation, telling objtool to ignore it. That also removes the implicit ANNOTATE_NOENDBR, which now needs to be made explicit. Fixes the following warning: vmlinux.o: warning: objtool: __fentry__+0x16: return with modified stack frame Fixes: ed53a0d97192 ("x86/alternative: Use .ibt_endbr_seal to seal indirect calls") Reported-by: kernel test robot Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/b7a7a42fe306aca37826043dac89e113a1acdbac.1654268610.git.jpoimboe@kernel.org --- include/linux/objtool.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 6491fa8fba6d..15b940ec1eac 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -143,6 +143,12 @@ struct unwind_hint { .popsection .endm +.macro STACK_FRAME_NON_STANDARD_FP func:req +#ifdef CONFIG_FRAME_POINTER + STACK_FRAME_NON_STANDARD \func +#endif +.endm + .macro ANNOTATE_NOENDBR .Lhere_\@: .pushsection .discard.noendbr -- cgit v1.2.3 From 77991645952c21962a095910c51fe0f73d35bf91 Mon Sep 17 00:00:00 2001 From: Roger Knecht Date: Sat, 21 May 2022 14:47:45 +0200 Subject: crc-itu-t: fix typo in CRC ITU-T polynomial comment The code comment says that the polynomial is x^16 + x^12 + x^15 + 1, but the correct polynomial is x^16 + x^12 + x^5 + 1. Quoting from page 2 in the ITU-T V.41 specification [1]: 2 Encoding and checking process The service bits and information bits, taken in conjunction, correspond to the coefficients of a message polynomial having terms from x^(n-1) (n = total number of bits in a block or sequence) down to x^16. This polynomial is divided, modulo 2, by the generating polynomial x^16 + x^12 + x^5 + 1. The hex (truncated) polynomial 0x1021 and CRC code implementation are correct, however. [1] https://www.itu.int/rec/T-REC-V.41-198811-I/en Signed-off-by: Roger Knecht Acked-by: Randy Dunlap Signed-off-by: Jason A. Donenfeld --- include/linux/crc-itu-t.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/crc-itu-t.h b/include/linux/crc-itu-t.h index a4367051e192..2f991a427ade 100644 --- a/include/linux/crc-itu-t.h +++ b/include/linux/crc-itu-t.h @@ -4,7 +4,7 @@ * * Implements the standard CRC ITU-T V.41: * Width 16 - * Poly 0x1021 (x^16 + x^12 + x^15 + 1) + * Poly 0x1021 (x^16 + x^12 + x^5 + 1) * Init 0 */ -- cgit v1.2.3 From c4f135d643823a869becfa87539f7820ef9d5bfa Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 1 Jun 2022 16:32:47 +0900 Subject: workqueue: Wrap flush_workqueue() using a macro Since flush operation synchronously waits for completion, flushing system-wide WQs (e.g. system_wq) might introduce possibility of deadlock due to unexpected locking dependency. Tejun Heo commented at [1] that it makes no sense at all to call flush_workqueue() on the shared WQs as the caller has no idea what it's gonna end up waiting for. Although there is flush_scheduled_work() which flushes system_wq WQ with "Think twice before calling this function! It's very easy to get into trouble if you don't take great care." warning message, syzbot found a circular locking dependency caused by flushing system_wq WQ [2]. Therefore, let's change the direction to that developers had better use their local WQs if flush_scheduled_work()/flush_workqueue(system_*_wq) is inevitable. Steps for converting system-wide WQs into local WQs are explained at [3], and a conversion to stop flushing system-wide WQs is in progress. Now we want some mechanism for preventing developers who are not aware of this conversion from again start flushing system-wide WQs. Since I found that WARN_ON() is complete but awkward approach for teaching developers about this problem, let's use __compiletime_warning() for incomplete but handy approach. For completeness, we will also insert WARN_ON() into __flush_workqueue() after all in-tree users stopped calling flush_scheduled_work(). Link: https://lore.kernel.org/all/YgnQGZWT%2Fn3VAITX@slm.duckdns.org/ [1] Link: https://syzkaller.appspot.com/bug?extid=bde0f89deacca7c765b8 [2] Link: https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp [3] Signed-off-by: Tetsuo Handa Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 64 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 7fee9b6cfede..e1f1c8b1121b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -445,7 +445,7 @@ extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay); extern bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork); -extern void flush_workqueue(struct workqueue_struct *wq); +extern void __flush_workqueue(struct workqueue_struct *wq); extern void drain_workqueue(struct workqueue_struct *wq); extern int schedule_on_each_cpu(work_func_t func); @@ -563,15 +563,23 @@ static inline bool schedule_work(struct work_struct *work) return queue_work(system_wq, work); } +/* + * Detect attempt to flush system-wide workqueues at compile time when possible. + * + * See https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp + * for reasons and steps for converting system-wide workqueues into local workqueues. + */ +extern void __warn_flushing_systemwide_wq(void) + __compiletime_warning("Please avoid flushing system-wide workqueues."); + /** * flush_scheduled_work - ensure that any scheduled work has run to completion. * * Forces execution of the kernel-global workqueue and blocks until its * completion. * - * Think twice before calling this function! It's very easy to get into - * trouble if you don't take great care. Either of the following situations - * will lead to deadlock: + * It's very easy to get into trouble if you don't take great care. + * Either of the following situations will lead to deadlock: * * One of the work items currently on the workqueue needs to acquire * a lock held by your code or its caller. @@ -586,11 +594,51 @@ static inline bool schedule_work(struct work_struct *work) * need to know that a particular work item isn't queued and isn't running. * In such cases you should use cancel_delayed_work_sync() or * cancel_work_sync() instead. + * + * Please stop calling this function! A conversion to stop flushing system-wide + * workqueues is in progress. This function will be removed after all in-tree + * users stopped calling this function. */ -static inline void flush_scheduled_work(void) -{ - flush_workqueue(system_wq); -} +/* + * The background of commit 771c035372a036f8 ("deprecate the + * '__deprecated' attribute warnings entirely and for good") is that, + * since Linus builds all modules between every single pull he does, + * the standard kernel build needs to be _clean_ in order to be able to + * notice when new problems happen. Therefore, don't emit warning while + * there are in-tree users. + */ +#define flush_scheduled_work() \ +({ \ + if (0) \ + __warn_flushing_systemwide_wq(); \ + __flush_workqueue(system_wq); \ +}) + +/* + * Although there is no longer in-tree caller, for now just emit warning + * in order to give out-of-tree callers time to update. + */ +#define flush_workqueue(wq) \ +({ \ + struct workqueue_struct *_wq = (wq); \ + \ + if ((__builtin_constant_p(_wq == system_wq) && \ + _wq == system_wq) || \ + (__builtin_constant_p(_wq == system_highpri_wq) && \ + _wq == system_highpri_wq) || \ + (__builtin_constant_p(_wq == system_long_wq) && \ + _wq == system_long_wq) || \ + (__builtin_constant_p(_wq == system_unbound_wq) && \ + _wq == system_unbound_wq) || \ + (__builtin_constant_p(_wq == system_freezable_wq) && \ + _wq == system_freezable_wq) || \ + (__builtin_constant_p(_wq == system_power_efficient_wq) && \ + _wq == system_power_efficient_wq) || \ + (__builtin_constant_p(_wq == system_freezable_power_efficient_wq) && \ + _wq == system_freezable_power_efficient_wq)) \ + __warn_flushing_systemwide_wq(); \ + __flush_workqueue(_wq); \ +}) /** * schedule_delayed_work_on - queue work in global workqueue on CPU after delay -- cgit v1.2.3 From 873a400938b31a1e443c4d94b560b78300787540 Mon Sep 17 00:00:00 2001 From: Wonhyuk Yang Date: Wed, 4 May 2022 11:32:03 +0900 Subject: workqueue: Fix type of cpu in trace event The trace event "workqueue_queue_work" use unsigned int type for req_cpu, cpu. This casue confusing cpu number like below log. $ cat /sys/kernel/debug/tracing/trace cat-317 [001] ...: workqueue_queue_work: ... req_cpu=8192 cpu=4294967295 So, change unsigned type to signed type in the trace event. After applying this patch, cpu number will be printed as -1 instead of 4294967295 as folllows. $ cat /sys/kernel/debug/tracing/trace cat-1338 [002] ...: workqueue_queue_work: ... req_cpu=8192 cpu=-1 Cc: Baik Song An Cc: Hong Yeon Kim Cc: Taeung Song Cc: linuxgeek@linuxgeek.io Signed-off-by: Wonhyuk Yang Signed-off-by: Tejun Heo --- include/trace/events/workqueue.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index 6154a2e72bce..262d52021c23 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -22,7 +22,7 @@ struct pool_workqueue; */ TRACE_EVENT(workqueue_queue_work, - TP_PROTO(unsigned int req_cpu, struct pool_workqueue *pwq, + TP_PROTO(int req_cpu, struct pool_workqueue *pwq, struct work_struct *work), TP_ARGS(req_cpu, pwq, work), @@ -31,8 +31,8 @@ TRACE_EVENT(workqueue_queue_work, __field( void *, work ) __field( void *, function) __string( workqueue, pwq->wq->name) - __field( unsigned int, req_cpu ) - __field( unsigned int, cpu ) + __field( int, req_cpu ) + __field( int, cpu ) ), TP_fast_assign( @@ -43,7 +43,7 @@ TRACE_EVENT(workqueue_queue_work, __entry->cpu = pwq->pool->cpu; ), - TP_printk("work struct=%p function=%ps workqueue=%s req_cpu=%u cpu=%u", + TP_printk("work struct=%p function=%ps workqueue=%s req_cpu=%d cpu=%d", __entry->work, __entry->function, __get_str(workqueue), __entry->req_cpu, __entry->cpu) ); -- cgit v1.2.3 From 62ed448cc53b654036f7d7f3c99f299d79ad14c3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 7 Jun 2022 16:47:58 -0400 Subject: SUNRPC: Optimize xdr_reserve_space() Transitioning between encode buffers is quite infrequent. It happens about 1 time in 400 calls to xdr_reserve_space(), measured on NFSD with a typical build/test workload. Force the compiler to remove that code from xdr_reserve_space(), which is a hot path on both the server and the client. This change reduces the size of xdr_reserve_space() from 10 cache lines to 2 when compiled with -Os. Signed-off-by: Chuck Lever Reviewed-by: J. Bruce Fields --- include/linux/sunrpc/xdr.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 4417f667c757..5860f32e3958 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -243,7 +243,7 @@ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes); -extern void xdr_commit_encode(struct xdr_stream *xdr); +extern void __xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, @@ -306,6 +306,20 @@ xdr_reset_scratch_buffer(struct xdr_stream *xdr) xdr_set_scratch_buffer(xdr, NULL, 0); } +/** + * xdr_commit_encode - Ensure all data is written to xdr->buf + * @xdr: pointer to xdr_stream + * + * Handle encoding across page boundaries by giving the caller a + * temporary location to write to, then later copying the data into + * place. __xdr_commit_encode() does that copying. + */ +static inline void xdr_commit_encode(struct xdr_stream *xdr) +{ + if (unlikely(xdr->scratch.iov_len)) + __xdr_commit_encode(xdr); +} + /** * xdr_stream_remaining - Return the number of bytes remaining in the stream * @xdr: pointer to struct xdr_stream -- cgit v1.2.3 From f93431c86b631bbca5614c66f966bf3ddb3c2803 Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Tue, 7 Jun 2022 20:00:27 +0800 Subject: ipv6: Fix signed integer overflow in __ip6_append_data Resurrect ubsan overflow checks and ubsan report this warning, fix it by change the variable [length] type to size_t. UBSAN: signed-integer-overflow in net/ipv6/ip6_output.c:1489:19 2147479552 + 8567 cannot be represented in type 'int' CPU: 0 PID: 253 Comm: err Not tainted 5.16.0+ #1 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x214/0x230 show_stack+0x30/0x78 dump_stack_lvl+0xf8/0x118 dump_stack+0x18/0x30 ubsan_epilogue+0x18/0x60 handle_overflow+0xd0/0xf0 __ubsan_handle_add_overflow+0x34/0x44 __ip6_append_data.isra.48+0x1598/0x1688 ip6_append_data+0x128/0x260 udpv6_sendmsg+0x680/0xdd0 inet6_sendmsg+0x54/0x90 sock_sendmsg+0x70/0x88 ____sys_sendmsg+0xe8/0x368 ___sys_sendmsg+0x98/0xe0 __sys_sendmmsg+0xf4/0x3b8 __arm64_sys_sendmmsg+0x34/0x48 invoke_syscall+0x64/0x160 el0_svc_common.constprop.4+0x124/0x300 do_el0_svc+0x44/0xc8 el0_svc+0x3c/0x1e8 el0t_64_sync_handler+0x88/0xb0 el0t_64_sync+0x16c/0x170 Changes since v1: -Change the variable [length] type to unsigned, as Eric Dumazet suggested. Changes since v2: -Don't change exthdrlen type in ip6_make_skb, as Paolo Abeni suggested. Changes since v3: -Don't change ulen type in udpv6_sendmsg and l2tp_ip6_sendmsg, as Jakub Kicinski suggested. Reported-by: Hulk Robot Signed-off-by: Wang Yufen Link: https://lore.kernel.org/r/20220607120028.845916-1-wangyufen@huawei.com Signed-off-by: Jakub Kicinski --- include/net/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5b38bf1a586b..de9dcc5652c4 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1063,7 +1063,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr); int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, + void *from, size_t length, int transhdrlen, struct ipcm6_cookie *ipc6, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags); @@ -1079,7 +1079,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue, struct sk_buff *ip6_make_skb(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, + void *from, size_t length, int transhdrlen, struct ipcm6_cookie *ipc6, struct rt6_info *rt, unsigned int flags, struct inet_cork_full *cork); -- cgit v1.2.3 From d5a37b19983725d2045588cfa3a4699f5b39ae26 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Jun 2022 08:34:07 +0200 Subject: block: remove bioset_init_from_src Unused now, and the interface never really made a whole lot of sense to start with. Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- include/linux/bio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 1cf3738ef1ea..992ee987f273 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -403,7 +403,6 @@ enum { extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags); extern void bioset_exit(struct bio_set *); extern int biovec_init_pool(mempool_t *pool, int pool_entries); -extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src); struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask, -- cgit v1.2.3 From 00d1f546470d89e072dd3cda12b5c794341e7268 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 9 Jun 2022 12:19:01 +0800 Subject: vdpa: make get_vq_group and set_group_asid optional This patch makes get_vq_group and set_group_asid optional. This is needed to unbreak the vDPA parent that doesn't support multiple address spaces. Cc: Gautam Dawar Fixes: aaca8373c4b1 ("vhost-vdpa: support ASID based IOTLB API") Signed-off-by: Jason Wang Message-Id: <20220609041901.2029-1-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 4700a88a28f6..7b4a13d3bd91 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -178,7 +178,8 @@ struct vdpa_map_file { * for the device * @vdev: vdpa device * Returns virtqueue algin requirement - * @get_vq_group: Get the group id for a specific virtqueue + * @get_vq_group: Get the group id for a specific + * virtqueue (optional) * @vdev: vdpa device * @idx: virtqueue index * Returns u32: group id for this virtqueue @@ -243,7 +244,7 @@ struct vdpa_map_file { * Returns the iova range supported by * the device. * @set_group_asid: Set address space identifier for a - * virtqueue group + * virtqueue group (optional) * @vdev: vdpa device * @group: virtqueue group * @asid: address space id for this group -- cgit v1.2.3 From 4527d47bb63a134c4483a1a478d0ff5874b466c7 Mon Sep 17 00:00:00 2001 From: "GONG, Ruiqi" Date: Tue, 7 Jun 2022 19:08:48 +0800 Subject: drm/atomic: fix warning of unused variable Fix the `unused-but-set-variable` warning as how other iteration wrappers do. Link: https://lore.kernel.org/all/202206071049.pofHsRih-lkp@intel.com/ Reported-by: kernel test robot Signed-off-by: GONG, Ruiqi Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20220607110848.941486-1-gongruiqi1@huawei.com --- include/drm/drm_atomic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 0777725085df..10b1990bc1f6 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -1022,6 +1022,7 @@ void drm_state_dump(struct drm_device *dev, struct drm_printer *p); for ((__i) = 0; \ (__i) < (__state)->num_private_objs && \ ((obj) = (__state)->private_objs[__i].ptr, \ + (void)(obj) /* Only to avoid unused-but-set-variable warning */, \ (new_obj_state) = (__state)->private_objs[__i].new_state, 1); \ (__i)++) -- cgit v1.2.3 From 69a37a8ba1b408a1c7616494aa7018e4b3844cbe Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 8 Jun 2022 15:18:34 -0400 Subject: mm/huge_memory: Fix xarray node memory leak If xas_split_alloc() fails to allocate the necessary nodes to complete the xarray entry split, it sets the xa_state to -ENOMEM, which xas_nomem() then interprets as "Please allocate more memory", not as "Please free any unnecessary memory" (which was the intended outcome). It's confusing to use xas_nomem() to free memory in this context, so call xas_destroy() instead. Reported-by: syzbot+9e27a75a8c24f3fe75c1@syzkaller.appspotmail.com Fixes: 6b24ca4a1a8d ("mm: Use multi-index entries in the page cache") Cc: stable@vger.kernel.org Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 72feab5ea8d4..c29e11b2c073 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1508,6 +1508,7 @@ void *xas_find_marked(struct xa_state *, unsigned long max, xa_mark_t); void xas_init_marks(const struct xa_state *); bool xas_nomem(struct xa_state *, gfp_t); +void xas_destroy(struct xa_state *); void xas_pause(struct xa_state *); void xas_create_range(struct xa_state *); -- cgit v1.2.3 From 334f6f53abcf57782bd2fe81da1cbd893e4ef05c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 9 Jun 2022 09:13:57 -0400 Subject: mm: Add kernel-doc for folio->mlock_count Fix "./include/linux/mm_types.h:279: warning: Function parameter or member 'mlock_count' not described in 'folio'". Also neaten the html by hiding the anon struct. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/mm_types.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b34ff2cdbc4f..c29ab4c0cd5c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -227,6 +227,7 @@ struct page { * struct folio - Represents a contiguous set of bytes. * @flags: Identical to the page flags. * @lru: Least Recently Used list; tracks how recently this folio was used. + * @mlock_count: Number of times this folio has been pinned by mlock(). * @mapping: The file this page belongs to, or refers to the anon_vma for * anonymous memory. * @index: Offset within the file, in units of pages. For anonymous memory, @@ -255,10 +256,14 @@ struct folio { unsigned long flags; union { struct list_head lru; + /* private: avoid cluttering the output */ struct { void *__filler; + /* public: */ unsigned int mlock_count; + /* private: */ }; + /* public: */ }; struct address_space *mapping; pgoff_t index; -- cgit v1.2.3 From 874c8ca1e60b2c564a48f7e7acc40d328d5c8733 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 Jun 2022 21:46:04 +0100 Subject: netfs: Fix gcc-12 warning by embedding vfs inode in netfs_i_context While randstruct was satisfied with using an open-coded "void *" offset cast for the netfs_i_context <-> inode casting, __builtin_object_size() as used by FORTIFY_SOURCE was not as easily fooled. This was causing the following complaint[1] from gcc v12: In file included from include/linux/string.h:253, from include/linux/ceph/ceph_debug.h:7, from fs/ceph/inode.c:2: In function 'fortify_memset_chk', inlined from 'netfs_i_context_init' at include/linux/netfs.h:326:2, inlined from 'ceph_alloc_inode' at fs/ceph/inode.c:463:2: include/linux/fortify-string.h:242:25: warning: call to '__write_overflow_field' declared with attribute warning: detected write beyond size of field (1st parameter); maybe use struct_group()? [-Wattribute-warning] 242 | __write_overflow_field(p_size_field, size); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix this by embedding a struct inode into struct netfs_i_context (which should perhaps be renamed to struct netfs_inode). The struct inode vfs_inode fields are then removed from the 9p, afs, ceph and cifs inode structs and vfs_inode is then simply changed to "netfs.inode" in those filesystems. Further, rename netfs_i_context to netfs_inode, get rid of the netfs_inode() function that converted a netfs_i_context pointer to an inode pointer (that can now be done with &ctx->inode) and rename the netfs_i_context() function to netfs_inode() (which is now a wrapper around container_of()). Most of the changes were done with: perl -p -i -e 's/vfs_inode/netfs.inode/'g \ `git grep -l 'vfs_inode' -- fs/{9p,afs,ceph,cifs}/*.[ch]` Kees suggested doing it with a pair structure[2] and a special declarator to insert that into the network filesystem's inode wrapper[3], but I think it's cleaner to embed it - and then it doesn't matter if struct randomisation reorders things. Dave Chinner suggested using a filesystem-specific VFS_I() function in each filesystem to convert that filesystem's own inode wrapper struct into the VFS inode struct[4]. Version #2: - Fix a couple of missed name changes due to a disabled cifs option. - Rename nfs_i_context to nfs_inode - Use "netfs" instead of "nic" as the member name in per-fs inode wrapper structs. [ This also undoes commit 507160f46c55 ("netfs: gcc-12: temporarily disable '-Wattribute-warning' for now") that is no longer needed ] Fixes: bc899ee1c898 ("netfs: Add a netfs inode context") Reported-by: Jeff Layton Signed-off-by: David Howells Reviewed-by: Jeff Layton Reviewed-by: Kees Cook Reviewed-by: Xiubo Li cc: Jonathan Corbet cc: Eric Van Hensbergen cc: Latchesar Ionkov cc: Dominique Martinet cc: Christian Schoenebeck cc: Marc Dionne cc: Ilya Dryomov cc: Steve French cc: William Kucharski cc: "Matthew Wilcox (Oracle)" cc: Dave Chinner cc: linux-doc@vger.kernel.org cc: v9fs-developer@lists.sourceforge.net cc: linux-afs@lists.infradead.org cc: ceph-devel@vger.kernel.org cc: linux-cifs@vger.kernel.org cc: samba-technical@lists.samba.org cc: linux-fsdevel@vger.kernel.org cc: linux-hardening@vger.kernel.org Link: https://lore.kernel.org/r/d2ad3a3d7bdd794c6efb562d2f2b655fb67756b9.camel@kernel.org/ [1] Link: https://lore.kernel.org/r/20220517210230.864239-1-keescook@chromium.org/ [2] Link: https://lore.kernel.org/r/20220518202212.2322058-1-keescook@chromium.org/ [3] Link: https://lore.kernel.org/r/20220524101205.GI2306852@dread.disaster.area/ [4] Link: https://lore.kernel.org/r/165296786831.3591209.12111293034669289733.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/165305805651.4094995.7763502506786714216.stgit@warthog.procyon.org.uk # v2 Signed-off-by: Linus Torvalds --- include/linux/netfs.h | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 77fa6a61706a..6dbb4c9ce50d 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -119,9 +119,10 @@ typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, bool was_async); /* - * Per-inode description. This must be directly after the inode struct. + * Per-inode context. This wraps the VFS inode. */ -struct netfs_i_context { +struct netfs_inode { + struct inode inode; /* The VFS inode */ const struct netfs_request_ops *ops; #if IS_ENABLED(CONFIG_FSCACHE) struct fscache_cookie *cache; @@ -256,7 +257,7 @@ struct netfs_cache_ops { * boundary as appropriate. */ enum netfs_io_source (*prepare_read)(struct netfs_io_subrequest *subreq, - loff_t i_size); + loff_t i_size); /* Prepare a write operation, working out what part of the write we can * actually do. @@ -288,45 +289,35 @@ extern void netfs_put_subrequest(struct netfs_io_subrequest *subreq, extern void netfs_stats_show(struct seq_file *); /** - * netfs_i_context - Get the netfs inode context from the inode + * netfs_inode - Get the netfs inode context from the inode * @inode: The inode to query * * Get the netfs lib inode context from the network filesystem's inode. The * context struct is expected to directly follow on from the VFS inode struct. */ -static inline struct netfs_i_context *netfs_i_context(struct inode *inode) +static inline struct netfs_inode *netfs_inode(struct inode *inode) { - return (void *)inode + sizeof(*inode); + return container_of(inode, struct netfs_inode, inode); } /** - * netfs_inode - Get the netfs inode from the inode context - * @ctx: The context to query - * - * Get the netfs inode from the netfs library's inode context. The VFS inode - * is expected to directly precede the context struct. - */ -static inline struct inode *netfs_inode(struct netfs_i_context *ctx) -{ - return (void *)ctx - sizeof(struct inode); -} - -/** - * netfs_i_context_init - Initialise a netfs lib context + * netfs_inode_init - Initialise a netfslib inode context * @inode: The inode with which the context is associated * @ops: The netfs's operations list * * Initialise the netfs library context struct. This is expected to follow on * directly from the VFS inode struct. */ -static inline void netfs_i_context_init(struct inode *inode, - const struct netfs_request_ops *ops) +static inline void netfs_inode_init(struct inode *inode, + const struct netfs_request_ops *ops) { - struct netfs_i_context *ctx = netfs_i_context(inode); + struct netfs_inode *ctx = netfs_inode(inode); - memset(ctx, 0, sizeof(*ctx)); ctx->ops = ops; ctx->remote_i_size = i_size_read(inode); +#if IS_ENABLED(CONFIG_FSCACHE) + ctx->cache = NULL; +#endif } /** @@ -338,7 +329,7 @@ static inline void netfs_i_context_init(struct inode *inode, */ static inline void netfs_resize_file(struct inode *inode, loff_t new_i_size) { - struct netfs_i_context *ctx = netfs_i_context(inode); + struct netfs_inode *ctx = netfs_inode(inode); ctx->remote_i_size = new_i_size; } @@ -352,7 +343,7 @@ static inline void netfs_resize_file(struct inode *inode, loff_t new_i_size) static inline struct fscache_cookie *netfs_i_cookie(struct inode *inode) { #if IS_ENABLED(CONFIG_FSCACHE) - struct netfs_i_context *ctx = netfs_i_context(inode); + struct netfs_inode *ctx = netfs_inode(inode); return ctx->cache; #else return NULL; -- cgit v1.2.3 From b489a6e5871690735752f8875f411e4d0cd8e5df Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 8 Jun 2022 18:34:25 +0300 Subject: tls: Rename TLS_INFO_ZC_SENDFILE to TLS_INFO_ZC_TX To embrace possible future optimizations of TLS, rename zerocopy sendfile definitions to more generic ones: * setsockopt: TLS_TX_ZEROCOPY_SENDFILE- > TLS_TX_ZEROCOPY_RO * sock_diag: TLS_INFO_ZC_SENDFILE -> TLS_INFO_ZC_RO_TX RO stands for readonly and emphasizes that the application shouldn't modify the data being transmitted with zerocopy to avoid potential disconnection. Fixes: c1318b39c7d3 ("tls: Add opt-in zerocopy mode of sendfile()") Signed-off-by: Maxim Mikityanskiy Link: https://lore.kernel.org/r/20220608153425.3151146-1-maximmi@nvidia.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/tls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h index ac39328eabe7..bb8f80812b0b 100644 --- a/include/uapi/linux/tls.h +++ b/include/uapi/linux/tls.h @@ -39,7 +39,7 @@ /* TLS socket options */ #define TLS_TX 1 /* Set transmit parameters */ #define TLS_RX 2 /* Set receive parameters */ -#define TLS_TX_ZEROCOPY_SENDFILE 3 /* transmit zerocopy sendfile */ +#define TLS_TX_ZEROCOPY_RO 3 /* TX zerocopy (only sendfile now) */ /* Supported versions */ #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) @@ -161,7 +161,7 @@ enum { TLS_INFO_CIPHER, TLS_INFO_TXCONF, TLS_INFO_RXCONF, - TLS_INFO_ZC_SENDFILE, + TLS_INFO_ZC_RO_TX, __TLS_INFO_MAX, }; #define TLS_INFO_MAX (__TLS_INFO_MAX - 1) -- cgit v1.2.3 From 39e0f991a62ed5efabd20711a7b6e7da92603170 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 7 Jun 2022 17:00:16 +0200 Subject: random: mark bootloader randomness code as __init add_bootloader_randomness() and the variables it touches are only used during __init and not after, so mark these as __init. At the same time, unexport this, since it's only called by other __init code that's built-in. Cc: stable@vger.kernel.org Fixes: 428826f5358c ("fdt: add support for rng-seed") Signed-off-by: Jason A. Donenfeld --- include/linux/random.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index fae0c84027fd..223b4bd584e7 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -13,7 +13,7 @@ struct notifier_block; void add_device_randomness(const void *buf, size_t len); -void add_bootloader_randomness(const void *buf, size_t len); +void __init add_bootloader_randomness(const void *buf, size_t len); void add_input_randomness(unsigned int type, unsigned int code, unsigned int value) __latent_entropy; void add_interrupt_randomness(int irq) __latent_entropy; -- cgit v1.2.3 From e052a478a7daeca67664f7addd308ff51dd40654 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 8 Jun 2022 10:31:25 +0200 Subject: random: remove rng_has_arch_random() With arch randomness being used by every distro and enabled in defconfigs, the distinction between rng_has_arch_random() and rng_is_initialized() is now rather small. In fact, the places where they differ are now places where paranoid users and system builders really don't want arch randomness to be used, in which case we should respect that choice, or places where arch randomness is known to be broken, in which case that choice is all the more important. So this commit just removes the function and its one user. Reviewed-by: Petr Mladek # for vsprintf.c Signed-off-by: Jason A. Donenfeld --- include/linux/random.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index 223b4bd584e7..20e389a14e5c 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -74,7 +74,6 @@ static inline unsigned long get_random_canary(void) int __init random_init(const char *command_line); bool rng_is_initialized(void); -bool rng_has_arch_random(void); int wait_for_random_bytes(void); /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes). -- cgit v1.2.3 From cfab87c2c2715763dc7e43d9968bdaa01cde4bc3 Mon Sep 17 00:00:00 2001 From: Vijaya Krishna Nivarthi Date: Wed, 8 Jun 2022 00:22:44 +0530 Subject: serial: core: Introduce callback for start_rx and do stop_rx in suspend only if this callback implementation is present. In suspend sequence there is a need to perform stop_rx during suspend sequence to prevent any asynchronous data over rx line. However this can cause problem to drivers which dont do re-start_rx during set_termios. Add new callback start_rx and perform stop_rx only when implementation of start_rx is present. Also add call to start_rx in resume sequence so that drivers who come across this problem can make use of this framework. Fixes: c9d2325cdb92 ("serial: core: Do stop_rx in suspend path for console if console_suspend is disabled") Reviewed-by: Douglas Anderson Signed-off-by: Vijaya Krishna Nivarthi Link: https://lore.kernel.org/r/1654627965-1461-2-git-send-email-quic_vnivarth@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index cbd5070bc87f..657a0fc68a3f 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -45,6 +45,7 @@ struct uart_ops { void (*unthrottle)(struct uart_port *); void (*send_xchar)(struct uart_port *, char ch); void (*stop_rx)(struct uart_port *); + void (*start_rx)(struct uart_port *); void (*enable_ms)(struct uart_port *); void (*break_ctl)(struct uart_port *, int ctl); int (*startup)(struct uart_port *); -- cgit v1.2.3 From cd756dafd86ee3a4969906086f3c2537e0c6d9d0 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Mon, 6 Jun 2022 14:22:00 +0100 Subject: staging: Also remove the Unisys visorbus.h The commit that removed the Unisys s-Par and visorbus drivers left around the include/linux/visorbus.h file mentioned in the MAINTAINERS entry, we can also remove that too. Fixes: e5f45b011e4a ("staging: Remove the drivers for the Unisys s-Par") Reviewed-by: Fabio M. De Francesco Signed-off-by: Peter Robinson Link: https://lore.kernel.org/r/20220606132200.2873243-1-pbrobinson@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/visorbus.h | 344 ----------------------------------------------- 1 file changed, 344 deletions(-) delete mode 100644 include/linux/visorbus.h (limited to 'include') diff --git a/include/linux/visorbus.h b/include/linux/visorbus.h deleted file mode 100644 index 0d8bd6769b13..000000000000 --- a/include/linux/visorbus.h +++ /dev/null @@ -1,344 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Copyright (C) 2010 - 2013 UNISYS CORPORATION - * All rights reserved. - */ - -/* - * This header file is to be included by other kernel mode components that - * implement a particular kind of visor_device. Each of these other kernel - * mode components is called a visor device driver. Refer to visortemplate - * for a minimal sample visor device driver. - * - * There should be nothing in this file that is private to the visorbus - * bus implementation itself. - */ - -#ifndef __VISORBUS_H__ -#define __VISORBUS_H__ - -#include - -#define VISOR_CHANNEL_SIGNATURE ('L' << 24 | 'N' << 16 | 'C' << 8 | 'E') - -/* - * enum channel_serverstate - * @CHANNELSRV_UNINITIALIZED: Channel is in an undefined state. - * @CHANNELSRV_READY: Channel has been initialized by server. - */ -enum channel_serverstate { - CHANNELSRV_UNINITIALIZED = 0, - CHANNELSRV_READY = 1 -}; - -/* - * enum channel_clientstate - * @CHANNELCLI_DETACHED: - * @CHANNELCLI_DISABLED: Client can see channel but is NOT allowed to use it - * unless given TBD* explicit request - * (should actually be < DETACHED). - * @CHANNELCLI_ATTACHING: Legacy EFI client request for EFI server to attach. - * @CHANNELCLI_ATTACHED: Idle, but client may want to use channel any time. - * @CHANNELCLI_BUSY: Client either wants to use or is using channel. - * @CHANNELCLI_OWNED: "No worries" state - client can access channel - * anytime. - */ -enum channel_clientstate { - CHANNELCLI_DETACHED = 0, - CHANNELCLI_DISABLED = 1, - CHANNELCLI_ATTACHING = 2, - CHANNELCLI_ATTACHED = 3, - CHANNELCLI_BUSY = 4, - CHANNELCLI_OWNED = 5 -}; - -/* - * Values for VISOR_CHANNEL_PROTOCOL.Features: This define exists so that - * a guest can look at the FeatureFlags in the io channel, and configure the - * driver to use interrupts or not based on this setting. All feature bits for - * all channels should be defined here. The io channel feature bits are defined - * below. - */ -#define VISOR_DRIVER_ENABLES_INTS (0x1ULL << 1) -#define VISOR_CHANNEL_IS_POLLING (0x1ULL << 3) -#define VISOR_IOVM_OK_DRIVER_DISABLING_INTS (0x1ULL << 4) -#define VISOR_DRIVER_DISABLES_INTS (0x1ULL << 5) -#define VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING (0x1ULL << 6) - -/* - * struct channel_header - Common Channel Header - * @signature: Signature. - * @legacy_state: DEPRECATED - being replaced by. - * @header_size: sizeof(struct channel_header). - * @size: Total size of this channel in bytes. - * @features: Flags to modify behavior. - * @chtype: Channel type: data, bus, control, etc.. - * @partition_handle: ID of guest partition. - * @handle: Device number of this channel in client. - * @ch_space_offset: Offset in bytes to channel specific area. - * @version_id: Struct channel_header Version ID. - * @partition_index: Index of guest partition. - * @zone_uuid: Guid of Channel's zone. - * @cli_str_offset: Offset from channel header to null-terminated - * ClientString (0 if ClientString not present). - * @cli_state_boot: CHANNEL_CLIENTSTATE of pre-boot EFI client of this - * channel. - * @cmd_state_cli: CHANNEL_COMMANDSTATE (overloaded in Windows drivers, see - * ServerStateUp, ServerStateDown, etc). - * @cli_state_os: CHANNEL_CLIENTSTATE of Guest OS client of this channel. - * @ch_characteristic: CHANNEL_CHARACTERISTIC_. - * @cmd_state_srv: CHANNEL_COMMANDSTATE (overloaded in Windows drivers, see - * ServerStateUp, ServerStateDown, etc). - * @srv_state: CHANNEL_SERVERSTATE. - * @cli_error_boot: Bits to indicate err states for boot clients, so err - * messages can be throttled. - * @cli_error_os: Bits to indicate err states for OS clients, so err - * messages can be throttled. - * @filler: Pad out to 128 byte cacheline. - * @recover_channel: Please add all new single-byte values below here. - */ -struct channel_header { - u64 signature; - u32 legacy_state; - /* SrvState, CliStateBoot, and CliStateOS below */ - u32 header_size; - u64 size; - u64 features; - guid_t chtype; - u64 partition_handle; - u64 handle; - u64 ch_space_offset; - u32 version_id; - u32 partition_index; - guid_t zone_guid; - u32 cli_str_offset; - u32 cli_state_boot; - u32 cmd_state_cli; - u32 cli_state_os; - u32 ch_characteristic; - u32 cmd_state_srv; - u32 srv_state; - u8 cli_error_boot; - u8 cli_error_os; - u8 filler[1]; - u8 recover_channel; -} __packed; - -#define VISOR_CHANNEL_ENABLE_INTS (0x1ULL << 0) - -/* - * struct signal_queue_header - Subheader for the Signal Type variation of the - * Common Channel. - * @version: SIGNAL_QUEUE_HEADER Version ID. - * @chtype: Queue type: storage, network. - * @size: Total size of this queue in bytes. - * @sig_base_offset: Offset to signal queue area. - * @features: Flags to modify behavior. - * @num_sent: Total # of signals placed in this queue. - * @num_overflows: Total # of inserts failed due to full queue. - * @signal_size: Total size of a signal for this queue. - * @max_slots: Max # of slots in queue, 1 slot is always empty. - * @max_signals: Max # of signals in queue (MaxSignalSlots-1). - * @head: Queue head signal #. - * @num_received: Total # of signals removed from this queue. - * @tail: Queue tail signal. - * @reserved1: Reserved field. - * @reserved2: Reserved field. - * @client_queue: - * @num_irq_received: Total # of Interrupts received. This is incremented by the - * ISR in the guest windows driver. - * @num_empty: Number of times that visor_signal_remove is called and - * returned Empty Status. - * @errorflags: Error bits set during SignalReinit to denote trouble with - * client's fields. - * @filler: Pad out to 64 byte cacheline. - */ -struct signal_queue_header { - /* 1st cache line */ - u32 version; - u32 chtype; - u64 size; - u64 sig_base_offset; - u64 features; - u64 num_sent; - u64 num_overflows; - u32 signal_size; - u32 max_slots; - u32 max_signals; - u32 head; - /* 2nd cache line */ - u64 num_received; - u32 tail; - u32 reserved1; - u64 reserved2; - u64 client_queue; - u64 num_irq_received; - u64 num_empty; - u32 errorflags; - u8 filler[12]; -} __packed; - -/* VISORCHANNEL Guids */ -/* {414815ed-c58c-11da-95a9-00e08161165f} */ -#define VISOR_VHBA_CHANNEL_GUID \ - GUID_INIT(0x414815ed, 0xc58c, 0x11da, \ - 0x95, 0xa9, 0x0, 0xe0, 0x81, 0x61, 0x16, 0x5f) -#define VISOR_VHBA_CHANNEL_GUID_STR \ - "414815ed-c58c-11da-95a9-00e08161165f" -struct visorchipset_state { - u32 created:1; - u32 attached:1; - u32 configured:1; - u32 running:1; - /* Remaining bits in this 32-bit word are reserved. */ -}; - -/** - * struct visor_device - A device type for things "plugged" into the visorbus - * bus - * @visorchannel: Points to the channel that the device is - * associated with. - * @channel_type_guid: Identifies the channel type to the bus driver. - * @device: Device struct meant for use by the bus driver - * only. - * @list_all: Used by the bus driver to enumerate devices. - * @timer: Timer fired periodically to do interrupt-type - * activity. - * @being_removed: Indicates that the device is being removed from - * the bus. Private bus driver use only. - * @visordriver_callback_lock: Used by the bus driver to lock when adding and - * removing devices. - * @pausing: Indicates that a change towards a paused state. - * is in progress. Only modified by the bus driver. - * @resuming: Indicates that a change towards a running state - * is in progress. Only modified by the bus driver. - * @chipset_bus_no: Private field used by the bus driver. - * @chipset_dev_no: Private field used the bus driver. - * @state: Used to indicate the current state of the - * device. - * @inst: Unique GUID for this instance of the device. - * @name: Name of the device. - * @pending_msg_hdr: For private use by bus driver to respond to - * hypervisor requests. - * @vbus_hdr_info: A pointer to header info. Private use by bus - * driver. - * @partition_guid: Indicates client partion id. This should be the - * same across all visor_devices in the current - * guest. Private use by bus driver only. - */ -struct visor_device { - struct visorchannel *visorchannel; - guid_t channel_type_guid; - /* These fields are for private use by the bus driver only. */ - struct device device; - struct list_head list_all; - struct timer_list timer; - bool timer_active; - bool being_removed; - struct mutex visordriver_callback_lock; /* synchronize probe/remove */ - bool pausing; - bool resuming; - u32 chipset_bus_no; - u32 chipset_dev_no; - struct visorchipset_state state; - guid_t inst; - u8 *name; - struct controlvm_message_header *pending_msg_hdr; - void *vbus_hdr_info; - guid_t partition_guid; - struct dentry *debugfs_dir; - struct dentry *debugfs_bus_info; -}; - -#define to_visor_device(x) container_of(x, struct visor_device, device) - -typedef void (*visorbus_state_complete_func) (struct visor_device *dev, - int status); - -/* - * This struct describes a specific visor channel, by providing its GUID, name, - * and sizes. - */ -struct visor_channeltype_descriptor { - const guid_t guid; - const char *name; - u64 min_bytes; - u32 version; -}; - -/** - * struct visor_driver - Information provided by each visor driver when it - * registers with the visorbus driver - * @name: Name of the visor driver. - * @owner: The module owner. - * @channel_types: Types of channels handled by this driver, ending with - * a zero GUID. Our specialized BUS.match() method knows - * about this list, and uses it to determine whether this - * driver will in fact handle a new device that it has - * detected. - * @probe: Called when a new device comes online, by our probe() - * function specified by driver.probe() (triggered - * ultimately by some call to driver_register(), - * bus_add_driver(), or driver_attach()). - * @remove: Called when a new device is removed, by our remove() - * function specified by driver.remove() (triggered - * ultimately by some call to device_release_driver()). - * @channel_interrupt: Called periodically, whenever there is a possiblity - * that "something interesting" may have happened to the - * channel. - * @pause: Called to initiate a change of the device's state. If - * the return valu`e is < 0, there was an error and the - * state transition will NOT occur. If the return value - * is >= 0, then the state transition was INITIATED - * successfully, and complete_func() will be called (or - * was just called) with the final status when either the - * state transition fails or completes successfully. - * @resume: Behaves similar to pause. - * @driver: Private reference to the device driver. For use by bus - * driver only. - */ -struct visor_driver { - const char *name; - struct module *owner; - struct visor_channeltype_descriptor *channel_types; - int (*probe)(struct visor_device *dev); - void (*remove)(struct visor_device *dev); - void (*channel_interrupt)(struct visor_device *dev); - int (*pause)(struct visor_device *dev, - visorbus_state_complete_func complete_func); - int (*resume)(struct visor_device *dev, - visorbus_state_complete_func complete_func); - - /* These fields are for private use by the bus driver only. */ - struct device_driver driver; -}; - -#define to_visor_driver(x) (container_of(x, struct visor_driver, driver)) - -int visor_check_channel(struct channel_header *ch, struct device *dev, - const guid_t *expected_uuid, char *chname, - u64 expected_min_bytes, u32 expected_version, - u64 expected_signature); - -int visorbus_register_visor_driver(struct visor_driver *drv); -void visorbus_unregister_visor_driver(struct visor_driver *drv); -int visorbus_read_channel(struct visor_device *dev, - unsigned long offset, void *dest, - unsigned long nbytes); -int visorbus_write_channel(struct visor_device *dev, - unsigned long offset, void *src, - unsigned long nbytes); -int visorbus_enable_channel_interrupts(struct visor_device *dev); -void visorbus_disable_channel_interrupts(struct visor_device *dev); - -int visorchannel_signalremove(struct visorchannel *channel, u32 queue, - void *msg); -int visorchannel_signalinsert(struct visorchannel *channel, u32 queue, - void *msg); -bool visorchannel_signalempty(struct visorchannel *channel, u32 queue); -const guid_t *visorchannel_get_guid(struct visorchannel *channel); - -#define BUS_ROOT_DEVICE UINT_MAX -struct visor_device *visorbus_get_device_by_id(u32 bus_no, u32 dev_no, - struct visor_device *from); -#endif -- cgit v1.2.3 From e81fb4198e27925b151aad1450e0fd607d6733f8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 9 Jun 2022 15:04:01 -0700 Subject: netfs: Further cleanups after struct netfs_inode wrapper introduced Change the signature of netfs helper functions to take a struct netfs_inode pointer rather than a struct inode pointer where appropriate, thereby relieving the need for the network filesystem to convert its internal inode format down to the VFS inode only for netfslib to bounce it back up. For type safety, it's better not to do that (and it's less typing too). Give netfs_write_begin() an extra argument to pass in a pointer to the netfs_inode struct rather than deriving it internally from the file pointer. Note that the ->write_begin() and ->write_end() ops are intended to be replaced in the future by netfslib code that manages this without the need to call in twice for each page. netfs_readpage() and similar are intended to be pointed at directly by the address_space_operations table, so must stick to the signature dictated by the function pointers there. Changes ======= - Updated the kerneldoc comments and documentation [DH]. Signed-off-by: David Howells cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/CAHk-=wgkwKyNmNdKpQkqZ6DnmUL-x9hp0YBnUGjaPFEAdxDTbw@mail.gmail.com/ --- include/linux/netfs.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 6dbb4c9ce50d..a62739f3726b 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -277,7 +277,8 @@ struct netfs_cache_ops { struct readahead_control; extern void netfs_readahead(struct readahead_control *); int netfs_read_folio(struct file *, struct folio *); -extern int netfs_write_begin(struct file *, struct address_space *, +extern int netfs_write_begin(struct netfs_inode *, + struct file *, struct address_space *, loff_t, unsigned int, struct folio **, void **); @@ -302,19 +303,17 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode) /** * netfs_inode_init - Initialise a netfslib inode context - * @inode: The inode with which the context is associated + * @inode: The netfs inode to initialise * @ops: The netfs's operations list * * Initialise the netfs library context struct. This is expected to follow on * directly from the VFS inode struct. */ -static inline void netfs_inode_init(struct inode *inode, +static inline void netfs_inode_init(struct netfs_inode *ctx, const struct netfs_request_ops *ops) { - struct netfs_inode *ctx = netfs_inode(inode); - ctx->ops = ops; - ctx->remote_i_size = i_size_read(inode); + ctx->remote_i_size = i_size_read(&ctx->inode); #if IS_ENABLED(CONFIG_FSCACHE) ctx->cache = NULL; #endif @@ -322,28 +321,25 @@ static inline void netfs_inode_init(struct inode *inode, /** * netfs_resize_file - Note that a file got resized - * @inode: The inode being resized + * @ctx: The netfs inode being resized * @new_i_size: The new file size * * Inform the netfs lib that a file got resized so that it can adjust its state. */ -static inline void netfs_resize_file(struct inode *inode, loff_t new_i_size) +static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size) { - struct netfs_inode *ctx = netfs_inode(inode); - ctx->remote_i_size = new_i_size; } /** * netfs_i_cookie - Get the cache cookie from the inode - * @inode: The inode to query + * @ctx: The netfs inode to query * * Get the caching cookie (if enabled) from the network filesystem's inode. */ -static inline struct fscache_cookie *netfs_i_cookie(struct inode *inode) +static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx) { #if IS_ENABLED(CONFIG_FSCACHE) - struct netfs_inode *ctx = netfs_inode(inode); return ctx->cache; #else return NULL; -- cgit v1.2.3 From 40a81101202300df7db273f77dda9fbe6271b1d2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 25 Feb 2022 11:19:14 +0000 Subject: netfs: Rename the netfs_io_request cleanup op and give it an op pointer The netfs_io_request cleanup op is now always in a position to be given a pointer to a netfs_io_request struct, so this can be passed in instead of the mapping and private data arguments (both of which are included in the struct). So rename the ->cleanup op to ->free_request (to match ->init_request) and pass in the I/O pointer. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com --- include/linux/netfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index a62739f3726b..097cdd644665 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -206,7 +206,9 @@ struct netfs_io_request { */ struct netfs_request_ops { int (*init_request)(struct netfs_io_request *rreq, struct file *file); + void (*free_request)(struct netfs_io_request *rreq); int (*begin_cache_operation)(struct netfs_io_request *rreq); + void (*expand_readahead)(struct netfs_io_request *rreq); bool (*clamp_length)(struct netfs_io_subrequest *subreq); void (*issue_read)(struct netfs_io_subrequest *subreq); @@ -214,7 +216,6 @@ struct netfs_request_ops { int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, struct folio *folio, void **_fsdata); void (*done)(struct netfs_io_request *rreq); - void (*cleanup)(struct address_space *mapping, void *netfs_priv); }; /* -- cgit v1.2.3 From 8bee9dd953b69c634d1c9a3241a8b357469ad4aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Fri, 10 Jun 2022 01:41:10 +0200 Subject: workqueue: Switch to new kerneldoc syntax for named variable macro argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The syntax without dots is available since commit 43756e347f21 ("scripts/kernel-doc: Add support for named variable macro arguments"). The same HTML output is produced with and without this patch. Signed-off-by: Jonathan Neuschäfer Acked-by: Tejun Heo Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index e1f1c8b1121b..62e75dd40d9a 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -406,7 +406,7 @@ alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); * alloc_ordered_workqueue - allocate an ordered workqueue * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) - * @args...: args for @fmt + * @args: args for @fmt * * Allocate an ordered workqueue. An ordered workqueue executes at * most one work item at any given time in the queued order. They are -- cgit v1.2.3 From 993d0b287e2ef7bee2e8b13b0ce4d2b5066f278e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 12 Jun 2022 22:32:25 +0100 Subject: usercopy: Handle vm_map_ram() areas vmalloc does not allocate a vm_struct for vm_map_ram() areas. That causes us to deny usercopies from those areas. This affects XFS which uses vm_map_ram() for its directories. Fix this by calling find_vmap_area() instead of find_vm_area(). Fixes: 0aef499f3172 ("mm/usercopy: Detect vmalloc overruns") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Uladzislau Rezki (Sony) Tested-by: Zorro Lang Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20220612213227.3881769-2-willy@infradead.org --- include/linux/vmalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b159c2789961..096d48aa3437 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -215,6 +215,7 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size, void free_vm_area(struct vm_struct *area); extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr); +struct vmap_area *find_vmap_area(unsigned long addr); static inline bool is_vm_area_hugepages(const void *addr) { -- cgit v1.2.3 From 0f9cd1ea10d307cad221d6693b648a8956e812b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 13 Jun 2022 09:37:03 +0200 Subject: drm/ttm: fix bulk move handling v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The resource must be on the LRU before ttm_lru_bulk_move_add() is called and we need to check if the BO is pinned or not before adding it. Additional to that we missed taking the LRU spinlock in ttm_bo_unpin(). Signed-off-by: Christian König Reviewed-by: Arunpravin Paneer Selvam Acked-by: Luben Tuikov Link: https://patchwork.freedesktop.org/patch/msgid/20220613080816.4965-1-christian.koenig@amd.com Fixes: fee2ede15542 ("drm/ttm: rework bulk move handling v5") --- include/drm/ttm/ttm_resource.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 441653693970..ca89a48c2460 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -311,12 +311,12 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man) } void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk); -void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk, - struct ttm_resource *res); -void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk, - struct ttm_resource *res); void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk); +void ttm_resource_add_bulk_move(struct ttm_resource *res, + struct ttm_buffer_object *bo); +void ttm_resource_del_bulk_move(struct ttm_resource *res, + struct ttm_buffer_object *bo); void ttm_resource_move_to_lru_tail(struct ttm_resource *res); void ttm_resource_init(struct ttm_buffer_object *bo, -- cgit v1.2.3 From d884b6498d2f022098502e106d5a45ab635f2e9a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 14 Jun 2022 17:51:18 +0100 Subject: io_uring: remove IORING_CLOSE_FD_AND_FILE_SLOT This partially reverts a7c41b4687f5902af70cd559806990930c8a307b Even though IORING_CLOSE_FD_AND_FILE_SLOT might save cycles for some users, but it tries to do two things at a time and it's not clear how to handle errors and what to return in a single result field when one part fails and another completes well. Kill it for now. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/837c745019b3795941eee4fcfd7de697886d645b.1655224415.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 776e0278f9dd..53e7dae92e42 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -47,7 +47,6 @@ struct io_uring_sqe { __u32 unlink_flags; __u32 hardlink_flags; __u32 xattr_flags; - __u32 close_flags; }; __u64 user_data; /* data to be passed back at completion time */ /* pack this to avoid bogus arm OABI complaints */ @@ -259,11 +258,6 @@ enum io_uring_op { */ #define IORING_ACCEPT_MULTISHOT (1U << 0) -/* - * close flags, store in sqe->close_flags - */ -#define IORING_CLOSE_FD_AND_FILE_SLOT (1U << 0) - /* * IO completion data structure (Completion Queue Entry) */ -- cgit v1.2.3 From 018ab4fabddd94f1c96f3b59e180691b9e88d5d8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 14 Jun 2022 10:36:11 -0700 Subject: netfs: fix up netfs_inode_init() docbook comment Commit e81fb4198e27 ("netfs: Further cleanups after struct netfs_inode wrapper introduced") changed the argument types and names, and actually updated the comment too (although that was thanks to David Howells, not me: my original patch only changed the code). But the comment fixup didn't go quite far enough, and didn't change the argument name in the comment, resulting in include/linux/netfs.h:314: warning: Function parameter or member 'ctx' not described in 'netfs_inode_init' include/linux/netfs.h:314: warning: Excess function parameter 'inode' description in 'netfs_inode_init' during htmldoc generation. Fixes: e81fb4198e27 ("netfs: Further cleanups after struct netfs_inode wrapper introduced") Reported-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- include/linux/netfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 097cdd644665..1773e5df8e65 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -304,7 +304,7 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode) /** * netfs_inode_init - Initialise a netfslib inode context - * @inode: The netfs inode to initialise + * @ctx: The netfs inode to initialise * @ops: The netfs's operations list * * Initialise the netfs library context struct. This is expected to follow on -- cgit v1.2.3 From b87f02307d3cfbda768520f0687c51ca77e14fc3 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 15 Jun 2022 18:28:05 +0200 Subject: printk: Wait for the global console lock when the system is going down There are reports that the console kthreads block the global console lock when the system is going down, for example, reboot, panic. First part of the solution was to block kthreads in these problematic system states so they stopped handling newly added messages. Second part of the solution is to wait when for the kthreads when they are actively printing. It solves the problem when a message was printed before the system entered the problematic state and the kthreads managed to step in. A busy waiting has to be used because panic() can be called in any context and in an unknown state of the scheduler. There must be a timeout because the kthread might get stuck or sleeping and never release the lock. The timeout 10s is an arbitrary value inspired by the softlockup timeout. Link: https://lore.kernel.org/r/20220610205038.GA3050413@paulmck-ThinkPad-P17-Gen-1 Link: https://lore.kernel.org/r/CAMdYzYpF4FNTBPZsEFeWRuEwSies36QM_As8osPWZSr2q-viEA@mail.gmail.com Signed-off-by: Petr Mladek Tested-by: Paul E. McKenney Link: https://lore.kernel.org/r/20220615162805.27962-3-pmladek@suse.com --- include/linux/printk.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index cd26aab0ab2a..c1e07c0652c7 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -174,6 +174,7 @@ extern void printk_prefer_direct_enter(void); extern void printk_prefer_direct_exit(void); extern bool pr_flush(int timeout_ms, bool reset_on_progress); +extern void try_block_console_kthreads(int timeout_ms); /* * Please don't use printk_ratelimit(), because it shares ratelimiting state @@ -238,6 +239,10 @@ static inline bool pr_flush(int timeout_ms, bool reset_on_progress) return true; } +static inline void try_block_console_kthreads(int timeout_ms) +{ +} + static inline int printk_ratelimit(void) { return 0; -- cgit v1.2.3 From 4bca7e80b6455772b4bf3f536dcbc19aac424d6a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 15 Jun 2022 15:22:29 +0200 Subject: init: Initialize noop_backing_dev_info early noop_backing_dev_info is used by superblocks of various pseudofilesystems such as kdevtmpfs. After commit 10e14073107d ("writeback: Fix inode->i_io_list not be protected by inode->i_lock error") this broke because __mark_inode_dirty() started to access more fields from noop_backing_dev_info and this led to crashes inside locked_inode_to_wb_and_lock_list() called from __mark_inode_dirty(). Fix the problem by initializing noop_backing_dev_info before the filesystems get mounted. Fixes: 10e14073107d ("writeback: Fix inode->i_io_list not be protected by inode->i_lock error") Reported-and-tested-by: Suzuki K Poulose Reported-and-tested-by: Alexandru Elisei Reported-and-tested-by: Guenter Roeck Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/linux/backing-dev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 2bd073fa6bb5..d452071db572 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -119,6 +119,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); extern struct backing_dev_info noop_backing_dev_info; +int bdi_init(struct backing_dev_info *bdi); + /** * writeback_in_progress - determine whether there is writeback in progress * @wb: bdi_writeback of interest -- cgit v1.2.3 From 593d1ebe00a45af5cb7bda1235c0790987c2a2b2 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Wed, 15 Jun 2022 12:32:13 -0700 Subject: Revert "net: Add a second bind table hashed by port and address" This reverts: commit d5a42de8bdbe ("net: Add a second bind table hashed by port and address") commit 538aaf9b2383 ("selftests: Add test for timing a bind request to a port with a populated bhash entry") Link: https://lore.kernel.org/netdev/20220520001834.2247810-1-kuba@kernel.org/ There are a few things that need to be fixed here: * Updating bhash2 in cases where the socket's rcv saddr changes * Adding bhash2 hashbucket locks Links to syzbot reports: https://lore.kernel.org/netdev/00000000000022208805e0df247a@google.com/ https://lore.kernel.org/netdev/0000000000003f33bc05dfaf44fe@google.com/ Fixes: d5a42de8bdbe ("net: Add a second bind table hashed by port and address") Reported-by: syzbot+015d756bbd1f8b5c8f09@syzkaller.appspotmail.com Reported-by: syzbot+98fd2d1422063b0f8c44@syzkaller.appspotmail.com Reported-by: syzbot+0a847a982613c6438fba@syzkaller.appspotmail.com Signed-off-by: Joanne Koong Link: https://lore.kernel.org/r/20220615193213.2419568-1-joannelkoong@gmail.com Signed-off-by: Jakub Kicinski --- include/net/inet_connection_sock.h | 3 -- include/net/inet_hashtables.h | 68 +------------------------------------- include/net/sock.h | 14 -------- 3 files changed, 1 insertion(+), 84 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 077cd730ce2f..85cd695e7fd1 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -25,7 +25,6 @@ #undef INET_CSK_CLEAR_TIMERS struct inet_bind_bucket; -struct inet_bind2_bucket; struct tcp_congestion_ops; /* @@ -58,7 +57,6 @@ struct inet_connection_sock_af_ops { * * @icsk_accept_queue: FIFO of established children * @icsk_bind_hash: Bind node - * @icsk_bind2_hash: Bind node in the bhash2 table * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: Retransmit timeout @@ -85,7 +83,6 @@ struct inet_connection_sock { struct inet_sock icsk_inet; struct request_sock_queue icsk_accept_queue; struct inet_bind_bucket *icsk_bind_hash; - struct inet_bind2_bucket *icsk_bind2_hash; unsigned long icsk_timeout; struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index a0887b70967b..ebfa3df6f8dc 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -90,32 +90,11 @@ struct inet_bind_bucket { struct hlist_head owners; }; -struct inet_bind2_bucket { - possible_net_t ib_net; - int l3mdev; - unsigned short port; - union { -#if IS_ENABLED(CONFIG_IPV6) - struct in6_addr v6_rcv_saddr; -#endif - __be32 rcv_saddr; - }; - /* Node in the inet2_bind_hashbucket chain */ - struct hlist_node node; - /* List of sockets hashed to this bucket */ - struct hlist_head owners; -}; - static inline struct net *ib_net(struct inet_bind_bucket *ib) { return read_pnet(&ib->ib_net); } -static inline struct net *ib2_net(struct inet_bind2_bucket *ib) -{ - return read_pnet(&ib->ib_net); -} - #define inet_bind_bucket_for_each(tb, head) \ hlist_for_each_entry(tb, head, node) @@ -124,15 +103,6 @@ struct inet_bind_hashbucket { struct hlist_head chain; }; -/* This is synchronized using the inet_bind_hashbucket's spinlock. - * Instead of having separate spinlocks, the inet_bind2_hashbucket can share - * the inet_bind_hashbucket's given that in every case where the bhash2 table - * is useful, a lookup in the bhash table also occurs. - */ -struct inet_bind2_hashbucket { - struct hlist_head chain; -}; - /* Sockets can be hashed in established or listening table. * We must use different 'nulls' end-of-chain value for all hash buckets : * A socket might transition from ESTABLISH to LISTEN state without @@ -164,12 +134,6 @@ struct inet_hashinfo { */ struct kmem_cache *bind_bucket_cachep; struct inet_bind_hashbucket *bhash; - /* The 2nd binding table hashed by port and address. - * This is used primarily for expediting the resolution of bind - * conflicts. - */ - struct kmem_cache *bind2_bucket_cachep; - struct inet_bind2_hashbucket *bhash2; unsigned int bhash_size; /* The 2nd listener table hashed by local port and address */ @@ -229,36 +193,6 @@ inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb); -static inline bool check_bind_bucket_match(struct inet_bind_bucket *tb, - struct net *net, - const unsigned short port, - int l3mdev) -{ - return net_eq(ib_net(tb), net) && tb->port == port && - tb->l3mdev == l3mdev; -} - -struct inet_bind2_bucket * -inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net, - struct inet_bind2_hashbucket *head, - const unsigned short port, int l3mdev, - const struct sock *sk); - -void inet_bind2_bucket_destroy(struct kmem_cache *cachep, - struct inet_bind2_bucket *tb); - -struct inet_bind2_bucket * -inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net, - const unsigned short port, int l3mdev, - struct sock *sk, - struct inet_bind2_hashbucket **head); - -bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb, - struct net *net, - const unsigned short port, - int l3mdev, - const struct sock *sk); - static inline u32 inet_bhashfn(const struct net *net, const __u16 lport, const u32 bhash_size) { @@ -266,7 +200,7 @@ static inline u32 inet_bhashfn(const struct net *net, const __u16 lport, } void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, - struct inet_bind2_bucket *tb2, const unsigned short snum); + const unsigned short snum); /* Caller must disable local BH processing. */ int __inet_inherit_port(const struct sock *sk, struct sock *child); diff --git a/include/net/sock.h b/include/net/sock.h index c585ef6565d9..72ca97ccb460 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -348,7 +348,6 @@ struct sk_filter; * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference - * @sk_bind2_node: bind node in the bhash2 table */ struct sock { /* @@ -538,7 +537,6 @@ struct sock { #endif struct rcu_head sk_rcu; netns_tracker ns_tracker; - struct hlist_node sk_bind2_node; }; enum sk_pacing { @@ -819,16 +817,6 @@ static inline void sk_add_bind_node(struct sock *sk, hlist_add_head(&sk->sk_bind_node, list); } -static inline void __sk_del_bind2_node(struct sock *sk) -{ - __hlist_del(&sk->sk_bind2_node); -} - -static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list) -{ - hlist_add_head(&sk->sk_bind2_node, list); -} - #define sk_for_each(__sk, list) \ hlist_for_each_entry(__sk, list, sk_node) #define sk_for_each_rcu(__sk, list) \ @@ -846,8 +834,6 @@ static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list) hlist_for_each_entry_safe(__sk, tmp, list, sk_node) #define sk_for_each_bound(__sk, list) \ hlist_for_each_entry(__sk, list, sk_bind_node) -#define sk_for_each_bound_bhash2(__sk, list) \ - hlist_for_each_entry(__sk, list, sk_bind2_node) /** * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset -- cgit v1.2.3 From 4d337cebcb1c27d9b48c48b9a98e939d4552d584 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 16 Jun 2022 09:44:00 +0800 Subject: blk-mq: avoid to touch q->elevator without any protection q->elevator is referred in blk_mq_has_sqsched() without any protection, no .q_usage_counter is held, no queue srcu and rcu read lock is held, so potential use-after-free may be triggered. Fix the issue by adding one queue flag for checking if the elevator uses single queue style dispatch. Meantime the elevator feature flag of ELEVATOR_F_MQ_AWARE isn't needed any more. Cc: Jan Kara Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220616014401.817001-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 608d577734c2..bb6e3c31b3b7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -575,6 +575,7 @@ struct request_queue { #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ +#define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ @@ -616,6 +617,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) #define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags) #define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags) +#define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags) extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); @@ -1006,8 +1008,6 @@ void disk_set_independent_access_ranges(struct gendisk *disk, */ /* Supports zoned block devices sequential write constraint */ #define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0) -/* Supports scheduling on multiple hardware queues */ -#define ELEVATOR_F_MQ_AWARE (1U << 1) extern void blk_queue_required_elevator_features(struct request_queue *q, unsigned int features); -- cgit v1.2.3