From a189b2ee938f6b15ad9f95bdef63f95a3a092418 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Thu, 17 Nov 2022 12:47:29 +0100 Subject: fbdev: Make fb_modesetting_disabled() static inline Make fb_modesetting_disabled() a static-inline function when it is defined in the header file. Avoid the linker error shown below. ld: drivers/video/fbdev/core/fbmon.o: in function `fb_modesetting_disabled': fbmon.c:(.text+0x1e4): multiple definition of `fb_modesetting_disabled'; drivers/video/fbdev/core/fbmem.o:fbmem.c:(.text+0x1bac): first defined here A bug report is at [1]. Reported-by: Stephen Rothwell Reported-by: kernel test robot Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Fixes: 0ba2fa8cbd29 ("fbdev: Add support for the nomodeset kernel parameter") Cc: Javier Martinez Canillas Cc: Daniel Vetter Cc: Helge Deller Cc: linux-fbdev@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Link: https://lore.kernel.org/dri-devel/20221117183214.2473e745@canb.auug.org.au/T/#u # 1 Link: https://patchwork.freedesktop.org/patch/msgid/20221117114729.7570-1-tzimmermann@suse.de --- include/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 8dc9635f5bc1..a2a52f730989 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -807,7 +807,7 @@ extern int fb_find_mode(struct fb_var_screeninfo *var, #if defined(CONFIG_VIDEO_NOMODESET) bool fb_modesetting_disabled(const char *drvname); #else -bool fb_modesetting_disabled(const char *drvname) +static inline bool fb_modesetting_disabled(const char *drvname) { return false; } -- cgit v1.2.3 From 8b83e1a455382dc667898a525a93f4eb6716cc41 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 18 Nov 2022 14:35:34 +0100 Subject: Revert "drm/fb-helper: Schedule deferred-I/O worker after writing to framebuffer" This reverts commit 7f5cc4a3e5e4c5a38e5748defc952e45278f7a70. Needed to restore the fbdev damage worker. There have been bug reports about locking order [1] and incorrectly taken branches [2]. Restore the damage worker until these problems have been resolved. Signed-off-by: Thomas Zimmermann Acked-by: Daniel Vetter Link: https://intel-gfx-ci.01.org/tree/drm-tip/fi-kbl-8809g.html # 1 Link: https://lore.kernel.org/dri-devel/20221115115819.23088-6-tzimmermann@suse.de/T/#m06eedc0a468940e4cbbd14ca026733b639bc445a # 2 Link: https://patchwork.freedesktop.org/patch/msgid/20221118133535.9739-3-tzimmermann@suse.de --- include/linux/fb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index a2a52f730989..64889a2b783f 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -663,7 +663,6 @@ extern void fb_deferred_io_open(struct fb_info *info, struct inode *inode, struct file *file); extern void fb_deferred_io_cleanup(struct fb_info *info); -extern void fb_deferred_io_schedule_flush(struct fb_info *info); extern int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync); -- cgit v1.2.3 From b56ffa583350f605446d78cb4163114e4d1ac60c Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Wed, 23 Nov 2022 19:35:18 +0000 Subject: dma-buf: A collection of typo and documentation fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've been collecting these typo fixes for a while and it feels like time to send them in. Signed-off-by: T.J.
Mercier Reviewed-by: Randy Dunlap Acked-by: Christian König Reviewed-by: Tommaso Merciai Link: https://patchwork.freedesktop.org/patch/msgid/20221123193519.3948105-1-tjmercier@google.com Signed-off-by: Christian König --- include/linux/dma-buf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 6fa8d4e29719..3f31baa3293f 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -356,7 +356,7 @@ struct dma_buf { */ const char *name; - /** @name_lock: Spinlock to protect name acces for read access. */ + /** @name_lock: Spinlock to protect name access for read access. */ spinlock_t name_lock; /** @@ -393,7 +393,7 @@ struct dma_buf { * anything the userspace API considers write access. * * - Drivers may just always add a write fence, since that only - * causes unecessarily synchronization, but no correctness issues. + * causes unnecessary synchronization, but no correctness issues. * * - Some drivers only expose a synchronous userspace API with no * pipelining across drivers. These do not set any fences for their -- cgit v1.2.3 From e873d4329ccb891bf3b17f1e0d44396de943e92d Mon Sep 17 00:00:00 2001 From: Joy Zou Date: Tue, 15 Nov 2022 17:38:23 +0800 Subject: dmaengine: imx-sdma: support hdmi in sdma The hdmi script is already supported in the sdma firmware, so add support for hdmi in the sdma driver. The design of the hdmi script is different from common scripts such as sai. There is no need to config a buffer descriptor for HDMI. The cyclic capability is achieved by the hdmi script. The slave config is simple: only src_addr, dst_addr and the direction DMA_TRANS_NONE need to be configured. Signed-off-by: Joy Zou Reviewed-by: Sascha Hauer Link: https://lore.kernel.org/r/20221115093823.2879128-3-joy.zou@nxp.com Signed-off-by: Vinod Koul --- include/linux/dma/imx-dma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dma/imx-dma.h b/include/linux/dma/imx-dma.h index f487a4fa103a..cfec5f946e23 100644 --- a/include/linux/dma/imx-dma.h +++ b/include/linux/dma/imx-dma.h @@ -40,6 +40,7 @@ enum sdma_peripheral_type { IMX_DMATYPE_ASRC_SP, /* Shared ASRC */ IMX_DMATYPE_SAI, /* SAI */ IMX_DMATYPE_MULTI_SAI, /* MULTI FIFOs For Audio */ + IMX_DMATYPE_HDMI, /* HDMI Audio */ }; enum imx_dma_prio { -- cgit v1.2.3 From 08f01cc1d6e240092a6d9bfa21652622657f38f0 Mon Sep 17 00:00:00 2001 From: Gerald Loacker Date: Thu, 1 Dec 2022 08:22:18 +0100 Subject: iio: add struct declaration for iio types Add struct for iio type arrays such as IIO_AVAIL_LIST which can be used instead of int arrays. Signed-off-by: Gerald Loacker Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221201072220.402585-2-gerald.loacker@wolfvision.net Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 8e0afaaa3f75..81413cd3a3e7 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -381,6 +381,11 @@ s64 iio_get_time_ns(const struct iio_dev *indio_dev); #define INDIO_MAX_RAW_ELEMENTS 4 +struct iio_val_int_plus_micro { + int integer; + int micro; +}; + struct iio_trigger; /* forward declaration */ /** -- cgit v1.2.3 From 1f5e408f6a000be980872b8065e547e2dbef6acc Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 7 Dec 2022 21:03:44 +0200 Subject: iio: light: tsl2563: Drop legacy platform data code There is no in-kernel user for legacy platform data.
Otherwise, a new one can use software nodes instead. Hence, drop legacy platform data code. Signed-off-by: Andy Shevchenko Tested-by: Ferry Toth Link: https://lore.kernel.org/r/20221207190348.9347-7-andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron --- include/linux/platform_data/tsl2563.h | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 include/linux/platform_data/tsl2563.h (limited to 'include/linux') diff --git a/include/linux/platform_data/tsl2563.h b/include/linux/platform_data/tsl2563.h deleted file mode 100644 index 9cf9309c3f24..000000000000 --- a/include/linux/platform_data/tsl2563.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_TSL2563_H -#define __LINUX_TSL2563_H - -struct tsl2563_platform_data { - int cover_comp_gain; -}; - -#endif /* __LINUX_TSL2563_H */ -- cgit v1.2.3 From c941b98781b348bd63fb8f8b5307b10fde2af0c1 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 9 Dec 2022 18:48:39 +0200 Subject: clk: qcom: smd-rpm: remove duplication between MMXI and MMAXI defines The commit 644c42295592 ("clk: qcom: smd: Add SM6375 clocks") added a duplicate of the existing define QCOM_SMD_RPM_MMAXI_CLK, drop it now. Fixes: 644c42295592 ("clk: qcom: smd: Add SM6375 clocks") Reviewed-by: Alex Elder Signed-off-by: Dmitry Baryshkov Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221209164855.128798-4-dmitry.baryshkov@linaro.org --- include/linux/soc/qcom/smd-rpm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h index 62de54992e49..2990f425fdef 100644 --- a/include/linux/soc/qcom/smd-rpm.h +++ b/include/linux/soc/qcom/smd-rpm.h @@ -43,7 +43,6 @@ struct qcom_smd_rpm; #define QCOM_SMD_RPM_HWKM_CLK 0x6d6b7768 #define QCOM_SMD_RPM_PKA_CLK 0x616b70 #define QCOM_SMD_RPM_MCFG_CLK 0x6766636d -#define QCOM_SMD_RPM_MMXI_CLK 0x69786d6d int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm, int state, -- cgit v1.2.3 From 63a1bd8ad1ac9e4e8bfcd5914c8899606e04898d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 30 Nov 2022 23:08:53 +0000 Subject: KVM: Drop arch hardware (un)setup hooks Drop kvm_arch_hardware_setup() and kvm_arch_hardware_unsetup() now that all implementations are nops. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Eric Farman # s390 Acked-by: Anup Patel Message-Id: <20221130230934.1014142-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4f26b244f6d0..7c1009c0e66d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1447,8 +1447,6 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} int kvm_arch_hardware_enable(void); void kvm_arch_hardware_disable(void); -int kvm_arch_hardware_setup(void *opaque); -void kvm_arch_hardware_unsetup(void); int kvm_arch_check_processor_compat(void *opaque); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 466d27e48d7cd2542710309817cff1e91cd2e10a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 30 Nov 2022 23:09:00 +0000 Subject: KVM: arm64: Simplify the CPUHP logic For a number of historical reasons, the KVM/arm64 hotplug setup is pretty complicated, and we have two extra CPUHP notifiers for vGIC and timers. 
It looks pretty pointless, and gets in the way of further changes. So let's just expose some helpers that can be called from the core CPUHP callback, and get rid of everything else. This gives us the opportunity to drop a useless notifier entry, as well as tidy-up the timer enable/disable, which was a bit odd. Signed-off-by: Marc Zyngier Signed-off-by: Isaku Yamahata Signed-off-by: Sean Christopherson Message-Id: <20221130230934.1014142-17-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/cpuhotplug.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 6c6859bfc454..5cae6bd22f7f 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -188,9 +188,6 @@ enum cpuhp_state { CPUHP_AP_TI_GP_TIMER_STARTING, CPUHP_AP_HYPERV_TIMER_STARTING, CPUHP_AP_KVM_STARTING, - CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, - CPUHP_AP_KVM_ARM_VGIC_STARTING, - CPUHP_AP_KVM_ARM_TIMER_STARTING, /* Must be the last timer callback */ CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, -- cgit v1.2.3 From a578a0a9e3526483ad1904fac019d95e7089fb34 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 30 Nov 2022 23:09:13 +0000 Subject: KVM: Drop kvm_arch_{init,exit}() hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop kvm_arch_init() and kvm_arch_exit() now that all implementations are nops. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Eric Farman # s390 Reviewed-by: Philippe Mathieu-Daudé Acked-by: Anup Patel Message-Id: <20221130230934.1014142-30-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7c1009c0e66d..62d977bb1448 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1423,9 +1423,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu); -int kvm_arch_init(void *opaque); -void kvm_arch_exit(void); - void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); -- cgit v1.2.3 From 81a1cf9f89a6b71e71bfd7d43837ce9235e70b38 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 30 Nov 2022 23:09:16 +0000 Subject: KVM: Drop kvm_arch_check_processor_compat() hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop kvm_arch_check_processor_compat() and its support code now that all architecture implementations are nops. 
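With the compat hook gone, the opaque cookie that kvm_init() only ever forwarded to kvm_arch_check_processor_compat() disappears as well, as the kvm_host.h hunk below shows. A minimal sketch of what an architecture's module init might look like after this change; the vcpu structure and function names are invented for illustration:

#include <linux/kvm_host.h>
#include <linux/module.h>

static int __init my_arch_kvm_init(void)
{
	/*
	 * Hardware setup and compatibility checks are now done by the
	 * architecture itself before this point, not via kvm_init() hooks.
	 */
	return kvm_init(sizeof(struct my_arch_vcpu),
			__alignof__(struct my_arch_vcpu), THIS_MODULE);
}
module_init(my_arch_kvm_init);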
Signed-off-by: Sean Christopherson Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Eric Farman # s390 Acked-by: Anup Patel Reviewed-by: Kai Huang Message-Id: <20221130230934.1014142-33-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 62d977bb1448..20e207e4c6c1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -956,8 +956,7 @@ static inline void kvm_irqfd_exit(void) { } #endif -int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, - struct module *module); +int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module); void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); @@ -1444,7 +1443,6 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} int kvm_arch_hardware_enable(void); void kvm_arch_hardware_disable(void); -int kvm_arch_check_processor_compat(void *opaque); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From aaf12a7b4323eb7d94677bcefc286ff6b772ed1c Mon Sep 17 00:00:00 2001 From: Chao Gao Date: Wed, 30 Nov 2022 23:09:25 +0000 Subject: KVM: Rename and move CPUHP_AP_KVM_STARTING to ONLINE section The CPU STARTING section doesn't allow callbacks to fail. Move KVM's hotplug callback to ONLINE section so that it can abort onlining a CPU in certain cases to avoid potentially breaking VMs running on existing CPUs. For example, when KVM fails to enable hardware virtualization on the hotplugged CPU. Place KVM's hotplug state before CPUHP_AP_SCHED_WAIT_EMPTY as it ensures when offlining a CPU, all user tasks and non-pinned kernel tasks have left the CPU, i.e. there cannot be a vCPU task around. So, it is safe for KVM's CPU offline callback to disable hardware virtualization at that point. Likewise, KVM's online callback can enable hardware virtualization before any vCPU task gets a chance to run on hotplugged CPUs. Drop kvm_x86_check_processor_compatibility()'s WARN that IRQs are disabled, as the ONLINE section runs with IRQs disabled. The WARN wasn't intended to be a requirement, e.g. disabling preemption is sufficient, the IRQ thing was purely an aggressive sanity check since the helper was only ever invoked via SMP function call. Rename KVM's CPU hotplug callbacks accordingly. 
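Callbacks registered in the ONLINE section may return an error, which is exactly what lets KVM refuse to online a CPU on which hardware virtualization cannot be enabled. A sketch of how the new state could be wired up; the callback and helper names are illustrative and not taken from this hunk:

#include <linux/cpuhotplug.h>

static int kvm_online_cpu(unsigned int cpu)
{
	/* Returning a negative error here aborts onlining of this CPU. */
	return my_hardware_enable(cpu);
}

static int kvm_offline_cpu(unsigned int cpu)
{
	/*
	 * CPUHP_AP_SCHED_WAIT_EMPTY has already torn down by the time this
	 * runs, so no vCPU task can still be scheduled on this CPU.
	 */
	my_hardware_disable(cpu);
	return 0;
}

static int kvm_register_cpuhp(void)
{
	return cpuhp_setup_state_nocalls(CPUHP_AP_KVM_ONLINE, "kvm/cpu:online",
					 kvm_online_cpu, kvm_offline_cpu);
}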
Suggested-by: Thomas Gleixner Signed-off-by: Chao Gao Signed-off-by: Isaku Yamahata Reviewed-by: Yuan Yao [sean: drop WARN that IRQs are disabled] Signed-off-by: Sean Christopherson Message-Id: <20221130230934.1014142-42-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/cpuhotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 5cae6bd22f7f..5b2f8147d1ae 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -187,7 +187,6 @@ enum cpuhp_state { CPUHP_AP_CSKY_TIMER_STARTING, CPUHP_AP_TI_GP_TIMER_STARTING, CPUHP_AP_HYPERV_TIMER_STARTING, - CPUHP_AP_KVM_STARTING, /* Must be the last timer callback */ CPUHP_AP_DUMMY_TIMER_STARTING, CPUHP_AP_ARM_XEN_STARTING, @@ -202,6 +201,7 @@ enum cpuhp_state { /* Online section invoked on the hotplugged CPU from the hotplug thread */ CPUHP_AP_ONLINE_IDLE, + CPUHP_AP_KVM_ONLINE, CPUHP_AP_SCHED_WAIT_EMPTY, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_X86_VDSO_VMA_ONLINE, -- cgit v1.2.3 From 441f7bfa99fe2b8a7e504aa72047e20579e88a5d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 30 Nov 2022 23:09:33 +0000 Subject: KVM: Opt out of generic hardware enabling on s390 and PPC Allow architectures to opt out of the generic hardware enabling logic, and opt out on both s390 and PPC, which don't need to manually enable virtualization as it's always on (when available). In addition to letting s390 and PPC drop a bit of dead code, this will hopefully also allow ARM to clean up its related code, e.g. ARM has its own per-CPU flag to track which CPUs have enabled hardware due to the need to keep hardware enabled indefinitely when pKVM is enabled. Signed-off-by: Sean Christopherson Acked-by: Anup Patel Message-Id: <20221130230934.1014142-50-seanjc@google.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 20e207e4c6c1..109b18e2789c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1441,8 +1441,10 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING int kvm_arch_hardware_enable(void); void kvm_arch_hardware_disable(void); +#endif int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); @@ -2074,7 +2076,9 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) } } +#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING extern bool kvm_rebooting; +#endif extern unsigned int halt_poll_ns; extern unsigned int halt_poll_ns_grow; -- cgit v1.2.3 From 876293121f24fc1a7df85450d0997f54540c8979 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 29 Dec 2022 17:59:57 +0100 Subject: ata: scsi: rename flag ATA_QCFLAG_FAILED to ATA_QCFLAG_EH The name ATA_QCFLAG_FAILED is misleading since it does not mean that a QC completed in error, or that it didn't complete at all. It means that libata decided to schedule EH for the QC, so the QC is now owned by the libata error handler (EH). The normal execution path is responsible for not accessing a QC owned by EH. libata core enforces the rule by returning NULL from ata_qc_from_tag() for QCs owned by EH.
It is quite easy to mistake that a QC marked with ATA_QCFLAG_FAILED was an error. However, a QC that was actually an error is instead indicated by having qc->err_mask set. E.g. when we have a NCQ error, we abort all QCs, which currently will mark all QCs as ATA_QCFLAG_FAILED. However, it will only be a single QC that is an error (i.e. has qc->err_mask set). Rename ATA_QCFLAG_FAILED to ATA_QCFLAG_EH to more clearly highlight that this flag simply means that a QC is now owned by EH. This new name will not mislead to think that the QC was an error (which is instead indicated by having qc->err_mask set). This also makes it more obvious that the EH code skips all QCs that do not have ATA_QCFLAG_EH set (rather than ATA_QCFLAG_FAILED), since the EH code should simply only care about QCs that are owned by EH itself. Signed-off-by: Niklas Cassel Reviewed-by: John Garry Signed-off-by: Damien Le Moal --- include/linux/libata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index c9149ebe7423..7985e6e2ae0e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -206,7 +206,7 @@ enum { ATA_QCFLAG_QUIET = (1 << 6), /* don't report device error */ ATA_QCFLAG_RETRY = (1 << 7), /* retry after failure */ - ATA_QCFLAG_FAILED = (1 << 16), /* cmd failed and is owned by EH */ + ATA_QCFLAG_EH = (1 << 16), /* cmd aborted and owned by EH */ ATA_QCFLAG_SENSE_VALID = (1 << 17), /* sense data valid */ ATA_QCFLAG_EH_SCHEDULED = (1 << 18), /* EH scheduled (obsolete) */ @@ -1756,7 +1756,7 @@ static inline struct ata_queued_cmd *ata_qc_from_tag(struct ata_port *ap, return qc; if ((qc->flags & (ATA_QCFLAG_ACTIVE | - ATA_QCFLAG_FAILED)) == ATA_QCFLAG_ACTIVE) + ATA_QCFLAG_EH)) == ATA_QCFLAG_ACTIVE) return qc; return NULL; -- cgit v1.2.3 From 931139af5718fb41565fff2420daf995c016ec80 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 29 Dec 2022 17:59:58 +0100 Subject: ata: libata: simplify qc_fill_rtf port operation interface The boolean return value of the qc_fill_rtf operation is used nowhere. Simplify this operation interface by making it a void function. All drivers defining this operation are also updated. 
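The conversion is mechanical for host drivers; a hedged before/after sketch with an invented driver prefix and FIS accessor (only the return type changes, the body stays the same):

/* before: the bool result was never consumed by libata */
static bool foo_qc_fill_rtf(struct ata_queued_cmd *qc)
{
	ata_tf_from_fis(foo_rx_fis(qc->ap), &qc->result_tf);
	return true;
}

/* after */
static void foo_qc_fill_rtf(struct ata_queued_cmd *qc)
{
	ata_tf_from_fis(foo_rx_fis(qc->ap), &qc->result_tf);
}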
Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel Reviewed-by: John Garry --- include/linux/libata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 7985e6e2ae0e..8483d8300ea3 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -876,7 +876,7 @@ struct ata_port_operations { int (*check_atapi_dma)(struct ata_queued_cmd *qc); enum ata_completion_errors (*qc_prep)(struct ata_queued_cmd *qc); unsigned int (*qc_issue)(struct ata_queued_cmd *qc); - bool (*qc_fill_rtf)(struct ata_queued_cmd *qc); + void (*qc_fill_rtf)(struct ata_queued_cmd *qc); /* * Configuration and exception handling @@ -1936,7 +1936,7 @@ extern void ata_sff_queue_delayed_work(struct delayed_work *dwork, unsigned long delay); extern void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay); extern unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc); -extern bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc); +extern void ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc); extern unsigned int ata_sff_port_intr(struct ata_port *ap, struct ata_queued_cmd *qc); extern irqreturn_t ata_sff_interrupt(int irq, void *dev_instance); -- cgit v1.2.3 From 93c4aa449b88196c7d56a556cb6a2aad21ad8a7a Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 29 Dec 2022 17:59:59 +0100 Subject: ata: libata: read the shared status for successful NCQ commands once Currently, the status is being read for each QC, inside ata_qc_complete(), which means that QCs being completed by ata_qc_complete_multiple() (i.e. multiple QCs completed during a single interrupt), can have different status and error bits set. This is because the FIS Receive Area will get updated as soon as the HBA receives a new FIS from the device in the NCQ case. Here is an example of the problem: ata14.00: ata_qc_complete_multiple: done_mask: 0x180000 qc tag: 19 cmd: 0x61 flags: 0x11b err_mask: 0x0 tf->status: 0x40 qc tag: 20 cmd: 0x61 flags: 0x11b err_mask: 0x0 tf->status: 0x43 A print in ata_qc_complete_multiple(), shows that done_mask is: 0x180000 which means that tag 19 and 20 were completed. Another print in ata_qc_complete(), after the call to fill_result_tf(), shows that tag 19 and 20 have different status values, even though they were completed in the same ata_qc_complete_multiple() call. If PMP is not enabled, simply read the status and error once, before calling ata_qc_complete() for each QC. Without PMP, we know that all QCs must share the same status and error values. If PMP is enabled, we also read the status before calling ata_qc_complete(), however, we still read the status for each QC, since the QCs can belong to different PMP links (which means that the QCs does not necessarily share the same status and error values). Do all this by introducing the new port operation .qc_ncq_fill_rtf. If set, this operation is called in ata_qc_complete_multiple() to set the result tf for all completed QCs signaled by the last SDB FIS received. QCs that have their result tf filled are marked with the new flag ATA_QCFLAG_RTF_FILLED so that any later execution of the qc_fill_rtf port operation does nothing (e.g. when called from ata_qc_complete()). 
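A rough sketch of what a host driver's .qc_ncq_fill_rtf could look like; the accessors for the received SDB FIS are hypothetical, and a real implementation (e.g. an AHCI-style driver) would read the values from its own FIS receive area:

static void foo_qc_ncq_fill_rtf(struct ata_port *ap, u64 done_mask)
{
	/* One SDB FIS signalled all of done_mask: read status/error once. */
	u8 status = foo_sdb_fis_status(ap);
	u8 error = foo_sdb_fis_error(ap);

	while (done_mask) {
		unsigned int tag = __ffs64(done_mask);
		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);

		if (qc) {
			qc->result_tf.status = status;
			qc->result_tf.error = error;
			/* make the later qc_fill_rtf call a no-op */
			qc->flags |= ATA_QCFLAG_RTF_FILLED;
		}
		done_mask &= ~(1ULL << tag);
	}
}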
Co-developed-by: Damien Le Moal Signed-off-by: Damien Le Moal Signed-off-by: Niklas Cassel --- include/linux/libata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 8483d8300ea3..f54e02dadc6f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -200,6 +200,7 @@ enum { /* struct ata_queued_cmd flags */ ATA_QCFLAG_ACTIVE = (1 << 0), /* cmd not yet ack'd to scsi lyer */ ATA_QCFLAG_DMAMAP = (1 << 1), /* SG table is DMA mapped */ + ATA_QCFLAG_RTF_FILLED = (1 << 2), /* result TF has been filled */ ATA_QCFLAG_IO = (1 << 3), /* standard IO command */ ATA_QCFLAG_RESULT_TF = (1 << 4), /* result TF requested */ ATA_QCFLAG_CLEAR_EXCL = (1 << 5), /* clear excl_link on completion */ @@ -877,6 +878,7 @@ struct ata_port_operations { enum ata_completion_errors (*qc_prep)(struct ata_queued_cmd *qc); unsigned int (*qc_issue)(struct ata_queued_cmd *qc); void (*qc_fill_rtf)(struct ata_queued_cmd *qc); + void (*qc_ncq_fill_rtf)(struct ata_port *ap, u64 done_mask); /* * Configuration and exception handling -- cgit v1.2.3 From 87aab3c4cd59aef42fe280fece2be8b8156b99e0 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Thu, 29 Dec 2022 18:00:01 +0100 Subject: ata: libata: move NCQ related ATA_DFLAGs ata_dev_configure() starts off by clearing all flags in ATA_DFLAG_CFG_MASK: dev->flags &= ~ATA_DFLAG_CFG_MASK; ata_dev_configure() then calls ata_dev_config_lba() which calls ata_dev_config_ncq(). ata_dev_config_ncq() will set the correct ATA_DFLAGs depending on what is actually supported. Since these flags are set by ata_dev_configure(), they should be in ATA_DFLAG_CFG_MASK and not in ATA_DFLAG_INIT_MASK. ATA_DFLAG_NCQ_PRIO_ENABLED is set via sysfs; it should therefore not be in ATA_DFLAG_CFG_MASK. It also cannot be in ATA_DFLAG_INIT_MASK, because ata_eh_schedule_probe() calls ata_dev_init(), which will clear all flags in ATA_DFLAG_INIT_MASK. This means that ATA_DFLAG_NCQ_PRIO_ENABLED (the value the user sets via sysfs) would get silently cleared if ata_eh_schedule_probe() is called. While that should only happen in certain circumstances, it still doesn't seem right that it can get silently cleared. (ata_dev_config_ncq_prio() will still clear the ATA_DFLAG_NCQ_PRIO_ENABLED flag if ATA_DFLAG_NCQ_PRIO is suddenly no longer supported after a revalidation.) Because of this, move ATA_DFLAG_NCQ_PRIO_ENABLED to be outside of both ATA_DFLAG_CFG_MASK and ATA_DFLAG_INIT_MASK.
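Since the change is entirely about which reset paths may clear the flag, the intended invariant can be captured with a compile-time check; a small illustrative sketch, not part of the patch itself:

#include <linux/build_bug.h>
#include <linux/libata.h>

static inline void check_ncq_prio_enabled_placement(void)
{
	/*
	 * The sysfs-controlled flag must survive both ata_dev_configure(),
	 * which clears ATA_DFLAG_CFG_MASK, and ata_dev_init(), which clears
	 * ATA_DFLAG_INIT_MASK.
	 */
	BUILD_BUG_ON(ATA_DFLAG_NCQ_PRIO_ENABLED &
		     (ATA_DFLAG_CFG_MASK | ATA_DFLAG_INIT_MASK));
}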
Signed-off-by: Niklas Cassel Signed-off-by: Damien Le Moal --- include/linux/libata.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index f54e02dadc6f..3b7f5d9e2f87 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -91,22 +91,21 @@ enum { ATA_DFLAG_AN = (1 << 7), /* AN configured */ ATA_DFLAG_TRUSTED = (1 << 8), /* device supports trusted send/recv */ ATA_DFLAG_DMADIR = (1 << 10), /* device requires DMADIR */ - ATA_DFLAG_CFG_MASK = (1 << 12) - 1, + ATA_DFLAG_NCQ_SEND_RECV = (1 << 11), /* device supports NCQ SEND and RECV */ + ATA_DFLAG_NCQ_PRIO = (1 << 12), /* device supports NCQ priority */ + ATA_DFLAG_CFG_MASK = (1 << 13) - 1, - ATA_DFLAG_PIO = (1 << 12), /* device limited to PIO mode */ - ATA_DFLAG_NCQ_OFF = (1 << 13), /* device limited to non-NCQ mode */ + ATA_DFLAG_PIO = (1 << 13), /* device limited to PIO mode */ + ATA_DFLAG_NCQ_OFF = (1 << 14), /* device limited to non-NCQ mode */ ATA_DFLAG_SLEEPING = (1 << 15), /* device is sleeping */ ATA_DFLAG_DUBIOUS_XFER = (1 << 16), /* data transfer not verified */ ATA_DFLAG_NO_UNLOAD = (1 << 17), /* device doesn't support unload */ ATA_DFLAG_UNLOCK_HPA = (1 << 18), /* unlock HPA */ - ATA_DFLAG_NCQ_SEND_RECV = (1 << 19), /* device supports NCQ SEND and RECV */ - ATA_DFLAG_NCQ_PRIO = (1 << 20), /* device supports NCQ priority */ - ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 21), /* Priority cmds sent to dev */ - ATA_DFLAG_INIT_MASK = (1 << 24) - 1, + ATA_DFLAG_INIT_MASK = (1 << 19) - 1, + ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 19), /* Priority cmds sent to dev */ ATA_DFLAG_DETACH = (1 << 24), ATA_DFLAG_DETACHED = (1 << 25), - ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ ATA_DFLAG_DEVSLP = (1 << 27), /* device supports Device Sleep */ ATA_DFLAG_ACPI_DISABLED = (1 << 28), /* ACPI for the device is disabled */ -- cgit v1.2.3 From a4b2e6063cfeaab1160501acfb27e8618a7c693f Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 15 Dec 2022 16:20:23 +0100 Subject: firmware: xilinx: Clear IOCTL_SET_SD_TAPDELAY using PM_MMIO_WRITE In case the tap delay required by Arasan SDHCI is set to 0, the current embeddedsw firmware unconditionally writes IOU_SLCR SD_ITAPDLY to 0x100 (SD0_ITAPDLYENA=1, SD0_ITAPDLYSEL=0). Previous behavior was to keep the IOU_SLCR SD_ITAPDLY set to 0x0. There is some sort of difference in the behavior between SD0_ITAPDLYENA=1/0 with the same SD0_ITAPDLYSEL=0, even though the behavior should be identical -- zero delay added to rxclk_in line. The former breaks HS200 training in low temperature conditions. Write IOU_SLCR SD_ITAPDLY register to 0 using PM_MMIO_WRITE which seem to allow unrestricted WRITE access (and PM_MMIO_READ which allows read access) to the entire address space. This way, it is possible to work around the defect in IOCTL_SET_SD_TAPDELAY design which does not permit clearing SDx_ITAPDLYENA bit. Note that the embeddedsw firmware does not permit clearing the SD_ITAPDLY SD0_ITAPDLYENA bit, this bit can only ever be set by the firmware and it is often impossible to update the possibly broken firmware. 
Signed-off-by: Marek Vasut Link: https://lore.kernel.org/r/20221215152023.8387-1-marex@denx.de Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-zynqmp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index b986e267d149..eb88b4ba62f9 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -79,6 +79,10 @@ #define EVENT_ERROR_PSM_ERR1 (0x28108000U) #define EVENT_ERROR_PSM_ERR2 (0x2810C000U) +/* ZynqMP SD tap delay tuning */ +#define SD_ITAPDLY 0xFF180314 +#define SD_OTAPDLYSEL 0xFF180318 + enum pm_api_cb_id { PM_INIT_SUSPEND_CB = 30, PM_ACKNOWLEDGE_CB = 31, -- cgit v1.2.3 From 589c3357370a596ef7c99c00baca8ac799fce531 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Sun, 11 Dec 2022 23:06:19 -0800 Subject: PCI/CXL: Export native CXL error reporting control CXL _OSC Error Reporting Control is used by the OS to determine if Firmware has control of various CXL error reporting capabilities including the event logs. Expose the result of negotiating CXL Error Reporting Control in struct pci_host_bridge for consumption by the CXL drivers. Cc: Bjorn Helgaas Cc: Lukas Wunner Cc: linux-pci@vger.kernel.org Cc: linux-acpi@vger.kernel.org Signed-off-by: Ira Weiny Acked-by: Bjorn Helgaas Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/20221212070627.1372402-2-ira.weiny@intel.com Signed-off-by: Dan Williams --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index adffd65e84b4..22319ea71ab0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -578,6 +578,7 @@ struct pci_host_bridge { unsigned int native_pme:1; /* OS may use PCIe PME */ unsigned int native_ltr:1; /* OS may use PCIe LTR */ unsigned int native_dpc:1; /* OS may use PCIe DPC */ + unsigned int native_cxl_error:1; /* OS may use CXL RAS/Events */ unsigned int preserve_config:1; /* Preserve FW resource setup */ unsigned int size_windows:1; /* Enable root bus sizing */ unsigned int msi_domain:1; /* Bridge wants MSI domain */ -- cgit v1.2.3 From 37e2b57078d412547f1c14bb83eff86ab9d92d3d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 3 Sep 2022 00:08:51 +0100 Subject: exportfs: Replace zero-length array with DECLARE_FLEX_ARRAY() helper Zero-length arrays are deprecated and we are moving towards adopting C99 flexible-array members instead. So, replace zero-length array declaration in struct fid with the new DECLARE_FLEX_ARRAY() helper macro. This helper allows for a flexible-array member in a union. Link: https://github.com/KSPP/linux/issues/21 Link: https://github.com/KSPP/linux/issues/193 Link: https://github.com/KSPP/linux/issues/197 Link: https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- include/linux/exportfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index fe848901fcc3..1640d97e4c0d 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -131,7 +131,7 @@ struct fid { u32 parent_block; u32 parent_generation; } udf; - __u32 raw[0]; + DECLARE_FLEX_ARRAY(__u32, raw); }; }; -- cgit v1.2.3 From 06919d226d01132c03d851966d4df3870fa1b55a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Sat, 3 Sep 2022 00:17:18 +0100 Subject: mm/memremap: Replace zero-length array with DECLARE_FLEX_ARRAY() helper Zero-length arrays are deprecated and we are moving towards adopting C99 flexible-array members, instead. So, replace zero-length array declaration in struct dev_pagemap with the new DECLARE_FLEX_ARRAY() helper macro. This helper allows for a flexible-array member in a union. Also, this addresses multiple warnings reported when building with Clang-15 and -Wzero-length-array. Link: https://github.com/KSPP/linux/issues/193 Link: https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html Reviewed-by: Kees Cook Signed-off-by: Gustavo A. R. Silva --- include/linux/memremap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 7fcaf3180a5b..1314d9c5f05b 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -135,7 +135,7 @@ struct dev_pagemap { int nr_range; union { struct range range; - struct range ranges[0]; + DECLARE_FLEX_ARRAY(struct range, ranges); }; }; -- cgit v1.2.3 From 4d70c74659d9746502b23d055dba03d1d28ec388 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 30 Nov 2022 15:48:35 +0200 Subject: i915: Move list_count() to list.h as list_count_nodes() for broader use Some of the existing users, and definitely will be new ones, want to count existing nodes in the list. Provide a generic API for that by moving code from i915 to list.h. Reviewed-by: Lucas De Marchi Acked-by: Jani Nikula Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20221130134838.23805-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/list.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 61762054b4be..f10344dbad4d 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -655,6 +655,21 @@ static inline void list_splice_tail_init(struct list_head *list, !list_is_head(pos, (head)); \ pos = n, n = pos->prev) +/** + * list_count_nodes - count nodes in the list + * @head: the head for your list. + */ +static inline size_t list_count_nodes(struct list_head *head) +{ + struct list_head *pos; + size_t count = 0; + + list_for_each(pos, head) + count++; + + return count; +} + /** * list_entry_is_head - test if the entry points to the head of the list * @pos: the type * to cursor -- cgit v1.2.3 From 9b9ad70f28675c45ef93cfb6e4af9403cb7bd34c Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Fri, 6 Jan 2023 17:40:42 +0530 Subject: remoteproc: pru: Add enum for PRU Core Identifiers. Introducing enum pruss_pru_id for PRU Core Identifiers. PRUSS_PRU0 indicates PRU Core 0. PRUSS_PRU1 indicates PRU Core 1. PRUSS_NUM_PRUS indicates the total number of PRU Cores. 
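The enum is what client drivers get back when they look up a PRU core; the following patches in this series add pru_rproc_get()/pru_rproc_put(), which fill it in. A hypothetical client could use it roughly like this (function and node names invented):

#include <linux/err.h>
#include <linux/printk.h>
#include <linux/remoteproc/pruss.h>

static int my_client_attach_pru(struct device_node *client_np)
{
	enum pruss_pru_id pru_id;
	struct rproc *pru;

	/* index 0 of the client's "ti,prus" phandle list */
	pru = pru_rproc_get(client_np, 0, &pru_id);
	if (IS_ERR(pru))
		return PTR_ERR(pru);

	pr_info("attached to PRU core %d (of %d)\n", pru_id, PRUSS_NUM_PRUS);

	pru_rproc_put(pru);
	return 0;
}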
Signed-off-by: MD Danish Anwar Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20230106121046.886863-3-danishanwar@ti.com Signed-off-by: Mathieu Poirier --- include/linux/remoteproc/pruss.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/remoteproc/pruss.h (limited to 'include/linux') diff --git a/include/linux/remoteproc/pruss.h b/include/linux/remoteproc/pruss.h new file mode 100644 index 000000000000..fbadfcfacb34 --- /dev/null +++ b/include/linux/remoteproc/pruss.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * PRU-ICSS Subsystem user interfaces + * + * Copyright (C) 2015-2022 Texas Instruments Incorporated - http://www.ti.com + * Suman Anna + */ + +#ifndef __LINUX_PRUSS_H +#define __LINUX_PRUSS_H + +#include +#include + +#define PRU_RPROC_DRVNAME "pru-rproc" + +/** + * enum pruss_pru_id - PRU core identifiers + * @PRUSS_PRU0: PRU Core 0. + * @PRUSS_PRU1: PRU Core 1. + * @PRUSS_NUM_PRUS: Total number of PRU Cores available. + * + */ + +enum pruss_pru_id { + PRUSS_PRU0 = 0, + PRUSS_PRU1, + PRUSS_NUM_PRUS, +}; + +#endif /* __LINUX_PRUSS_H */ -- cgit v1.2.3 From 919e8942548aa878d374b1b51aa68fdf751f18b8 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Fri, 6 Jan 2023 17:40:43 +0530 Subject: remoteproc: pru: Add APIs to get and put the PRU cores Add two new APIs, pru_rproc_get() and pru_rproc_put(), to the PRU driver to allow client drivers to acquire and release the remoteproc device associated with a PRU core. The PRU cores are treated as resources with only one client owning it at a time. The pru_rproc_get() function returns the rproc handle corresponding to a PRU core identified by the device tree "ti,prus" property under the client node. The pru_rproc_put() is the complementary function to pru_rproc_get(). Signed-off-by: Suman Anna Signed-off-by: Tero Kristo Signed-off-by: Grzegorz Jaszczyk Signed-off-by: MD Danish Anwar Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/20230106121046.886863-4-danishanwar@ti.com Signed-off-by: Mathieu Poirier --- include/linux/remoteproc/pruss.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc/pruss.h b/include/linux/remoteproc/pruss.h index fbadfcfacb34..579dafbbaccf 100644 --- a/include/linux/remoteproc/pruss.h +++ b/include/linux/remoteproc/pruss.h @@ -28,4 +28,34 @@ enum pruss_pru_id { PRUSS_NUM_PRUS, }; +struct device_node; + +#if IS_ENABLED(CONFIG_PRU_REMOTEPROC) + +struct rproc *pru_rproc_get(struct device_node *np, int index, + enum pruss_pru_id *pru_id); +void pru_rproc_put(struct rproc *rproc); + +#else + +static inline struct rproc * +pru_rproc_get(struct device_node *np, int index, enum pruss_pru_id *pru_id) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void pru_rproc_put(struct rproc *rproc) { } + +#endif /* CONFIG_PRU_REMOTEPROC */ + +static inline bool is_pru_rproc(struct device *dev) +{ + const char *drv_name = dev_driver_string(dev); + + if (strncmp(drv_name, PRU_RPROC_DRVNAME, sizeof(PRU_RPROC_DRVNAME))) + return false; + + return true; +} + #endif /* __LINUX_PRUSS_H */ -- cgit v1.2.3 From 102853400321baea2527917e6e89be33508c3e18 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 6 Jan 2023 17:40:45 +0530 Subject: remoteproc: pru: Add pru_rproc_set_ctable() function Some firmwares expect the OS drivers to configure the CTABLE entries publishing dynamically allocated memory regions. 
For example, the PRU Ethernet firmwares use the C28 and C30 entries for retrieving the Shared RAM and System SRAM (OCMC) areas allocated by the PRU Ethernet client driver. Provide a way for users to do that through a new API, pru_rproc_set_ctable(). The API returns 0 on success and a negative value on error. NOTE: The programmable CTABLE entries are typically re-programmed by the PRU firmwares when dealing with a certain block of memory during block processing. This API provides an interface to the PRU client drivers to publish a dynamically allocated memory block with the PRU firmware using a CTABLE entry instead of a negotiated address in shared memory. Additional synchronization may be needed between the PRU client drivers and firmwares if different addresses need to be published at run-time reusing the same CTABLE entry. CTABLE stands for "constant table". Each CTABLE entry just holds the upper address bits so the PRU can reference external memory with larger address bits. For a use case please see prueth_sw_emac_config() in "drivers/net/ethernet/ti/prueth_switch.c" /* Set in constant table C28 of PRUn to ICSS Shared memory */ pru_rproc_set_ctable(prueth->pru0, PRU_C28, sharedramaddr); pru_rproc_set_ctable(prueth->pru1, PRU_C28, sharedramaddr); /* Set in constant table C30 of PRUn to OCMC memory */ pru_rproc_set_ctable(prueth->pru0, PRU_C30, ocmcaddr); pru_rproc_set_ctable(prueth->pru1, PRU_C30, ocmcaddr); Signed-off-by: Andrew F. Davis Signed-off-by: Suman Anna Signed-off-by: Roger Quadros Signed-off-by: Grzegorz Jaszczyk Signed-off-by: MD Danish Anwar Link: https://lore.kernel.org/r/20230106121046.886863-6-danishanwar@ti.com Signed-off-by: Mathieu Poirier --- include/linux/remoteproc/pruss.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/remoteproc/pruss.h b/include/linux/remoteproc/pruss.h index 579dafbbaccf..039b50d58df2 100644 --- a/include/linux/remoteproc/pruss.h +++ b/include/linux/remoteproc/pruss.h @@ -28,13 +28,29 @@ enum pruss_pru_id { PRUSS_NUM_PRUS, }; +/* + * enum pru_ctable_idx - Configurable Constant table index identifiers + */ +enum pru_ctable_idx { + PRU_C24 = 0, + PRU_C25, + PRU_C26, + PRU_C27, + PRU_C28, + PRU_C29, + PRU_C30, + PRU_C31, +}; + struct device_node; +struct rproc; #if IS_ENABLED(CONFIG_PRU_REMOTEPROC) struct rproc *pru_rproc_get(struct device_node *np, int index, enum pruss_pru_id *pru_id); void pru_rproc_put(struct rproc *rproc); +int pru_rproc_set_ctable(struct rproc *rproc, enum pru_ctable_idx c, u32 addr); #else @@ -46,6 +62,12 @@ pru_rproc_get(struct device_node *np, int index, enum pruss_pru_id *pru_id) static inline void pru_rproc_put(struct rproc *rproc) { } +static inline int pru_rproc_set_ctable(struct rproc *rproc, + enum pru_ctable_idx c, u32 addr) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_PRU_REMOTEPROC */ static inline bool is_pru_rproc(struct device *dev) -- cgit v1.2.3 From 33ae3d0955943ac5bacfcb6911cf7cb74822bf8c Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Fri, 6 Jan 2023 23:28:03 +0800 Subject: soc: qcom: apr: make remove callback of apr driver void returned Since commit fc7a6209d571 ("bus: Make remove callback return void") forces bus_type::remove to be void-returned, it doesn't make much sense for any bus based driver implementing the remove callback to return non-void to its caller. As such, change the remove function for apr bus based drivers to return void.
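On the apr bus the conversion is just a change of return type in the client driver; a hedged sketch with invented names:

static int my_apr_probe(struct apr_device *adev)
{
	return 0;
}

static void my_apr_remove(struct apr_device *adev)
{
	/* release resources; the bus never did anything with a return value */
}

static struct apr_driver my_apr_driver = {
	.probe = my_apr_probe,
	.remove = my_apr_remove,
	.driver = {
		.name = "my-apr-client",
	},
};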
Signed-off-by: Dawei Li Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/TYCP286MB23232B7968D34DB8323B0F16CAFB9@TYCP286MB2323.JPNP286.PROD.OUTLOOK.COM --- include/linux/soc/qcom/apr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h index 23c5b30f3511..be98aebcb3e1 100644 --- a/include/linux/soc/qcom/apr.h +++ b/include/linux/soc/qcom/apr.h @@ -153,7 +153,7 @@ typedef struct apr_device gpr_device_t; struct apr_driver { int (*probe)(struct apr_device *sl); - int (*remove)(struct apr_device *sl); + void (*remove)(struct apr_device *sl); int (*callback)(struct apr_device *a, struct apr_resp_pkt *d); int (*gpr_callback)(struct gpr_resp_pkt *d, void *data, int op); -- cgit v1.2.3 From be505ba8fe90068868bc084cad2079a38486b2d5 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 18 Nov 2022 10:58:06 +0800 Subject: ASoC/soundwire: remove is_sdca boolean property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Device_ID registers already tell us if a device supports the SDCA specification or not, in hindsight we never needed a property when the information is reported by both hardware and ACPI. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Reviewed-by: Charles Keepax Acked-by: Mark Brown Link: https://lore.kernel.org/r/20221118025807.534863-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 9e4537f409c2..8fb458931772 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -365,7 +365,6 @@ struct sdw_dpn_prop { * @sink_dpn_prop: Sink Data Port N properties * @scp_int1_mask: SCP_INT1_MASK desired settings * @quirks: bitmask identifying deltas from the MIPI specification - * @is_sdca: the Slave supports the SDCA specification */ struct sdw_slave_prop { u32 mipi_revision; @@ -389,7 +388,6 @@ struct sdw_slave_prop { struct sdw_dpn_prop *sink_dpn_prop; u8 scp_int1_mask; u32 quirks; - bool is_sdca; }; #define SDW_SLAVE_QUIRKS_INVALID_INITIAL_PARITY BIT(0) -- cgit v1.2.3 From ffa1726589a7cc4bd96a7f19f43cecdb7342478f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Fri, 18 Nov 2022 10:58:07 +0800 Subject: soundwire: enable optional clock registers for SoundWire 1.2 devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bus supports the mandatory clock registers for SDCA devices, these registers can also be optionally supported by SoundWire 1.2 devices that don't follow the SDCA class specification. 
Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Péter Ujfalusi Signed-off-by: Bard Liao Reviewed-by: Charles Keepax Link: https://lore.kernel.org/r/20221118025807.534863-3-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 8fb458931772..9a49263c53cf 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -365,6 +365,9 @@ struct sdw_dpn_prop { * @sink_dpn_prop: Sink Data Port N properties * @scp_int1_mask: SCP_INT1_MASK desired settings * @quirks: bitmask identifying deltas from the MIPI specification + * @clock_reg_supported: the Peripheral implements the clock base and scale + * registers introduced with the SoundWire 1.2 specification. SDCA devices + * do not need to set this boolean property as the registers are required. */ struct sdw_slave_prop { u32 mipi_revision; @@ -388,6 +391,7 @@ struct sdw_slave_prop { struct sdw_dpn_prop *sink_dpn_prop; u8 scp_int1_mask; u32 quirks; + bool clock_reg_supported; }; #define SDW_SLAVE_QUIRKS_INVALID_INITIAL_PARITY BIT(0) -- cgit v1.2.3 From 62dc9f3f2fd07a2d4f4c97d76403f387363cb637 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Fri, 25 Nov 2022 14:20:25 +0000 Subject: soundwire: bus: export sdw_nwrite_no_pm and sdw_nread_no_pm functions The commit 167790abb90f ("soundwire: export sdw_write/read_no_pm functions") exposed the single byte no_pm versions of the IO functions that can be used without touching PM, export the multi byte no_pm versions for the same reason. Reviewed-by: Pierre-Louis Bossart Signed-off-by: Simon Trimmer Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20221125142028.1118618-2-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 9a49263c53cf..13019e3904b6 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -1049,7 +1049,9 @@ int sdw_write(struct sdw_slave *slave, u32 addr, u8 value); int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value); int sdw_read_no_pm(struct sdw_slave *slave, u32 addr); int sdw_nread(struct sdw_slave *slave, u32 addr, size_t count, u8 *val); +int sdw_nread_no_pm(struct sdw_slave *slave, u32 addr, size_t count, u8 *val); int sdw_nwrite(struct sdw_slave *slave, u32 addr, size_t count, const u8 *val); +int sdw_nwrite_no_pm(struct sdw_slave *slave, u32 addr, size_t count, const u8 *val); int sdw_update(struct sdw_slave *slave, u32 addr, u8 mask, u8 val); int sdw_update_no_pm(struct sdw_slave *slave, u32 addr, u8 mask, u8 val); -- cgit v1.2.3 From 6726b47a2a60e3f2ef15ee7c4c6d9caa27770576 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 25 Nov 2022 14:20:26 +0000 Subject: soundwire: Provide build stubs for common functions Provide stub functions when CONFIG_SOUNDWIRE is not set for functions that are quite likely to be used from common code on devices supporting multiple control buses. 
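The practical benefit is that code shared between, say, the I2C and SoundWire personalities of a codec no longer needs #ifdef CONFIG_SOUNDWIRE guards around every bus access; when SoundWire is disabled the calls resolve to the WARN_ONCE()/-EINVAL stubs instead of failing to link. A hedged sketch (the priv structure and its fields are invented):

#include <linux/regmap.h>
#include <linux/soundwire/sdw.h>

static int my_codec_write_reg(struct my_codec_priv *priv, u32 reg, u8 val)
{
	/* Compiles even with CONFIG_SOUNDWIRE=n thanks to the new stubs. */
	if (priv->using_sdw)
		return sdw_write_no_pm(priv->sdw_slave, reg, val);

	return regmap_write(priv->regmap, reg, val);
}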
Reviewed-by: Pierre-Louis Bossart Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20221125142028.1118618-3-ckeepax@opensource.cirrus.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 105 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 95 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 13019e3904b6..3cd2a761911f 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -4,6 +4,7 @@ #ifndef __SOUNDWIRE_H #define __SOUNDWIRE_H +#include #include #include @@ -1023,15 +1024,8 @@ int sdw_stream_add_master(struct sdw_bus *bus, struct sdw_port_config *port_config, unsigned int num_ports, struct sdw_stream_runtime *stream); -int sdw_stream_add_slave(struct sdw_slave *slave, - struct sdw_stream_config *stream_config, - struct sdw_port_config *port_config, - unsigned int num_ports, - struct sdw_stream_runtime *stream); int sdw_stream_remove_master(struct sdw_bus *bus, struct sdw_stream_runtime *stream); -int sdw_stream_remove_slave(struct sdw_slave *slave, - struct sdw_stream_runtime *stream); int sdw_startup_stream(void *sdw_substream); int sdw_prepare_stream(struct sdw_stream_runtime *stream); int sdw_enable_stream(struct sdw_stream_runtime *stream); @@ -1042,8 +1036,20 @@ int sdw_bus_prep_clk_stop(struct sdw_bus *bus); int sdw_bus_clk_stop(struct sdw_bus *bus); int sdw_bus_exit_clk_stop(struct sdw_bus *bus); -/* messaging and data APIs */ +int sdw_compare_devid(struct sdw_slave *slave, struct sdw_slave_id id); +void sdw_extract_slave_id(struct sdw_bus *bus, u64 addr, struct sdw_slave_id *id); +#if IS_ENABLED(CONFIG_SOUNDWIRE) + +int sdw_stream_add_slave(struct sdw_slave *slave, + struct sdw_stream_config *stream_config, + struct sdw_port_config *port_config, + unsigned int num_ports, + struct sdw_stream_runtime *stream); +int sdw_stream_remove_slave(struct sdw_slave *slave, + struct sdw_stream_runtime *stream); + +/* messaging and data APIs */ int sdw_read(struct sdw_slave *slave, u32 addr); int sdw_write(struct sdw_slave *slave, u32 addr, u8 value); int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value); @@ -1055,7 +1061,86 @@ int sdw_nwrite_no_pm(struct sdw_slave *slave, u32 addr, size_t count, const u8 * int sdw_update(struct sdw_slave *slave, u32 addr, u8 mask, u8 val); int sdw_update_no_pm(struct sdw_slave *slave, u32 addr, u8 mask, u8 val); -int sdw_compare_devid(struct sdw_slave *slave, struct sdw_slave_id id); -void sdw_extract_slave_id(struct sdw_bus *bus, u64 addr, struct sdw_slave_id *id); +#else + +static inline int sdw_stream_add_slave(struct sdw_slave *slave, + struct sdw_stream_config *stream_config, + struct sdw_port_config *port_config, + unsigned int num_ports, + struct sdw_stream_runtime *stream) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_stream_remove_slave(struct sdw_slave *slave, + struct sdw_stream_runtime *stream) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +/* messaging and data APIs */ +static inline int sdw_read(struct sdw_slave *slave, u32 addr) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_write(struct sdw_slave *slave, u32 addr, u8 value) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int 
sdw_read_no_pm(struct sdw_slave *slave, u32 addr) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_nread(struct sdw_slave *slave, u32 addr, size_t count, u8 *val) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_nread_no_pm(struct sdw_slave *slave, u32 addr, size_t count, u8 *val) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_nwrite(struct sdw_slave *slave, u32 addr, size_t count, const u8 *val) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_nwrite_no_pm(struct sdw_slave *slave, u32 addr, size_t count, const u8 *val) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_update(struct sdw_slave *slave, u32 addr, u8 mask, u8 val) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +static inline int sdw_update_no_pm(struct sdw_slave *slave, u32 addr, u8 mask, u8 val) +{ + WARN_ONCE(1, "SoundWire API is disabled"); + return -EINVAL; +} + +#endif /* CONFIG_SOUNDWIRE */ #endif /* __SOUNDWIRE_H */ -- cgit v1.2.3 From 8afbb4273977f055db219a22daaafe64be10fb95 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2023 18:58:05 +0100 Subject: driver core: make bus_get_device_klist() static No one calls this function outside of drivers/base/bus.c so make it static so it does not need to be exported anymore. Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230109175810.2965448-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index d8b29ccd07e5..01077c503d61 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -287,6 +287,5 @@ extern int bus_unregister_notifier(struct bus_type *bus, #define BUS_NOTIFY_DRIVER_NOT_BOUND 0x00000008 /* driver fails to be bound */ extern struct kset *bus_get_kset(struct bus_type *bus); -extern struct klist *bus_get_device_klist(struct bus_type *bus); #endif -- cgit v1.2.3 From a9efdd2519edd7df84afd075b65ca6428dcb0039 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2023 18:58:06 +0100 Subject: driver core: remove subsys_find_device_by_id() This function has not been called by any code in the kernel tree in many many years so remove it as it is unused. Reviewed-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/20230109175810.2965448-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 01077c503d61..3b1a2066afcd 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -250,8 +250,6 @@ bus_find_device_by_acpi_dev(struct bus_type *bus, const void *adev) } #endif -struct device *subsys_find_device_by_id(struct bus_type *bus, unsigned int id, - struct device *hint); int bus_for_each_drv(struct bus_type *bus, struct device_driver *start, void *data, int (*fn)(struct device_driver *, void *)); void bus_sort_breadthfirst(struct bus_type *bus, -- cgit v1.2.3 From 2e45fc5502af9826617a96fe63aee055002cac97 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2023 18:58:07 +0100 Subject: driver core: make subsys_dev_iter_init() static No one outside of drivers/base/bus.c calls this function so make it static and remove the exported symbol. Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230109175810.2965448-3-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 3b1a2066afcd..67cb5b29f62d 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -154,10 +154,6 @@ struct subsys_dev_iter { struct klist_iter ki; const struct device_type *type; }; -void subsys_dev_iter_init(struct subsys_dev_iter *iter, - struct bus_type *subsys, - struct device *start, - const struct device_type *type); struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter); void subsys_dev_iter_exit(struct subsys_dev_iter *iter); -- cgit v1.2.3 From 38cdadefa2feecc9e7aa5f67bc4aea5b9a8ca59f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2023 18:58:08 +0100 Subject: driver core: make subsys_dev_iter_next() static The function subsys_dev_iter_next() is only used in drivers/base/bus.c so make it static to that file and remove the global export. Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230109175810.2965448-4-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 67cb5b29f62d..4f994d8fce0c 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -154,7 +154,6 @@ struct subsys_dev_iter { struct klist_iter ki; const struct device_type *type; }; -struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter); void subsys_dev_iter_exit(struct subsys_dev_iter *iter); int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data, -- cgit v1.2.3 From af6d0743599e594cb2cec3f1a6d2600a57d1d375 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2023 18:58:09 +0100 Subject: driver core: make subsys_dev_iter_exit() static The function subsys_dev_iter_exit() is not used outside of drivers/base/bus.c so make it static to that file and remove the global export. Reviewed-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/20230109175810.2965448-5-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 4f994d8fce0c..848d49f4cc09 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -154,7 +154,6 @@ struct subsys_dev_iter { struct klist_iter ki; const struct device_type *type; }; -void subsys_dev_iter_exit(struct subsys_dev_iter *iter); int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); -- cgit v1.2.3 From b0a8a59a1c44c07807afe50c6bd21a33c9ec98b7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 9 Jan 2023 18:58:10 +0100 Subject: driver core: move struct subsys_dev_iter to a local file struct subsys_dev_iter is not used by any code outside of drivers/base/bus.c so move it into that file and out of the global bus.h file. Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230109175810.2965448-6-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 848d49f4cc09..d529f644e92b 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -150,11 +150,6 @@ int device_match_acpi_handle(struct device *dev, const void *handle); int device_match_any(struct device *dev, const void *unused); /* iterator helpers for buses */ -struct subsys_dev_iter { - struct klist_iter ki; - const struct device_type *type; -}; - int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); struct device *bus_find_device(struct bus_type *bus, struct device *start, -- cgit v1.2.3 From 26b36df7516692292312063ca6fd19e73c06d4e7 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Tue, 27 Dec 2022 22:45:27 +0200 Subject: clk: Add generic sync_state callback for disabling unused clocks There are unused clocks that need to remain untouched by clk_disable_unused, and most likely could be disabled later on sync_state. So provide a generic sync_state callback for the clock providers that register such clocks. Then, use the same mechanism as clk_disable_unused from that generic callback, but pass the device to make sure only the clocks belonging to the current clock provider get disabled, if unused. Also, during the default clk_disable_unused, if the driver that registered the clock has the generic clk_sync_state_disable_unused callback set for sync_state, skip disabling its clocks. 
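For illustration (this sketch is not part of the patch and every foo_* identifier is a hypothetical placeholder), a clock provider opts in by pointing its driver's sync_state hook at the new generic helper:

/* Hypothetical provider whose unused clocks should only be gated at sync_state. */
static struct platform_driver foo_clk_driver = {
	.probe	= foo_clk_probe,		/* registers the provider's clocks */
	.driver	= {
		.name		= "foo-clk",
		.of_match_table	= foo_clk_of_match,
		.sync_state	= clk_sync_state_disable_unused,
	},
};

With this set, the default clk_disable_unused pass leaves the provider's clocks alone and they are only disabled, if still unused, once sync_state fires for the device.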
Signed-off-by: Abel Vesa Reviewed-by: Bjorn Andersson Reviewed-by: Dmitry Baryshkov Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221227204528.1899863-1-abel.vesa@linaro.org --- include/linux/clk-provider.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 842e72a5348f..cf1adfeaf257 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -720,6 +720,7 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name, void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); +void clk_sync_state_disable_unused(struct device *dev); /** * clk_register_divider - register a divider clock with the clock framework * @dev: device registering this clock -- cgit v1.2.3 From a9236a0aa7d7f52a974cc7eaa971fae92aa477c5 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 2 Jan 2023 16:18:27 +0530 Subject: PM: domains: Allow a genpd consumer to require a synced power off Some genpd providers don't ensure that the power domain has actually turned off in hardware. This is fine until, in some special scenarios, a consumer really requires that the power domain collapses in hardware before it is turned ON again. An example is the reset sequence of the Adreno GPU, which requires that the 'gpucc cx gdsc' power domain moves to the OFF state in hardware at least once before being turned ON again, to clear the internal state. Signed-off-by: Ulf Hansson Signed-off-by: Akhil P Oommen Reviewed-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230102161757.v5.1.I3e6b1f078ad0f1ca9358c573daa7b70ec132cdbe@changeid --- include/linux/pm_domain.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 1cd41bdf73cf..f776fb93eaa0 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -136,6 +136,7 @@ struct generic_pm_domain { unsigned int prepared_count; /* Suspend counter of prepared devices */ unsigned int performance_state; /* Aggregated max performance state */ cpumask_var_t cpus; /* A cpumask of the attached CPUs */ + bool synced_poweroff; /* A consumer needs a synced poweroff */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); struct raw_notifier_head power_notifiers; /* Power on/off notifiers */ @@ -235,6 +236,7 @@ int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb); int dev_pm_genpd_remove_notifier(struct device *dev); void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next); ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev); +void dev_pm_genpd_synced_poweroff(struct device *dev); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -300,6 +302,9 @@ static inline ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev) { return KTIME_MAX; } +static inline void dev_pm_genpd_synced_poweroff(struct device *dev) +{ } + #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) #define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL)) #endif -- cgit v1.2.3 From e656cd0bcf3d2ba2eceac82b44714bf355428ec4 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Mon, 9 Jan 2023 14:05:22 +0100 Subject: soc: qcom: rmtfs: Optionally map RMTFS to more VMs Some SoCs require that RMTFS is also mapped to the NAV VM.
Trying to power on the modem without that results in the whole platform crashing and forces a hard reboot within about 2 seconds. Add support for mapping the region to additional VMs, such as NAV to open a path towards enabling modem on such platforms. Signed-off-by: Loic Poulain [Konrad: reword, make conditional and flexible, add a define for NAV VMID] Signed-off-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230109130523.298971-2-konrad.dybcio@linaro.org --- include/linux/qcom_scm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index f8335644a01a..150b72edb879 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -55,6 +55,7 @@ enum qcom_scm_ice_cipher { #define QCOM_SCM_VMID_MSS_MSA 0xF #define QCOM_SCM_VMID_WLAN 0x18 #define QCOM_SCM_VMID_WLAN_CE 0x19 +#define QCOM_SCM_VMID_NAV 0x2B #define QCOM_SCM_PERM_READ 0x4 #define QCOM_SCM_PERM_WRITE 0x2 #define QCOM_SCM_PERM_EXEC 0x1 -- cgit v1.2.3 From 45ca30eb9dfe622b00ce352cf28ee141d243254b Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 9 Jan 2023 14:05:23 +0100 Subject: dt-bindings: firmware: qcom: scm: Separate VMIDs from header to bindings The SCM VMIDs represent predefined mappings that come from the irreplaceable and non-omittable firmware that comes with every Qualcomm SoC (unless you steal engineering samples from the factory) and help clarify otherwise totally magic numbers which we are required to pass to the secure world for some parts of the SoC to work at all (with modem being the prime example). On top of that, with changes to the rmtfs binding, secure VMIDs will become useful to have in device trees for readability. Separate them out and add to include/dt-bindings. Signed-off-by: Konrad Dybcio Reviewed-by: Krzysztof Kozlowski Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230109130523.298971-3-konrad.dybcio@linaro.org --- include/linux/qcom_scm.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 150b72edb879..1e449a5d7f5c 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -9,6 +9,8 @@ #include #include +#include + #define QCOM_SCM_VERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) #define QCOM_SCM_CPU_PWR_DOWN_L2_ON 0x0 #define QCOM_SCM_CPU_PWR_DOWN_L2_OFF 0x1 @@ -51,11 +53,6 @@ enum qcom_scm_ice_cipher { QCOM_SCM_ICE_CIPHER_AES_256_CBC = 4, }; -#define QCOM_SCM_VMID_HLOS 0x3 -#define QCOM_SCM_VMID_MSS_MSA 0xF -#define QCOM_SCM_VMID_WLAN 0x18 -#define QCOM_SCM_VMID_WLAN_CE 0x19 -#define QCOM_SCM_VMID_NAV 0x2B #define QCOM_SCM_PERM_READ 0x4 #define QCOM_SCM_PERM_WRITE 0x2 #define QCOM_SCM_PERM_EXEC 0x1 -- cgit v1.2.3 From 2364b406824f1f42026d87c1e26d4dd1ca0f65af Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 9 Jan 2023 11:13:53 +0106 Subject: printk: move size limit macros into internal.h The size limit macros are located further down in printk.c and behind ifdef conditionals. This complicates their usage for upcoming changes. Move the macros into internal.h so that they are still invisible outside of printk, but easily accessible for printk. Also, the maximum size of formatted extended messages does not need to be known by any code outside of printk, so move it to internal.h as well. And like CONSOLE_LOG_MAX, for !CONFIG_PRINTK set CONSOLE_EXT_LOG_MAX to 0 to reduce the static memory footprint. 
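For illustration, the destination of the move ends up looking roughly like the sketch below; this is an approximation of the shape described above, not a quote of kernel/printk/internal.h:

#ifdef CONFIG_PRINTK
#define CONSOLE_EXT_LOG_MAX	8192	/* max size of a formatted extended message */
#else
#define CONSOLE_EXT_LOG_MAX	0	/* shrink the static footprint when printk is disabled */
#endif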
Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230109100800.1085541-2-john.ogness@linutronix.de --- include/linux/printk.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 8c81806c2e99..8ef499ab3c1e 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -44,8 +44,6 @@ static inline const char *printk_skip_headers(const char *buffer) return buffer; } -#define CONSOLE_EXT_LOG_MAX 8192 - /* printk's without a loglevel use this.. */ #define MESSAGE_LOGLEVEL_DEFAULT CONFIG_MESSAGE_LOGLEVEL_DEFAULT -- cgit v1.2.3 From 717a5651b10940ec827fe07acfb956d906250b2b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2023 11:13:54 +0106 Subject: console: Use BIT() macros for @flags values Rather than manually calculating powers of 2, use the BIT() macros. Also take this opportunatity to cleanup and restructure the value comments into proper kerneldoc comments. Signed-off-by: Thomas Gleixner Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230109100800.1085541-3-john.ogness@linutronix.de --- include/linux/console.h | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 9cea254b34b8..ed804dd7c2e8 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -15,6 +15,7 @@ #define _LINUX_CONSOLE_H_ 1 #include +#include #include #include @@ -125,18 +126,43 @@ static inline int con_debug_leave(void) /* * The interface for a console, or any other device that wants to capture * console messages (printer driver?) - * - * If a console driver is marked CON_BOOT then it will be auto-unregistered - * when the first real console is registered. This is for early-printk drivers. */ -#define CON_PRINTBUFFER (1) -#define CON_CONSDEV (2) /* Preferred console, /dev/console */ -#define CON_ENABLED (4) -#define CON_BOOT (8) -#define CON_ANYTIME (16) /* Safe to call when cpu is offline */ -#define CON_BRL (32) /* Used for a braille device */ -#define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ +/** + * cons_flags - General console flags + * @CON_PRINTBUFFER: Used by newly registered consoles to avoid duplicate + * output of messages that were already shown by boot + * consoles or read by userspace via syslog() syscall. + * @CON_CONSDEV: Indicates that the console driver is backing + * /dev/console. + * @CON_ENABLED: Indicates if a console is allowed to print records. If + * false, the console also will not advance to later + * records. + * @CON_BOOT: Marks the console driver as early console driver which + * is used during boot before the real driver becomes + * available. It will be automatically unregistered + * when the real console driver is registered unless + * "keep_bootcon" parameter is used. + * @CON_ANYTIME: A misnomed historical flag which tells the core code + * that the legacy @console::write callback can be invoked + * on a CPU which is marked OFFLINE. That is misleading as + * it suggests that there is no contextual limit for + * invoking the callback. The original motivation was + * readiness of the per-CPU areas. + * @CON_BRL: Indicates a braille device which is exempt from + * receiving the printk spam for obvious reasons. 
+ * @CON_EXTENDED: The console supports the extended output format of + * /dev/kmesg which requires a larger output buffer. + */ +enum cons_flags { + CON_PRINTBUFFER = BIT(0), + CON_CONSDEV = BIT(1), + CON_ENABLED = BIT(2), + CON_BOOT = BIT(3), + CON_ANYTIME = BIT(4), + CON_BRL = BIT(5), + CON_EXTENDED = BIT(6), +}; struct console { char name[16]; -- cgit v1.2.3 From 02b2396d7d0cf806e80c887b4c799d482d6977ed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Jan 2023 11:13:55 +0106 Subject: console: Document struct console Add kerneldoc comments to struct console. Signed-off-by: Thomas Gleixner Signed-off-by: John Ogness Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230109100800.1085541-4-john.ogness@linutronix.de --- include/linux/console.h | 54 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index ed804dd7c2e8..1e36958aa656 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -164,24 +164,44 @@ enum cons_flags { CON_EXTENDED = BIT(6), }; +/** + * struct console - The console descriptor structure + * @name: The name of the console driver + * @write: Write callback to output messages (Optional) + * @read: Read callback for console input (Optional) + * @device: The underlying TTY device driver (Optional) + * @unblank: Callback to unblank the console (Optional) + * @setup: Callback for initializing the console (Optional) + * @exit: Callback for teardown of the console (Optional) + * @match: Callback for matching a console (Optional) + * @flags: Console flags. See enum cons_flags + * @index: Console index, e.g. port number + * @cflag: TTY control mode flags + * @ispeed: TTY input speed + * @ospeed: TTY output speed + * @seq: Sequence number of the next ringbuffer record to print + * @dropped: Number of unreported dropped ringbuffer records + * @data: Driver private data + * @node: hlist node for the console list + */ struct console { - char name[16]; - void (*write)(struct console *, const char *, unsigned); - int (*read)(struct console *, char *, unsigned); - struct tty_driver *(*device)(struct console *, int *); - void (*unblank)(void); - int (*setup)(struct console *, char *); - int (*exit)(struct console *); - int (*match)(struct console *, char *name, int idx, char *options); - short flags; - short index; - int cflag; - uint ispeed; - uint ospeed; - u64 seq; - unsigned long dropped; - void *data; - struct hlist_node node; + char name[16]; + void (*write)(struct console *co, const char *s, unsigned int count); + int (*read)(struct console *co, char *s, unsigned int count); + struct tty_driver *(*device)(struct console *co, int *index); + void (*unblank)(void); + int (*setup)(struct console *co, char *options); + int (*exit)(struct console *co); + int (*match)(struct console *co, char *name, int idx, char *options); + short flags; + short index; + int cflag; + uint ispeed; + uint ospeed; + u64 seq; + unsigned long dropped; + void *data; + struct hlist_node node; }; #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From 504fa212d7030fb1c042290dc2eb92b21515573a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 28 Dec 2022 00:21:52 +0100 Subject: driver core: Make driver_deferred_probe_timeout a static variable It is not used outside of its compilation unit, so there's no need to export this variable. 
Signed-off-by: Javier Martinez Canillas Reviewed-by: Andrew Halaney Acked-by: John Stultz Link: https://lore.kernel.org/r/20221227232152.3094584-1-javierm@redhat.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 2114d65b862f..50d0a416a5e7 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -240,7 +240,6 @@ driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev) } #endif -extern int driver_deferred_probe_timeout; void driver_deferred_probe_add(struct device *dev); int driver_deferred_probe_check_state(struct device *dev); void driver_init(void); -- cgit v1.2.3 From 17cde5e601b165174e8a433b550f84f362731164 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Nov 2022 17:06:42 -0400 Subject: genirq/msi: Add msi_device_has_isolated_msi() This will replace irq_domain_check_msi_remap() in following patches. The new API makes it more clear what "msi_remap" actually means from a functional perspective instead of identifying an implementation specific HW feature. Isolated MSI means that HW modeled by an irq_domain on the path from the initiating device to the CPU will validate that the MSI message specifies an interrupt number that the device is authorized to trigger. This must block devices from triggering interrupts they are not authorized to trigger. Currently authorization means the MSI vector is one assigned to the device. This is interesting for securing VFIO use cases where a rogue MSI (eg created by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to impact outside its security domain, eg userspace triggering interrupts on kernel drivers, a VM triggering interrupts on the hypervisor, or a VM triggering interrupts on another VM. As this is actually modeled as a per-irq_domain property, not a global platform property, correct the interface to accept the device parameter and scan through only the part of the irq_domains hierarchy originating from the source device. Locate the new code in msi.c as it naturally only works with CONFIG_GENERIC_MSI_IRQ, which also requires CONFIG_IRQ_DOMAIN and IRQ_DOMAIN_HIERARCHY. Link: https://lore.kernel.org/r/1-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Thomas Gleixner Signed-off-by: Jason Gunthorpe --- include/linux/msi.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index a112b913fff9..e8a3f3a8a7f4 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -649,6 +649,19 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nvec); void *platform_msi_get_host_data(struct irq_domain *domain); + +bool msi_device_has_isolated_msi(struct device *dev); +#else /* CONFIG_GENERIC_MSI_IRQ */ +static inline bool msi_device_has_isolated_msi(struct device *dev) +{ + /* + * Arguably if the platform does not enable MSI support then it has + * "isolated MSI", as an interrupt controller that cannot receive MSIs + * is inherently isolated by our definition. As nobody seems to needs + * this be conservative and return false anyhow.
+ */ + return false; +} #endif /* CONFIG_GENERIC_MSI_IRQ */ /* PCI specific interfaces */ -- cgit v1.2.3 From efc30a8f15a7981737297f9e9c62fb814d74e268 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 9 Dec 2022 13:23:08 -0400 Subject: iommu: Add iommu_group_has_isolated_msi() Compute the isolated_msi over all the devices in the IOMMU group because iommufd and vfio both need to know that the entire group is isolated before granting access to it. Link: https://lore.kernel.org/r/2-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 46e1347bfa22..9b7a9fa5ad28 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -455,6 +455,7 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) extern int bus_iommu_probe(struct bus_type *bus); extern bool iommu_present(struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); +extern bool iommu_group_has_isolated_msi(struct iommu_group *group); extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); extern struct iommu_group *iommu_group_get_by_id(int id); extern void iommu_domain_free(struct iommu_domain *domain); -- cgit v1.2.3 From a5e72a6bac14181249ffd04f35f6a7c9bf47fbb9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 8 Dec 2022 10:51:02 -0400 Subject: genirq/irqdomain: Remove unused irq_domain_check_msi_remap() code After converting the users of irq_domain_check_msi_remap() it and the helpers are no longer needed. The new version does not require all the #ifdef helpers and inlines because CONFIG_GENERIC_MSI_IRQ always requires CONFIG_IRQ_DOMAIN and IRQ_DOMAIN_HIERARCHY. 
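For illustration, the shape of a converted caller is roughly the following; this is a schematic sketch with a placeholder 'group' variable, not the literal VFIO/iommufd code:

	/* before: bool safe = irq_domain_check_msi_remap();  (platform-wide question) */
	bool safe = iommu_group_has_isolated_msi(group);	/* per-group, built on msi_device_has_isolated_msi() */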
Link: https://lore.kernel.org/r/5-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Thomas Gleixner Signed-off-by: Jason Gunthorpe --- include/linux/irqdomain.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index a372086750ca..b04ce03d3bb6 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -276,7 +276,6 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, void *host_data); extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); -extern bool irq_domain_check_msi_remap(void); extern void irq_set_default_host(struct irq_domain *host); extern struct irq_domain *irq_get_default_host(void); extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, @@ -559,13 +558,6 @@ static inline bool irq_domain_is_msi(struct irq_domain *domain) return domain->flags & IRQ_DOMAIN_FLAG_MSI; } -static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) -{ - return domain->flags & IRQ_DOMAIN_FLAG_MSI_REMAP; -} - -extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain); - static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_MSI_PARENT; @@ -611,17 +603,6 @@ static inline bool irq_domain_is_msi(struct irq_domain *domain) return false; } -static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) -{ - return false; -} - -static inline bool -irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain) -{ - return false; -} - static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) { return false; @@ -641,10 +622,6 @@ static inline struct irq_domain *irq_find_matching_fwnode( { return NULL; } -static inline bool irq_domain_check_msi_remap(void) -{ - return false; -} #endif /* !CONFIG_IRQ_DOMAIN */ #endif /* _LINUX_IRQDOMAIN_H */ -- cgit v1.2.3 From dcb83f6ec1bf08a44b3f19719b56e8dc18058ff5 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Nov 2022 20:12:43 -0400 Subject: genirq/msi: Rename IRQ_DOMAIN_MSI_REMAP to IRQ_DOMAIN_ISOLATED_MSI What x86 calls "interrupt remapping" is one way to achieve isolated MSI, make it clear this is talking about isolated MSI, no matter how it is achieved. This matches the new driver facing API name of msi_device_has_isolated_msi() No functional change. 
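In practice the rename only changes the flag an interrupt-remapping-capable irqchip sets on its domain; a hedged one-line sketch, where 'domain' stands in for the driver's MSI parent domain:

	domain->flags |= IRQ_DOMAIN_FLAG_ISOLATED_MSI;	/* formerly IRQ_DOMAIN_FLAG_MSI_REMAP */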
Link: https://lore.kernel.org/r/6-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Thomas Gleixner Signed-off-by: Jason Gunthorpe --- include/linux/irqdomain.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index b04ce03d3bb6..0a3e974b7288 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -192,8 +192,10 @@ enum { /* Irq domain implements MSIs */ IRQ_DOMAIN_FLAG_MSI = (1 << 4), - /* Irq domain implements MSI remapping */ - IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), + /* + * Irq domain implements isolated MSI, see msi_device_has_isolated_msi() + */ + IRQ_DOMAIN_FLAG_ISOLATED_MSI = (1 << 5), /* Irq domain doesn't translate anything */ IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6), -- cgit v1.2.3 From bf210f793937a634bae6eda6a6d699c00b2b53d9 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Nov 2022 20:31:57 -0400 Subject: irq/s390: Add arch_is_isolated_msi() for s390 s390 doesn't use irq_domains, so it has no place to set IRQ_DOMAIN_FLAG_ISOLATED_MSI. Instead of continuing to abuse the iommu subsystem to convey this information add a simple define which s390 can make statically true. The define will cause msi_device_has_isolated() to return true. Remove IOMMU_CAP_INTR_REMAP from the s390 iommu driver. Link: https://lore.kernel.org/r/8-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Reviewed-by: Matthew Rosato Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/linux/msi.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index e8a3f3a8a7f4..13c9b74a4575 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -48,6 +48,10 @@ typedef struct arch_msi_msg_data { } __attribute__ ((packed)) arch_msi_msg_data_t; #endif +#ifndef arch_is_isolated_msi +#define arch_is_isolated_msi() false +#endif + /** * msi_msg - Representation of a MSI message * @address_lo: Low 32 bits of msi message address @@ -657,10 +661,10 @@ static inline bool msi_device_has_isolated_msi(struct device *dev) /* * Arguably if the platform does not enable MSI support then it has * "isolated MSI", as an interrupt controller that cannot receive MSIs - * is inherently isolated by our definition. As nobody seems to needs - * this be conservative and return false anyhow. + * is inherently isolated by our definition. The default definition for + * arch_is_isolated_msi() is conservative and returns false anyhow. */ - return false; + return arch_is_isolated_msi(); } #endif /* CONFIG_GENERIC_MSI_IRQ */ -- cgit v1.2.3 From b062007c63eb4452f1122384e86d402531fb1d52 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 28 Nov 2022 20:34:32 -0400 Subject: iommu: Remove IOMMU_CAP_INTR_REMAP No iommu driver implements this any more, get rid of it. 
Link: https://lore.kernel.org/r/9-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Tested-by: Matthew Rosato Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 9b7a9fa5ad28..933cc57bfc48 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -120,7 +120,6 @@ static inline bool iommu_is_dma_domain(struct iommu_domain *domain) enum iommu_cap { IOMMU_CAP_CACHE_COHERENCY, /* IOMMU_CACHE is supported */ - IOMMU_CAP_INTR_REMAP, /* IOMMU supports interrupt isolation */ IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */ IOMMU_CAP_PRE_BOOT_PROTECTION, /* Firmware says it used the IOMMU for DMA protection and we should too */ -- cgit v1.2.3 From 5b6373de4351debd9fa87f1c46442b2c28d8b18e Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 19 Dec 2022 17:05:16 +0100 Subject: drm/fbdev: Remove aperture handling and FBINFO_MISC_FIRMWARE There are no users left of struct fb_info.apertures and the flag FBINFO_MISC_FIRMWARE. Remove both and the aperture-ownership code in the fbdev core. All code for aperture ownership is now located in the fbdev drivers. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20221219160516.23436-19-tzimmermann@suse.de --- include/linux/fb.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 96b96323e9cb..30183fd259ae 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -423,8 +423,6 @@ struct fb_tile_ops { */ #define FBINFO_MISC_ALWAYS_SETPAR 0x40000 -/* where the fb is a firmware driver, and can be replaced with a proper one */ -#define FBINFO_MISC_FIRMWARE 0x80000 /* * Host and GPU endianness differ. */ @@ -499,30 +497,10 @@ struct fb_info { void *fbcon_par; /* fbcon use-only private area */ /* From here on everything is device dependent */ void *par; - /* we need the PCI or similar aperture base/size not - smem_start/size as smem_start may just be an object - allocated inside the aperture so may not actually overlap */ - struct apertures_struct { - unsigned int count; - struct aperture { - resource_size_t base; - resource_size_t size; - } ranges[0]; - } *apertures; bool skip_vt_switch; /* no VT switch on suspend/resume required */ }; -static inline struct apertures_struct *alloc_apertures(unsigned int max_num) { - struct apertures_struct *a; - - a = kzalloc(struct_size(a, ranges, max_num), GFP_KERNEL); - if (!a) - return NULL; - a->count = max_num; - return a; -} - #define FBINFO_FLAG_DEFAULT FBINFO_DEFAULT /* This will go away -- cgit v1.2.3 From 6caeb33fa986151f745fc62190bc28a593b8a0d2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 Jan 2023 10:54:05 +0800 Subject: iommu: Add set_platform_dma_ops iommu ops When VFIO finishes assigning a device to user space and calls iommu_group_release_dma_owner() to return the device to kernel, the IOMMU core will attach the default domain to the device. Unfortunately, some IOMMU drivers don't support default domain, hence in the end, the core calls .detach_dev instead. This adds set_platform_dma_ops iommu ops to make it clear that what it does is returning control back to the platform DMA ops. 
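For illustration, an older IOMMU driver without default-domain support would wire the new callback roughly as below; this is a hedged sketch and every foo_* identifier is a placeholder, not code from the patch:

static void foo_iommu_set_platform_dma(struct device *dev)
{
	struct foo_iommu_domain *fdom = foo_iommu_get_attached_domain(dev);

	/*
	 * What used to live in .detach_dev(): drop the translation and hand
	 * the device back to the platform/direct DMA ops.
	 */
	foo_iommu_do_detach(fdom, dev);
}

static const struct iommu_ops foo_iommu_ops = {
	.probe_device		= foo_iommu_probe_device,
	.release_device		= foo_iommu_release_device,
	.device_group		= foo_iommu_device_group,
	.set_platform_dma_ops	= foo_iommu_set_platform_dma,
};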
Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230110025408.667767-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 46e1347bfa22..7b3e3775b069 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -228,6 +228,9 @@ struct iommu_iotlb_gather { * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain + * @set_platform_dma_ops: Returning control back to the platform DMA ops. This op + * is to support old IOMMU drivers, new drivers should use + * default domains, and the common IOMMU DMA ops. * @device_group: find iommu group for a particular device * @get_resv_regions: Request list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping @@ -256,6 +259,7 @@ struct iommu_ops { struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); + void (*set_platform_dma_ops)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); /* Request/Free a list of reserved regions for a device */ -- cgit v1.2.3 From dd8a25c557e109f868430bd2e3e8f394cb40eaa7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 10 Jan 2023 10:54:07 +0800 Subject: iommu: Remove deferred attach check from __iommu_detach_device() At the current moment, __iommu_detach_device() is only called via call chains that are after the device driver is attached - eg via explicit attach APIs called by the device driver. Commit bd421264ed30 ("iommu: Fix deferred domain attachment") has removed deferred domain attachment check from __iommu_attach_device() path, so it should just unconditionally work in the __iommu_detach_device() path. It actually looks like a bug that we were blocking detach on these paths since the attach was unconditional and the caller is going to free the (probably) UNAMANGED domain once this returns. The only place we should be testing for deferred attach is during the initial point the dma device is linked to the group, and then again during the dma api calls. Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230110025408.667767-5-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 7b3e3775b069..0d10566b3cb2 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -405,6 +405,7 @@ struct iommu_fault_param { * @iommu_dev: IOMMU device this device is linked to * @priv: IOMMU Driver private data * @max_pasids: number of PASIDs this device can consume + * @attach_deferred: the dma domain attachment is deferred * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. 
* struct iommu_group *iommu_group; @@ -417,6 +418,7 @@ struct dev_iommu { struct iommu_device *iommu_dev; void *priv; u32 max_pasids; + u32 attach_deferred:1; }; int iommu_device_register(struct iommu_device *iommu, -- cgit v1.2.3 From 8f9930fa016134ea07db4775ec596b16c3d03f05 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 10 Jan 2023 10:54:08 +0800 Subject: iommu: Remove detach_dev callback The detach_dev callback of domain ops is not called in the IOMMU core. Remove this callback to avoid dead code. The trace event for detaching domain from device is removed accordingly. Reviewed-by: Jason Gunthorpe Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230110025408.667767-6-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0d10566b3cb2..a8063f26ff69 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -299,7 +299,6 @@ struct iommu_ops { * * EBUSY - device is attached to a domain and cannot be changed * * ENODEV - device specific errors, not able to be attached * * - treated as ENODEV by the caller. Use is discouraged - * @detach_dev: detach an iommu domain from a device * @set_dev_pasid: set an iommu domain to a pasid of device * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to @@ -320,7 +319,6 @@ struct iommu_ops { */ struct iommu_domain_ops { int (*attach_dev)(struct iommu_domain *domain, struct device *dev); - void (*detach_dev)(struct iommu_domain *domain, struct device *dev); int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); -- cgit v1.2.3 From 9f820fc0651c32f8dde26b8e3ad93e1b9fdb62c4 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 12 Jan 2023 10:36:35 +0100 Subject: mtd: rawnand: Check the data only read pattern only once Instead of checking if a pattern is supported each time we need it, let's create a bitfield that only the core would be allowed to fill at startup time. The core and the individual drivers may then use it in order to check what operation they should use. This bitfield is supposed to grow over time. Signed-off-by: Miquel Raynal Tested-by: Liao Jaime Link: https://lore.kernel.org/linux-mtd/20230112093637.987838-2-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index dcf90144d70b..28c5dce782dd 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1094,10 +1094,18 @@ struct nand_controller_ops { * * @lock: lock used to serialize accesses to the NAND controller * @ops: NAND controller operations. + * @supported_op: NAND controller known-to-be-supported operations, + * only writable by the core after initial checking. + * @supported_op.data_only_read: The controller supports reading more data from + * the bus without restarting an entire read operation nor + * changing the column. 
*/ struct nand_controller { struct mutex lock; const struct nand_controller_ops *ops; + struct { + unsigned int data_only_read: 1; + } supported_op; }; static inline void nand_controller_init(struct nand_controller *nfc) -- cgit v1.2.3 From 003fe4b9545b83cca4f7a7633695d1f69a0b0011 Mon Sep 17 00:00:00 2001 From: JaimeLiao Date: Thu, 12 Jan 2023 10:36:37 +0100 Subject: mtd: rawnand: Support for sequential cache reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for sequential cache reads for controllers using the generic core helpers for their fast read/write helpers. Sequential reads may reduce the overhead when accessing physically continuous data by loading in cache the next page while the previous page gets sent out on the NAND bus. The ONFI specification provides the following additional commands to handle sequential cached reads: * 0x31 - READ CACHE SEQUENTIAL: Requires the NAND chip to load the next page into cache while keeping the current cache available for host reads. * 0x3F - READ CACHE END: Tells the NAND chip this is the end of the sequential cache read, the current cache shall remain accessible for the host but no more internal cache loading operation is required. On the bus, a multi page read operation is currently handled like this: 00 -- ADDR1 -- 30 -- WAIT_RDY (tR+tRR) -- DATA1_IN 00 -- ADDR2 -- 30 -- WAIT_RDY (tR+tRR) -- DATA2_IN 00 -- ADDR3 -- 30 -- WAIT_RDY (tR+tRR) -- DATA3_IN Sequential cached reads may instead be achieved with: 00 -- ADDR1 -- 30 -- WAIT_RDY (tR) -- \ 31 -- WAIT_RDY (tRCBSY+tRR) -- DATA1_IN \ 31 -- WAIT_RDY (tRCBSY+tRR) -- DATA2_IN \ 3F -- WAIT_RDY (tRCBSY+tRR) -- DATA3_IN Below are the read speed test results with regular reads and sequential cached reads, on NXP i.MX6 VAR-SOM-SOLO in mapping mode with a NAND chip characterized with the following timings: * tR: 20 µs * tRCBSY: 5 µs * tRR: 20 ns and the following geometry: * device size: 2 MiB * eraseblock size: 128 kiB * page size: 2 kiB ============= Normal read @ 33MHz ================= mtd_speedtest: eraseblock read speed is 15633 KiB/s mtd_speedtest: page read speed is 15515 KiB/s mtd_speedtest: 2 page read speed is 15398 KiB/s =================================================== ========= Sequential cache read @ 33MHz =========== mtd_speedtest: eraseblock read speed is 18285 KiB/s mtd_speedtest: page read speed is 15875 KiB/s mtd_speedtest: 2 page read speed is 16253 KiB/s =================================================== We observe an overall speed improvement of about 5% when reading 2 pages, up to 15% when reading an entire block. This is due to the ~14us gain on each additional page read (tR - (tRCBSY + tRR)). 
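As a rough sketch of how the new bookkeeping is consumed (the real helper in the raw NAND core may differ in detail), the core only arms a sequential cache read when the controller advertised support and more than one page is requested:

	if (chip->controller->supported_op.cont_read && first_page != last_page) {
		chip->cont_read.first_page = first_page;
		chip->cont_read.last_page = last_page;
		chip->cont_read.ongoing = true;
	}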
Co-developed-by: Miquel Raynal Signed-off-by: Miquel Raynal Signed-off-by: JaimeLiao Tested-by: Liao Jaime Link: https://lore.kernel.org/linux-mtd/20230112093637.987838-4-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 28c5dce782dd..1b0936fe3c6e 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -67,6 +67,8 @@ struct gpio_desc; /* Extended commands for large page devices */ #define NAND_CMD_READSTART 0x30 +#define NAND_CMD_READCACHESEQ 0x31 +#define NAND_CMD_READCACHEEND 0x3f #define NAND_CMD_RNDOUTSTART 0xE0 #define NAND_CMD_CACHEDPROG 0x15 @@ -1099,12 +1101,14 @@ struct nand_controller_ops { * @supported_op.data_only_read: The controller supports reading more data from * the bus without restarting an entire read operation nor * changing the column. + * @supported_op.cont_read: The controller supports sequential cache reads. */ struct nand_controller { struct mutex lock; const struct nand_controller_ops *ops; struct { unsigned int data_only_read: 1; + unsigned int cont_read: 1; } supported_op; }; @@ -1308,6 +1312,11 @@ struct nand_chip { int read_retries; struct nand_secure_region *secure_regions; u8 nr_secure_regions; + struct { + bool ongoing; + unsigned int first_page; + unsigned int last_page; + } cont_read; /* Externals */ struct nand_controller *controller; -- cgit v1.2.3 From fa5bde139ee43ab91087c01e690c61aec957c339 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 27 Oct 2022 13:07:55 +0900 Subject: ata: libata: Introduce ata_ncq_supported() Introduce the inline helper function ata_ncq_supported() to test if a device supports NCQ commands. The function ata_ncq_enabled() is also rewritten using this new helper function. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Niklas Cassel --- include/linux/libata.h | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 3b7f5d9e2f87..059ca7f2b69c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1691,21 +1691,35 @@ extern struct ata_device *ata_dev_next(struct ata_device *dev, (dev) = ata_dev_next((dev), (link), ATA_DITER_##mode)) /** - * ata_ncq_enabled - Test whether NCQ is enabled - * @dev: ATA device to test for + * ata_ncq_supported - Test whether NCQ is supported + * @dev: ATA device to test * * LOCKING: * spin_lock_irqsave(host lock) * * RETURNS: - * 1 if NCQ is enabled for @dev, 0 otherwise. + * true if @dev supports NCQ, false otherwise. */ -static inline int ata_ncq_enabled(struct ata_device *dev) +static inline bool ata_ncq_supported(struct ata_device *dev) { if (!IS_ENABLED(CONFIG_SATA_HOST)) - return 0; - return (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ_OFF | - ATA_DFLAG_NCQ)) == ATA_DFLAG_NCQ; + return false; + return (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ)) == ATA_DFLAG_NCQ; +} + +/** + * ata_ncq_enabled - Test whether NCQ is enabled + * @dev: ATA device to test + * + * LOCKING: + * spin_lock_irqsave(host lock) + * + * RETURNS: + * true if NCQ is enabled for @dev, false otherwise. 
+ */ +static inline bool ata_ncq_enabled(struct ata_device *dev) +{ + return ata_ncq_supported(dev) && !(dev->flags & ATA_DFLAG_NCQ_OFF); } static inline bool ata_fpdma_dsm_supported(struct ata_device *dev) -- cgit v1.2.3 From 4d2e4980a5289ae31a1cff40d258b68573182a37 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 14 Oct 2022 18:05:38 +0900 Subject: ata: libata: cleanup fua support detection Move the detection of a device FUA support from ata_scsiop_mode_sense()/ata_dev_supports_fua() to device scan time in ata_dev_configure(). The function ata_dev_config_fua() is introduced to detect if a device supports FUA and this support is indicated using the new device flag ATA_DFLAG_FUA. In order to blacklist known buggy devices, the horkage flag ATA_HORKAGE_NO_FUA is introduced. Similarly to other horkage flags, the libata.force= arguments "fua" and "nofua" are also introduced to allow a user to control this horkage flag through the "force" libata module parameter. The ATA_DFLAG_FUA device flag is set only and only if all the following conditions are met: * libata.fua module parameter is set to 1 * The device supports the WRITE DMA FUA EXT command, * The device is not marked with the ATA_HORKAGE_NO_FUA flag, either from the blacklist or set by the user with libata.force=nofua * The device supports NCQ (while this is not mandated by the standards, this restriction is introduced to avoid problems with older non-NCQ devices). Enabling or diabling libata FUA support for all devices can now also be done using the "force=[no]fua" module parameter when libata.fua is set to 1. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Niklas Cassel --- include/linux/libata.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 059ca7f2b69c..a759dfbdcc91 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -90,6 +90,7 @@ enum { ATA_DFLAG_ACPI_FAILED = (1 << 6), /* ACPI on devcfg has failed */ ATA_DFLAG_AN = (1 << 7), /* AN configured */ ATA_DFLAG_TRUSTED = (1 << 8), /* device supports trusted send/recv */ + ATA_DFLAG_FUA = (1 << 9), /* device supports FUA */ ATA_DFLAG_DMADIR = (1 << 10), /* device requires DMADIR */ ATA_DFLAG_NCQ_SEND_RECV = (1 << 11), /* device supports NCQ SEND and RECV */ ATA_DFLAG_NCQ_PRIO = (1 << 12), /* device supports NCQ priority */ @@ -112,9 +113,9 @@ enum { ATA_DFLAG_D_SENSE = (1 << 29), /* Descriptor sense requested */ ATA_DFLAG_ZAC = (1 << 30), /* ZAC device */ - ATA_DFLAG_FEATURES_MASK = ATA_DFLAG_TRUSTED | ATA_DFLAG_DA | \ - ATA_DFLAG_DEVSLP | ATA_DFLAG_NCQ_SEND_RECV | \ - ATA_DFLAG_NCQ_PRIO, + ATA_DFLAG_FEATURES_MASK = (ATA_DFLAG_TRUSTED | ATA_DFLAG_DA | \ + ATA_DFLAG_DEVSLP | ATA_DFLAG_NCQ_SEND_RECV | \ + ATA_DFLAG_NCQ_PRIO | ATA_DFLAG_FUA), ATA_DEV_UNKNOWN = 0, /* unknown device */ ATA_DEV_ATA = 1, /* ATA device */ @@ -381,6 +382,7 @@ enum { ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ ATA_HORKAGE_NO_LOG_DIR = (1 << 29), /* Do not read log directory */ + ATA_HORKAGE_NO_FUA = (1 << 30), /* Do not use FUA */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 312b8f79eb05479628ee71357749815b2eeeeea8 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 4 Jan 2023 11:43:34 +0200 Subject: RDMA/mlx: Calling 
qp event handler in workqueue context Move the call of qp event handler from atomic to workqueue context, so that the handler is able to block. This is needed by following patches. Signed-off-by: Mark Zhang Reviewed-by: Patrisious Haddad Link: https://lore.kernel.org/r/0cd17b8331e445f03942f4bb28d447f24ac5669d.1672821186.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx4/qp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 9db93e487496..c78b90f2e9a1 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -503,4 +503,5 @@ static inline u16 folded_qp(u32 q) u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn); +void mlx4_put_qp(struct mlx4_qp *qp); #endif /* MLX4_QP_H */ -- cgit v1.2.3 From ade7fd908d710d0ab865c273df782c75528636ef Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 17 Jan 2023 13:43:09 +0100 Subject: efi: efivars: drop kobject from efivars_register() Since commit 0f5b2c69a4cb ("efi: vars: Remove deprecated 'efivars' sysfs interface") and the removal of the sysfs interface there are no users of the efivars kobject. Drop the kobject argument from efivars_register() and add a new efivar_is_available() helper in favour of the old efivars_kobject(). Note that the new helper uses the prefix 'efivar' (i.e. without an 's') for consistency with efivar_supports_writes() and the rest of the interface (except the registration functions). For the benefit of drivers with optional EFI support, also provide a dummy implementation of efivar_is_available(). Signed-off-by: Johan Hovold Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 4b27519143f5..2124e55c02d6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1039,7 +1039,6 @@ struct efivar_operations { struct efivars { struct kset *kset; - struct kobject *kobject; const struct efivar_operations *ops; }; @@ -1053,10 +1052,14 @@ struct efivars { #define EFI_VAR_NAME_LEN 1024 int efivars_register(struct efivars *efivars, - const struct efivar_operations *ops, - struct kobject *kobject); + const struct efivar_operations *ops); int efivars_unregister(struct efivars *efivars); -struct kobject *efivars_kobject(void); + +#ifdef CONFIG_EFI +bool efivar_is_available(void); +#else +static inline bool efivar_is_available(void) { return false; } +#endif int efivar_supports_writes(void); -- cgit v1.2.3 From 2cf9e278efeff8f8bbb9580e2d6760e19795e310 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 17 Jan 2023 13:43:10 +0100 Subject: efi: efivars: make efivar_supports_writes() return bool For consistency with the new efivar_is_available() function, change the return type of efivar_supports_writes() to bool. 
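For illustration, a driver with optional EFI support can now gate its probe on the new helper; this is a hedged sketch and the foo_* names are placeholders:

static int foo_probe(struct platform_device *pdev)
{
	/*
	 * The dummy !CONFIG_EFI implementation returns false, so this also
	 * degrades gracefully on non-EFI configurations.
	 */
	if (!efivar_is_available())
		return -ENODEV;

	return foo_setup_efi_variable_backend(pdev);
}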
Signed-off-by: Johan Hovold Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 2124e55c02d6..f6b107da1cbc 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1061,7 +1061,7 @@ bool efivar_is_available(void); static inline bool efivar_is_available(void) { return false; } #endif -int efivar_supports_writes(void); +bool efivar_supports_writes(void); int efivar_lock(void); int efivar_trylock(void); -- cgit v1.2.3 From 28e1ff70a08d331703f115534bd4278e11451439 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Wed, 21 Dec 2022 20:34:51 +0100 Subject: USB: Improve usb_fill_* documentation Document the transfer buffer requirement. That is, the buffer must be DMAble - otherwise data corruption might occur. Acked-by: Randy Dunlap Reviewed-by: Laurent Pinchart Signed-off-by: Ricardo Ribalda Acked-by: Alan Stern Link: https://lore.kernel.org/r/20221220-usb-dmadoc-v4-0-74a045bf14f4@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 7d5325d47c45..4e98ebacec96 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1626,14 +1626,25 @@ struct urb { * @urb: pointer to the urb to initialize. * @dev: pointer to the struct usb_device for this urb. * @pipe: the endpoint pipe - * @setup_packet: pointer to the setup_packet buffer - * @transfer_buffer: pointer to the transfer buffer + * @setup_packet: pointer to the setup_packet buffer. The buffer must be + * suitable for DMA. + * @transfer_buffer: pointer to the transfer buffer. The buffer must be + * suitable for DMA. * @buffer_length: length of the transfer buffer * @complete_fn: pointer to the usb_complete_t function * @context: what to set the urb context to. * * Initializes a control urb with the proper information needed to submit * it to a device. + * + * The transfer buffer and the setup_packet buffer will most likely be filled + * or read via DMA. The simplest way to get a buffer that can be DMAed to is + * allocating it via kmalloc() or equivalent, even for very small buffers. + * If the buffers are embedded in a bigger structure, there is a risk that + * the buffer itself, the previous fields and/or the next fields are corrupted + * due to cache incoherencies; or slowed down if they are evicted from the + * cache. For more information, check &struct urb. + * */ static inline void usb_fill_control_urb(struct urb *urb, struct usb_device *dev, @@ -1658,13 +1669,17 @@ static inline void usb_fill_control_urb(struct urb *urb, * @urb: pointer to the urb to initialize. * @dev: pointer to the struct usb_device for this urb. * @pipe: the endpoint pipe - * @transfer_buffer: pointer to the transfer buffer + * @transfer_buffer: pointer to the transfer buffer. The buffer must be + * suitable for DMA. * @buffer_length: length of the transfer buffer * @complete_fn: pointer to the usb_complete_t function * @context: what to set the urb context to. * * Initializes a bulk urb with the proper information needed to submit it * to a device. + * + * Refer to usb_fill_control_urb() for a description of the requirements for + * transfer_buffer. 
*/ static inline void usb_fill_bulk_urb(struct urb *urb, struct usb_device *dev, @@ -1687,7 +1702,8 @@ static inline void usb_fill_bulk_urb(struct urb *urb, * @urb: pointer to the urb to initialize. * @dev: pointer to the struct usb_device for this urb. * @pipe: the endpoint pipe - * @transfer_buffer: pointer to the transfer buffer + * @transfer_buffer: pointer to the transfer buffer. The buffer must be + * suitable for DMA. * @buffer_length: length of the transfer buffer * @complete_fn: pointer to the usb_complete_t function * @context: what to set the urb context to. @@ -1697,6 +1713,9 @@ static inline void usb_fill_bulk_urb(struct urb *urb, * Initializes a interrupt urb with the proper information needed to submit * it to a device. * + * Refer to usb_fill_control_urb() for a description of the requirements for + * transfer_buffer. + * * Note that High Speed and SuperSpeed(+) interrupt endpoints use a logarithmic * encoding of the endpoint interval, and express polling intervals in * microframes (eight per millisecond) rather than in frames (one per -- cgit v1.2.3 From 599f008c257d913674b2b2f2fa9e273c1058ec2e Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Sat, 14 Jan 2023 01:32:44 -0800 Subject: usb: typec: tcpm: Add callbacks to mitigate wakeups due to contaminant On some of the TCPC implementations, when the Type-C port is exposed to contaminants, such as water, TCPC stops toggling while reporting OPEN either by the time TCPM reads CC pin status or during CC debounce window. This causes TCPM to be stuck in TOGGLING state. If TCPM is made to restart toggling, the behavior recurs causing redundant CPU wakeups till the USB-C port is free of contaminant. [206199.287817] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [206199.640337] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [206199.985789] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] (or) [ 7853.867577] Start toggling [ 7853.889921] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [ 7855.698765] CC1: 0 -> 0, CC2: 0 -> 5 [state TOGGLING, polarity 0, connected] [ 7855.698790] state change TOGGLING -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [ 7855.698826] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS] [ 7855.703559] CC1: 0 -> 0, CC2: 5 -> 5 [state SNK_ATTACH_WAIT, polarity 0, connected] [ 7855.856555] CC1: 0 -> 0, CC2: 5 -> 0 [state SNK_ATTACH_WAIT, polarity 0, disconnected] [ 7855.856581] state change SNK_ATTACH_WAIT -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [ 7855.856613] pending state change SNK_ATTACH_WAIT -> SNK_UNATTACHED @ 170 ms [rev3 NONE_AMS] [ 7856.027744] state change SNK_ATTACH_WAIT -> SNK_UNATTACHED [delayed 170 ms] [ 7856.181949] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [ 7856.187896] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [ 7857.645630] CC1: 0 -> 0, CC2: 0 -> 0 [state TOGGLING, polarity 0, disconnected] [ 7857.647291] CC1: 0 -> 0, CC2: 0 -> 5 [state TOGGLING, polarity 0, connected] [ 7857.647298] state change TOGGLING -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [ 7857.647310] pending state change SNK_ATTACH_WAIT -> SNK_DEBOUNCED @ 170 ms [rev3 NONE_AMS] [ 7857.808106] CC1: 0 -> 0, CC2: 5 -> 0 [state SNK_ATTACH_WAIT, polarity 0, disconnected] [ 7857.808123] state change SNK_ATTACH_WAIT -> SNK_ATTACH_WAIT [rev3 NONE_AMS] [ 7857.808150] pending state change SNK_ATTACH_WAIT -> SNK_UNATTACHED @ 170 ms [rev3 NONE_AMS] [ 7857.978727] state change SNK_ATTACH_WAIT -> 
SNK_UNATTACHED [delayed 170 ms] To mitigate redundant TCPM wakeups, TCPCs which do have the needed hardware can implement the check_contaminant callback which is invoked by TCPM to evaluate for presence of contaminant. Lower level TCPC driver can restart toggling through TCPM_PORT_CLEAN event when the driver detects that USB-C port is free of contaminant. check_contaminant callback also passes the disconnect_while_debounce flag which when true denotes that the CC pins transitioned to OPEN state during the CC debounce window. Signed-off-by: Badhri Jagan Sridharan Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20230114093246.1933321-1-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h index bffc8d3e14ad..ab7ca872950b 100644 --- a/include/linux/usb/tcpm.h +++ b/include/linux/usb/tcpm.h @@ -114,6 +114,11 @@ enum tcpm_transmit_type { * Optional; The USB Communications Capable bit indicates if port * partner is capable of communication over the USB data lines * (e.g. D+/- or SS Tx/Rx). Called to notify the status of the bit. + * @check_contaminant: + * Optional; The callback is called when CC pins report open status + * at the end of the deboumce period or when the port is still + * toggling. Chip level drivers are expected to check for contaminant + * and call tcpm_clean_port when the port is clean. */ struct tcpc_dev { struct fwnode_handle *fwnode; @@ -148,6 +153,7 @@ struct tcpc_dev { bool pps_active, u32 requested_vbus_voltage); bool (*is_vbus_vsafe0v)(struct tcpc_dev *dev); void (*set_partner_usb_comm_capable)(struct tcpc_dev *dev, bool enable); + void (*check_contaminant)(struct tcpc_dev *dev); }; struct tcpm_port; @@ -165,5 +171,7 @@ void tcpm_pd_transmit_complete(struct tcpm_port *port, enum tcpm_transmit_status status); void tcpm_pd_hard_reset(struct tcpm_port *port); void tcpm_tcpc_reset(struct tcpm_port *port); +void tcpm_port_clean(struct tcpm_port *port); +bool tcpm_port_is_toggling(struct tcpm_port *port); #endif /* __LINUX_USB_TCPM_H */ -- cgit v1.2.3 From abc028a270f47f86060ef479395d1bb8c2ab6e7e Mon Sep 17 00:00:00 2001 From: Badhri Jagan Sridharan Date: Sat, 14 Jan 2023 01:32:45 -0800 Subject: usb: typec: tcpci: Add callback for evaluating contaminant presence This change adds callback to evaluate presence of contaminant in the TCPCI layer. Signed-off-by: Badhri Jagan Sridharan Link: https://lore.kernel.org/r/20230114093246.1933321-2-badhri@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 17657451c762..85e95a3251d3 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -188,6 +188,12 @@ struct tcpci; * Optional; The USB Communications Capable bit indicates if port * partner is capable of communication over the USB data lines * (e.g. D+/- or SS Tx/Rx). Called to notify the status of the bit. + * @check_contaminant: + * Optional; The callback is invoked when chiplevel drivers indicated + * that the USB port needs to be checked for contaminant presence. + * Chip level drivers are expected to check for contaminant and call + * tcpm_clean_port when the port is clean to put the port back into + * toggling state. 
*/ struct tcpci_data { struct regmap *regmap; @@ -204,6 +210,7 @@ struct tcpci_data { void (*frs_sourcing_vbus)(struct tcpci *tcpci, struct tcpci_data *data); void (*set_partner_usb_comm_capable)(struct tcpci *tcpci, struct tcpci_data *data, bool capable); + void (*check_contaminant)(struct tcpci *tcpci, struct tcpci_data *data); }; struct tcpci *tcpci_register_port(struct device *dev, struct tcpci_data *data); -- cgit v1.2.3 From 5c5a7680e67ba6fbbb5f4d79fa41485450c1985c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 9 Dec 2022 16:09:14 +0100 Subject: platform: Provide a remove callback that returns no value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit struct platform_driver::remove returning an integer made driver authors expect that returning an error code was proper error handling. However the driver core ignores the error and continues to remove the device because there is nothing the core could do anyhow and reentering the remove callback again is only calling for trouble. So this is an source for errors typically yielding resource leaks in the error path. As there are too many platform drivers to neatly convert them all to return void in a single go, do it in several steps after this patch: a) Convert all drivers to implement .remove_new() returning void instead of .remove() returning int; b) Change struct platform_driver::remove() to return void and so make it identical to .remove_new(); c) Change all drivers back to .remove() now with the better prototype; d) drop struct platform_driver::remove_new(). While this touches all drivers eventually twice, steps a) and c) can be done one driver after another and so reduces coordination efforts immensely and simplifies review. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20221209150914.3557650-1-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index b0d5a253156e..b845fd83f429 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -207,7 +207,18 @@ extern void platform_device_put(struct platform_device *pdev); struct platform_driver { int (*probe)(struct platform_device *); + + /* + * Traditionally the remove callback returned an int which however is + * ignored by the driver core. This led to wrong expectations by driver + * authors who thought returning an error code was a valid error + * handling strategy. To convert to a callback returning void, new + * drivers should implement .remove_new() until the conversion it done + * that eventually makes .remove() return void. + */ int (*remove)(struct platform_device *); + void (*remove_new)(struct platform_device *); + void (*shutdown)(struct platform_device *); int (*suspend)(struct platform_device *, pm_message_t state); int (*resume)(struct platform_device *); -- cgit v1.2.3 From 3dbdd92014a410778de8406e9e3253e353f51c2b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 28 Dec 2022 11:49:22 +0200 Subject: software node: Remove unused APIs There are no more users of software_node_register_nodes() and software_node_unregister_nodes(). Remove them. 
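For reference, a minimal sketch of registering nodes through the remaining group-based API that superseded the removed helpers (node names and layout here are hypothetical, not taken from this patch):

	#include <linux/property.h>

	static const struct software_node example_parent = {
		.name = "example-parent",
	};

	static const struct software_node example_child = {
		.name = "example-child",
		.parent = &example_parent,
	};

	/* NULL-terminated array of pointers, as the group API expects. */
	static const struct software_node *example_node_group[] = {
		&example_parent,
		&example_child,
		NULL
	};

	static int example_register(void)
	{
		return software_node_register_node_group(example_node_group);
	}

	static void example_unregister(void)
	{
		software_node_unregister_node_group(example_node_group);
	}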
Signed-off-by: Andy Shevchenko Acked-by: Greg Kroah-Hartman Acked-by: Sakari Ailus Tested-by: Daniel Scally Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20221228094922.84119-5-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 37179e3abad5..1ffd4f9bb67b 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -486,9 +486,6 @@ const struct software_node * software_node_find_by_name(const struct software_node *parent, const char *name); -int software_node_register_nodes(const struct software_node *nodes); -void software_node_unregister_nodes(const struct software_node *nodes); - int software_node_register_node_group(const struct software_node **node_group); void software_node_unregister_node_group(const struct software_node **node_group); -- cgit v1.2.3 From c3719bd9eeb2edf84bd263d662e36ca0ba262a23 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Wed, 4 Jan 2023 19:30:24 +0100 Subject: cacheinfo: Use RISC-V's init_cache_level() as generic OF implementation RISC-V's implementation of init_of_cache_level() is following the Devicetree Specification v0.3 regarding caches, cf.: - s3.7.3 'Internal (L1) Cache Properties' - s3.8 'Multi-level and Shared Cache Nodes' Allow reusing the implementation by moving it. Also make 'levels', 'leaves' and 'level' unsigned int. Signed-off-by: Pierre Gondois Reviewed-by: Conor Dooley Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230104183033.755668-2-pierre.gondois@arm.com Signed-off-by: Sudeep Holla --- include/linux/cacheinfo.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 00b7a6ae8617..ff0328f3fbb0 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -80,6 +80,7 @@ struct cpu_cacheinfo { struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu); int init_cache_level(unsigned int cpu); +int init_of_cache_level(unsigned int cpu); int populate_cache_leaves(unsigned int cpu); int cache_setup_acpi(unsigned int cpu); bool last_level_cache_is_valid(unsigned int cpu); -- cgit v1.2.3 From 504450d05c545a839b22542c72ddc388740462ac Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 10:23:30 +0100 Subject: driver core: bus.h: document bus notifiers better The bus notifier values are not documented all that well, so clean this up and make a real enumerated type for them and document them much better. When doing this, remove the hex values and just rely on the enumerated type instead as that is all that is needed. Reviewed-by: Randy Dunlap Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20230111092331.3946745-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index d529f644e92b..7b4a48b5159b 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -257,21 +257,35 @@ extern int bus_register_notifier(struct bus_type *bus, extern int bus_unregister_notifier(struct bus_type *bus, struct notifier_block *nb); -/* All 4 notifers below get called with the target struct device * - * as an argument. 
Note that those functions are likely to be called - * with the device lock held in the core, so be careful. +/** + * enum bus_notifier_event - Bus Notifier events that have happened + * @BUS_NOTIFY_ADD_DEVICE: device is added to this bus + * @BUS_NOTIFY_DEL_DEVICE: device is about to be removed from this bus + * @BUS_NOTIFY_REMOVED_DEVICE: device is successfully removed from this bus + * @BUS_NOTIFY_BIND_DRIVER: a driver is about to be bound to this device on this bus + * @BUS_NOTIFY_BOUND_DRIVER: a driver is successfully bound to this device on this bus + * @BUS_NOTIFY_UNBIND_DRIVER: a driver is about to be unbound from this device on this bus + * @BUS_NOTIFY_UNBOUND_DRIVER: a driver is successfully unbound from this device on this bus + * @BUS_NOTIFY_DRIVER_NOT_BOUND: a driver failed to be bound to this device on this bus + * + * These are the value passed to a bus notifier when a specific event happens. + * + * Note that bus notifiers are likely to be called with the device lock already + * held by the driver core, so be careful in any notifier callback as to what + * you do with the device structure. + * + * All bus notifiers are called with the target struct device * as an argument. */ -#define BUS_NOTIFY_ADD_DEVICE 0x00000001 /* device added */ -#define BUS_NOTIFY_DEL_DEVICE 0x00000002 /* device to be removed */ -#define BUS_NOTIFY_REMOVED_DEVICE 0x00000003 /* device removed */ -#define BUS_NOTIFY_BIND_DRIVER 0x00000004 /* driver about to be - bound */ -#define BUS_NOTIFY_BOUND_DRIVER 0x00000005 /* driver bound to device */ -#define BUS_NOTIFY_UNBIND_DRIVER 0x00000006 /* driver about to be - unbound */ -#define BUS_NOTIFY_UNBOUND_DRIVER 0x00000007 /* driver is unbound - from the device */ -#define BUS_NOTIFY_DRIVER_NOT_BOUND 0x00000008 /* driver fails to be bound */ +enum bus_notifier_event { + BUS_NOTIFY_ADD_DEVICE, + BUS_NOTIFY_DEL_DEVICE, + BUS_NOTIFY_REMOVED_DEVICE, + BUS_NOTIFY_BIND_DRIVER, + BUS_NOTIFY_BOUND_DRIVER, + BUS_NOTIFY_UNBIND_DRIVER, + BUS_NOTIFY_UNBOUND_DRIVER, + BUS_NOTIFY_DRIVER_NOT_BOUND, +}; extern struct kset *bus_get_kset(struct bus_type *bus); -- cgit v1.2.3 From bd500361a937c03a3da57178287ce543c8f3681b Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Wed, 4 Jan 2023 19:30:28 +0100 Subject: ACPI: PPTT: Update acpi_find_last_cache_level() to acpi_get_cache_info() acpi_find_last_cache_level() allows to find the last level of cache for a given CPU. The function is only called on arm64 ACPI based platforms to check for cache information that would be missing in the CLIDR_EL1 register. To allow populating (struct cpu_cacheinfo).num_leaves by only parsing a PPTT, update acpi_find_last_cache_level() to get the 'split_levels', i.e. the number of cache levels being split in data/instruction caches. It is assumed that there will not be data/instruction caches above a unified cache. If a split level consist of one data cache and no instruction cache (or opposite), then the missing cache will still be populated by default with minimal cache information, and maximal cpumask (all non-existing caches have the same fw_token). Suggested-by: Jeremy Linton Signed-off-by: Pierre Gondois Reviewed-by: Jeremy Linton Acked-by: Rafael J. 
Wysocki Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230104183033.755668-6-pierre.gondois@arm.com Signed-off-by: Sudeep Holla --- include/linux/cacheinfo.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index ff0328f3fbb0..00d8e7f9d1c6 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -88,19 +88,22 @@ bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y); int detect_cache_attributes(unsigned int cpu); #ifndef CONFIG_ACPI_PPTT /* - * acpi_find_last_cache_level is only called on ACPI enabled + * acpi_get_cache_info() is only called on ACPI enabled * platforms using the PPTT for topology. This means that if * the platform supports other firmware configuration methods * we need to stub out the call when ACPI is disabled. * ACPI enabled platforms not using PPTT won't be making calls * to this function so we need not worry about them. */ -static inline int acpi_find_last_cache_level(unsigned int cpu) +static inline +int acpi_get_cache_info(unsigned int cpu, + unsigned int *levels, unsigned int *split_levels) { return 0; } #else -int acpi_find_last_cache_level(unsigned int cpu); +int acpi_get_cache_info(unsigned int cpu, + unsigned int *levels, unsigned int *split_levels); #endif const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf); -- cgit v1.2.3 From 5944ce092b97caed5d86d961e963b883b5c44ee2 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Wed, 4 Jan 2023 19:30:29 +0100 Subject: arch_topology: Build cacheinfo from primary CPU commit 3fcbf1c77d08 ("arch_topology: Fix cache attributes detection in the CPU hotplug path") adds a call to detect_cache_attributes() to populate the cacheinfo before updating the siblings mask. detect_cache_attributes() allocates memory and can take the PPTT mutex (on ACPI platforms). On PREEMPT_RT kernels, on secondary CPUs, this triggers a: 'BUG: sleeping function called from invalid context' [1] as the code is executed with preemption and interrupts disabled. The primary CPU was previously storing the cache information using the now removed (struct cpu_topology).llc_id: commit 5b8dc787ce4a ("arch_topology: Drop LLC identifier stash from the CPU topology") allocate_cache_info() tries to build the cacheinfo from the primary CPU prior secondary CPUs boot, if the DT/ACPI description contains cache information. If allocate_cache_info() fails, then fallback to the current state for the cacheinfo allocation. [1] will be triggered in such case. When unplugging a CPU, the cacheinfo memory cannot be freed. If it was, then the memory would be allocated early by the re-plugged CPU and would trigger [1]. Note that populate_cache_leaves() might be called multiple times due to populate_leaves being moved up. This is required since detect_cache_attributes() might be called with per_cpu_cacheinfo(cpu) being allocated but not populated. 
[1]: | BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46 | in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 0, name: swapper/111 | preempt_count: 1, expected: 0 | RCU nest depth: 1, expected: 1 | 3 locks held by swapper/111/0: | #0: (&pcp->lock){+.+.}-{3:3}, at: get_page_from_freelist+0x218/0x12c8 | #1: (rcu_read_lock){....}-{1:3}, at: rt_spin_trylock+0x48/0xf0 | #2: (&zone->lock){+.+.}-{3:3}, at: rmqueue_bulk+0x64/0xa80 | irq event stamp: 0 | hardirqs last enabled at (0): 0x0 | hardirqs last disabled at (0): copy_process+0x5dc/0x1ab8 | softirqs last enabled at (0): copy_process+0x5dc/0x1ab8 | softirqs last disabled at (0): 0x0 | Preemption disabled at: | migrate_enable+0x30/0x130 | CPU: 111 PID: 0 Comm: swapper/111 Tainted: G W 6.0.0-rc4-rt6-[...] | Call trace: | __kmalloc+0xbc/0x1e8 | detect_cache_attributes+0x2d4/0x5f0 | update_siblings_masks+0x30/0x368 | store_cpu_topology+0x78/0xb8 | secondary_start_kernel+0xd0/0x198 | __secondary_switched+0xb0/0xb4 Signed-off-by: Pierre Gondois Reviewed-by: Sudeep Holla Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20230104183033.755668-7-pierre.gondois@arm.com Signed-off-by: Sudeep Holla --- include/linux/cacheinfo.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 00d8e7f9d1c6..dfef57077cd0 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -85,6 +85,7 @@ int populate_cache_leaves(unsigned int cpu); int cache_setup_acpi(unsigned int cpu); bool last_level_cache_is_valid(unsigned int cpu); bool last_level_cache_is_shared(unsigned int cpu_x, unsigned int cpu_y); +int fetch_cache_info(unsigned int cpu); int detect_cache_attributes(unsigned int cpu); #ifndef CONFIG_ACPI_PPTT /* -- cgit v1.2.3 From 80baab88bb93eeaa133b426d24dfc0775a8cf824 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 15 Jan 2023 08:45:51 -0800 Subject: iomap/gfs2: Unlock and put folio in page_done handler When an iomap defines a ->page_done() handler in its page_ops, delegate unlocking the folio and putting the folio reference to that handler. This allows to fix a race between journaled data writes and folio writeback in gfs2: before this change, gfs2_iomap_page_done() was called after unlocking the folio, so writeback could start writing back the folio's buffers before they could be marked for writing to the journal. Also, try_to_free_buffers() could free the buffers before gfs2_iomap_page_done() was done adding the buffers to the current current transaction. With this change, gfs2_iomap_page_done() adds the buffers to the current transaction while the folio is still locked, so the problems described above can no longer occur. The only current user of ->page_done() is gfs2, so other filesystems are not affected. To catch out any out-of-tree users, switch from a page to a folio in ->page_done(). Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 0983dfc9a203..743e2a909162 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -131,13 +131,14 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) * associated with them. * * When page_prepare succeeds, page_done will always be called to do any - * cleanup work necessary. 
In that page_done call, @page will be NULL if the - * associated page could not be obtained. + * cleanup work necessary. In that page_done call, @folio will be NULL if the + * associated folio could not be obtained. When folio is not NULL, page_done + * is responsible for unlocking and putting the folio. */ struct iomap_page_ops { int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len); void (*page_done)(struct inode *inode, loff_t pos, unsigned copied, - struct page *page); + struct folio *folio); /* * Check that the cached iomap still maps correctly to the filesystem's -- cgit v1.2.3 From 40405dddd98a9a95585482af46b8e269f5ebe5df Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 15 Jan 2023 08:45:51 -0800 Subject: iomap: Rename page_done handler to put_folio The ->page_done() handler in struct iomap_page_ops is now somewhat misnamed in that it mainly deals with unlocking and putting a folio, so rename it to ->put_folio(). Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 743e2a909162..ecf815b34d51 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -126,18 +126,18 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) /* * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare - * and page_done will be called for each page written to. This only applies to - * buffered writes as unbuffered writes will not typically have pages + * and put_folio will be called for each folio written to. This only applies + * to buffered writes as unbuffered writes will not typically have folios * associated with them. * - * When page_prepare succeeds, page_done will always be called to do any - * cleanup work necessary. In that page_done call, @folio will be NULL if the - * associated folio could not be obtained. When folio is not NULL, page_done + * When page_prepare succeeds, put_folio will always be called to do any + * cleanup work necessary. In that put_folio call, @folio will be NULL if the + * associated folio could not be obtained. When folio is not NULL, put_folio * is responsible for unlocking and putting the folio. */ struct iomap_page_ops { int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len); - void (*page_done)(struct inode *inode, loff_t pos, unsigned copied, + void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, struct folio *folio); /* -- cgit v1.2.3 From 98321b5139f92a736a426404fb6e23bfb8feb9cc Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 15 Jan 2023 08:49:04 -0800 Subject: iomap: Add iomap_get_folio helper Add an iomap_get_folio() helper that gets a folio reference based on an iomap iterator and an offset into the address space. Use it in iomap_write_begin(). Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. 
Wong --- include/linux/iomap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index ecf815b34d51..188d14e786a4 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -261,6 +261,7 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode, int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); +struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, -- cgit v1.2.3 From 9060bc4d3aca6106bbe72891efba391d9d6b86e7 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 15 Jan 2023 08:49:12 -0800 Subject: iomap/gfs2: Get page in page_prepare handler Change the iomap ->page_prepare() handler to get and return a locked folio instead of doing that in iomap_write_begin(). This allows to recover from out-of-memory situations in ->page_prepare(), which eliminates the corresponding error handling code in iomap_write_begin(). The ->put_folio() handler now also isn't called with NULL as the folio value anymore. Filesystems are expected to use the iomap_get_folio() helper for getting locked folios in their ->page_prepare() handlers. Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 188d14e786a4..d50501781856 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -13,6 +13,7 @@ struct address_space; struct fiemap_extent_info; struct inode; +struct iomap_iter; struct iomap_dio; struct iomap_writepage_ctx; struct iov_iter; @@ -131,12 +132,12 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) * associated with them. * * When page_prepare succeeds, put_folio will always be called to do any - * cleanup work necessary. In that put_folio call, @folio will be NULL if the - * associated folio could not be obtained. When folio is not NULL, put_folio - * is responsible for unlocking and putting the folio. + * cleanup work necessary. put_folio is responsible for unlocking and putting + * @folio. */ struct iomap_page_ops { - int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len); + struct folio *(*page_prepare)(struct iomap_iter *iter, loff_t pos, + unsigned len); void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, struct folio *folio); -- cgit v1.2.3 From c82abc2394642490da736b1ba78671bbcef81150 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 15 Jan 2023 08:50:25 -0800 Subject: iomap: Rename page_prepare handler to get_folio The ->page_prepare() handler in struct iomap_page_ops is now somewhat misnamed, so rename it to ->get_folio(). Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Darrick J. 
Wong --- include/linux/iomap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index d50501781856..da226032aedc 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -126,17 +126,17 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) } /* - * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare + * When a filesystem sets page_ops in an iomap mapping it returns, get_folio * and put_folio will be called for each folio written to. This only applies * to buffered writes as unbuffered writes will not typically have folios * associated with them. * - * When page_prepare succeeds, put_folio will always be called to do any + * When get_folio succeeds, put_folio will always be called to do any * cleanup work necessary. put_folio is responsible for unlocking and putting * @folio. */ struct iomap_page_ops { - struct folio *(*page_prepare)(struct iomap_iter *iter, loff_t pos, + struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, unsigned len); void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, struct folio *folio); -- cgit v1.2.3 From 471859f57d42537626a56312cfb50cd6acee09ae Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 15 Jan 2023 08:50:44 -0800 Subject: iomap: Rename page_ops to folio_ops The operations in struct page_ops all operate on folios, so rename struct page_ops to struct folio_ops. Signed-off-by: Andreas Gruenbacher Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig [djwong: port around not removing iomap_valid] Signed-off-by: Darrick J. Wong --- include/linux/iomap.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index da226032aedc..c70a9d4fb447 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -86,7 +86,7 @@ struct vm_fault; */ #define IOMAP_NULL_ADDR -1ULL /* addr is not valid */ -struct iomap_page_ops; +struct iomap_folio_ops; struct iomap { u64 addr; /* disk offset of mapping, bytes */ @@ -98,7 +98,7 @@ struct iomap { struct dax_device *dax_dev; /* dax_dev for dax operations */ void *inline_data; void *private; /* filesystem private */ - const struct iomap_page_ops *page_ops; + const struct iomap_folio_ops *folio_ops; u64 validity_cookie; /* used with .iomap_valid() */ }; @@ -126,7 +126,7 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) } /* - * When a filesystem sets page_ops in an iomap mapping it returns, get_folio + * When a filesystem sets folio_ops in an iomap mapping it returns, get_folio * and put_folio will be called for each folio written to. This only applies * to buffered writes as unbuffered writes will not typically have folios * associated with them. @@ -135,7 +135,7 @@ static inline bool iomap_inline_data_valid(const struct iomap *iomap) * cleanup work necessary. put_folio is responsible for unlocking and putting * @folio. 
*/ -struct iomap_page_ops { +struct iomap_folio_ops { struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, unsigned len); void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, -- cgit v1.2.3 From 04a42e72d77a93a166b79c34b7bc862f55a53967 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 14 Dec 2022 22:17:57 -0800 Subject: mm: move folio_set_compound_order() to mm/internal.h folio_set_compound_order() is moved to an mm-internal location so external folio users cannot misuse this function. Change the name of the function to folio_set_order() and use WARN_ON_ONCE() rather than BUG_ON. Also, handle the case if a non-large folio is passed and add clarifying comments to the function. Link: https://lore.kernel.org/lkml/20221207223731.32784-1-sidhartha.kumar@oracle.com/T/ Link: https://lkml.kernel.org/r/20221215061757.223440-1-sidhartha.kumar@oracle.com Fixes: 9fd330582b2f ("mm: add folio dtor and order setter functions") Signed-off-by: Sidhartha Kumar Suggested-by: Mike Kravetz Suggested-by: Muchun Song Suggested-by: Matthew Wilcox Suggested-by: John Hubbard Reviewed-by: John Hubbard Reviewed-by: Muchun Song Signed-off-by: Andrew Morton --- include/linux/mm.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8f857163ac89..253b2d7489e6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1019,22 +1019,6 @@ static inline void set_compound_order(struct page *page, unsigned int order) #endif } -/* - * folio_set_compound_order is generally passed a non-zero order to - * initialize a large folio. However, hugetlb code abuses this by - * passing in zero when 'dissolving' a large folio. - */ -static inline void folio_set_compound_order(struct folio *folio, - unsigned int order) -{ - VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); - - folio->_folio_order = order; -#ifdef CONFIG_64BIT - folio->_folio_nr_pages = order ? 1U << order : 0; -#endif -} - /* Returns the number of pages in this potentially compound page. */ static inline unsigned long compound_nr(struct page *page) { -- cgit v1.2.3 From 105ff5339f498af74e60d7662c8f1c4d21f1342d Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Thu, 15 Dec 2022 00:12:03 +0000 Subject: mm/memfd: add MFD_NOEXEC_SEAL and MFD_EXEC The new MFD_NOEXEC_SEAL and MFD_EXEC flags allows application to set executable bit at creation time (memfd_create). When MFD_NOEXEC_SEAL is set, memfd is created without executable bit (mode:0666), and sealed with F_SEAL_EXEC, so it can't be chmod to be executable (mode: 0777) after creation. when MFD_EXEC flag is set, memfd is created with executable bit (mode:0777), this is the same as the old behavior of memfd_create. The new pid namespaced sysctl vm.memfd_noexec has 3 values: 0: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL acts like MFD_EXEC was set. 1: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL acts like MFD_NOEXEC_SEAL was set. 2: memfd_create() without MFD_NOEXEC_SEAL will be rejected. The sysctl allows finer control of memfd_create for old-software that doesn't set the executable bit, for example, a container with vm.memfd_noexec=1 means the old-software will create non-executable memfd by default. Also, the value of memfd_noexec is passed to child namespace at creation time. For example, if the init namespace has vm.memfd_noexec=2, all its children namespaces will be created with 2. 
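A minimal user-space sketch of the new flag follows; the fallback #define is an assumption for builds against older uapi headers, and the flag value should normally come from <linux/memfd.h>:

	#define _GNU_SOURCE
	#include <sys/mman.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	#ifndef MFD_NOEXEC_SEAL
	#define MFD_NOEXEC_SEAL 0x0008U	/* assumed value; prefer the uapi header */
	#endif

	int main(void)
	{
		/* Created with mode 0666 and sealed with F_SEAL_EXEC. */
		int fd = memfd_create("example", MFD_CLOEXEC | MFD_NOEXEC_SEAL);

		if (fd < 0) {
			perror("memfd_create");
			return 1;
		}
		/* fchmod(fd, 0777) is expected to fail now: the exec bit is sealed. */
		close(fd);
		return 0;
	}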
[akpm@linux-foundation.org: add stub functions to fix build] [akpm@linux-foundation.org: remove unneeded register_pid_ns_ctl_table_vm() stub, per Jeff] [akpm@linux-foundation.org: s/pr_warn_ratelimited/pr_warn_once/, per review] [akpm@linux-foundation.org: fix CONFIG_SYSCTL=n warning] Link: https://lkml.kernel.org/r/20221215001205.51969-4-jeffxu@google.com Signed-off-by: Jeff Xu Co-developed-by: Daniel Verkamp Signed-off-by: Daniel Verkamp Reported-by: kernel test robot Reviewed-by: Kees Cook Cc: David Herrmann Cc: Dmitry Torokhov Cc: Hugh Dickins Cc: Jann Horn Cc: Jorge Lucangeli Obes Cc: Shuah Khan Signed-off-by: Andrew Morton --- include/linux/pid_namespace.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 07481bb87d4e..c758809d5bcf 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -16,6 +16,21 @@ struct fs_pin; +#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) +/* + * sysctl for vm.memfd_noexec + * 0: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL + * acts like MFD_EXEC was set. + * 1: memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL + * acts like MFD_NOEXEC_SEAL was set. + * 2: memfd_create() without MFD_NOEXEC_SEAL will be + * rejected. + */ +#define MEMFD_NOEXEC_SCOPE_EXEC 0 +#define MEMFD_NOEXEC_SCOPE_NOEXEC_SEAL 1 +#define MEMFD_NOEXEC_SCOPE_NOEXEC_ENFORCED 2 +#endif + struct pid_namespace { struct idr idr; struct rcu_head rcu; @@ -31,6 +46,10 @@ struct pid_namespace { struct ucounts *ucounts; int reboot; /* group exit code if this pidns was rebooted */ struct ns_common ns; +#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) + /* sysctl for vm.memfd_noexec */ + int memfd_noexec_scope; +#endif } __randomize_layout; extern struct pid_namespace init_pid_ns; -- cgit v1.2.3 From fe7d4c6d5a42f5bdc63fdfdca2cad32c8a779e23 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 16 Dec 2022 10:50:54 -0500 Subject: mm/hugetlb: document huge_pte_offset usage huge_pte_offset() is potentially a pgtable walker, looking up pte_t* for a hugetlb address. Normally, it's always safe to walk a generic pgtable as long as we're with the mmap lock held for either read or write, because that guarantees the pgtable pages will always be valid during the process. But it's not true for hugetlbfs, especially shared: hugetlbfs can have its pgtable freed by pmd unsharing, it means that even with mmap lock held for current mm, the PMD pgtable page can still go away from under us if pmd unsharing is possible during the walk. So we have two ways to make it safe even for a shared mapping: (1) If we're with the hugetlb vma lock held for either read/write, it's okay because pmd unshare cannot happen at all. (2) If we're with the i_mmap_rwsem lock held for either read/write, it's okay because even if pmd unshare can happen, the pgtable page cannot be freed from under us. Document it. 
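As a sketch of rule (2.1) for a shared mapping, a hypothetical walker (assuming the hugetlb per-VMA lock helpers; this code is not part of the patch) would look like:

	struct hstate *h = hstate_vma(vma);
	pte_t *ptep;

	hugetlb_vma_lock_read(vma);		/* blocks pmd unsharing for this vma */
	ptep = huge_pte_offset(vma->vm_mm, addr, huge_page_size(h));
	if (ptep) {
		/* ptep and its pgtable page stay valid until the unlock */
	}
	hugetlb_vma_unlock_read(vma);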
Link: https://lkml.kernel.org/r/20221216155100.2043537-4-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: John Hubbard Reviewed-by: David Hildenbrand Cc: Andrea Arcangeli Cc: James Houghton Cc: Jann Horn Cc: Miaohe Lin Cc: Mike Kravetz Cc: Muchun Song Cc: Nadav Amit Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 551834cd5299..d755e2a7c0db 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -192,6 +192,38 @@ extern struct list_head huge_boot_pages; pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz); +/* + * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE. + * Returns the pte_t* if found, or NULL if the address is not mapped. + * + * Since this function will walk all the pgtable pages (including not only + * high-level pgtable page, but also PUD entry that can be unshared + * concurrently for VM_SHARED), the caller of this function should be + * responsible of its thread safety. One can follow this rule: + * + * (1) For private mappings: pmd unsharing is not possible, so holding the + * mmap_lock for either read or write is sufficient. Most callers + * already hold the mmap_lock, so normally, no special action is + * required. + * + * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged + * pgtable page can go away from under us! It can be done by a pmd + * unshare with a follow up munmap() on the other process), then we + * need either: + * + * (2.1) hugetlb vma lock read or write held, to make sure pmd unshare + * won't happen upon the range (it also makes sure the pte_t we + * read is the right and stable one), or, + * + * (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make + * sure even if unshare happened the racy unmap() will wait until + * i_mmap_rwsem is released. + * + * Option (2.1) is the safest, which guarantees pte stability from pmd + * sharing pov, until the vma lock released. Option (2.2) doesn't protect + * a concurrent pmd unshare, but it makes sure the pgtable page is safe to + * access. + */ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); unsigned long hugetlb_mask_last_page(struct hstate *h); -- cgit v1.2.3 From fcd48540d188876c917a377d81cd24c100332a62 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 16 Dec 2022 10:50:55 -0500 Subject: mm/hugetlb: move swap entry handling into vma lock when faulted In hugetlb_fault(), there used to have a special path to handle swap entry at the entrance using huge_pte_offset(). That's unsafe because huge_pte_offset() for a pmd sharable range can access freed pgtables if without any lock to protect the pgtable from being freed after pmd unshare. Here the simplest solution to make it safe is to move the swap handling to be after the vma lock being held. We may need to take the fault mutex on either migration or hwpoison entries now (also the vma lock, but that's really needed), however neither of them is hot path. Note that the vma lock cannot be released in hugetlb_fault() when the migration entry is detected, because in migration_entry_wait_huge() the pgtable page will be used again (by taking the pgtable lock), so that also need to be protected by the vma lock. 
Modify migration_entry_wait_huge() so that it must be called with vma read lock held, and properly release the lock in __migration_entry_wait_huge(). Link: https://lkml.kernel.org/r/20221216155100.2043537-5-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: John Hubbard Cc: Andrea Arcangeli Cc: David Hildenbrand Cc: James Houghton Cc: Jann Horn Cc: Miaohe Lin Cc: Muchun Song Cc: Nadav Amit Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/swapops.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index b982dd614572..3a451b7afcb3 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -337,7 +337,8 @@ extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); #ifdef CONFIG_HUGETLB_PAGE -extern void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl); +extern void __migration_entry_wait_huge(struct vm_area_struct *vma, + pte_t *ptep, spinlock_t *ptl); extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte); #endif /* CONFIG_HUGETLB_PAGE */ #else /* CONFIG_MIGRATION */ @@ -366,7 +367,8 @@ static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } #ifdef CONFIG_HUGETLB_PAGE -static inline void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl) { } +static inline void __migration_entry_wait_huge(struct vm_area_struct *vma, + pte_t *ptep, spinlock_t *ptl) { } static inline void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte) { } #endif /* CONFIG_HUGETLB_PAGE */ static inline int is_writable_migration_entry(swp_entry_t entry) -- cgit v1.2.3 From dd361e5033cf36c51acab996ea17748b81cedb38 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 16 Dec 2022 10:52:26 -0500 Subject: mm/hugetlb: make walk_hugetlb_range() safe to pmd unshare Since walk_hugetlb_range() walks the pgtable, it needs the vma lock to make sure the pgtable page will not be freed concurrently. Link: https://lkml.kernel.org/r/20221216155226.2043738-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: John Hubbard Cc: Andrea Arcangeli Cc: David Hildenbrand Cc: James Houghton Cc: Jann Horn Cc: Miaohe Lin Cc: Muchun Song Cc: Nadav Amit Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/pagewalk.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index 959f52e5867d..27a6df448ee5 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -21,7 +21,16 @@ struct mm_walk; * depth is -1 if not known, 0:PGD, 1:P4D, 2:PUD, 3:PMD. * Any folded depths (where PTRS_PER_P?D is equal to 1) * are skipped. - * @hugetlb_entry: if set, called for each hugetlb entry + * @hugetlb_entry: if set, called for each hugetlb entry. This hook + * function is called with the vma lock held, in order to + * protect against a concurrent freeing of the pte_t* or + * the ptl. In some cases, the hook function needs to drop + * and retake the vma lock in order to avoid deadlocks + * while calling other functions. 
In such cases the hook + * function must either refrain from accessing the pte or + * ptl after dropping the vma lock, or else revalidate + * those items after re-acquiring the vma lock and before + * accessing them. * @test_walk: caller specific callback function to determine whether * we walk over the current vma or not. Returning 0 means * "do page table walk over the current vma", returning -- cgit v1.2.3 From 9c67a20704e763f9cb8cd262c3e45de7bd2816bc Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 16 Dec 2022 10:52:29 -0500 Subject: mm/hugetlb: introduce hugetlb_walk() huge_pte_offset() is the main walker function for hugetlb pgtables. The name is not really representing what it does, though. Instead of renaming it, introduce a wrapper function called hugetlb_walk() which will use huge_pte_offset() inside. Assert on the locks when walking the pgtable. Note, the vma lock assertion will be a no-op for private mappings. Document the last special case in the page_vma_mapped_walk() path where we don't need any more lock to call hugetlb_walk(). Taking vma lock there is not needed because either: (1) potential callers of hugetlb pvmw holds i_mmap_rwsem already (from one rmap_walk()), or (2) the caller will not walk a hugetlb vma at all so the hugetlb code path not reachable (e.g. in ksm or uprobe paths). It's slightly implicit for future page_vma_mapped_walk() callers on that lock requirement. But anyway, when one day this rule breaks, one will get a straightforward warning in hugetlb_walk() with lockdep, then there'll be a way out. [akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20221216155229.2043750-1-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Mike Kravetz Reviewed-by: John Hubbard Reviewed-by: David Hildenbrand Cc: Andrea Arcangeli Cc: James Houghton Cc: Jann Horn Cc: Miaohe Lin Cc: Muchun Song Cc: Nadav Amit Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d755e2a7c0db..b6b10101bea7 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -2,6 +2,7 @@ #ifndef _LINUX_HUGETLB_H #define _LINUX_HUGETLB_H +#include #include #include #include @@ -196,6 +197,11 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE. * Returns the pte_t* if found, or NULL if the address is not mapped. * + * IMPORTANT: we should normally not directly call this function, instead + * this is only a common interface to implement arch-specific + * walker. Please use hugetlb_walk() instead, because that will attempt to + * verify the locking for you. + * * Since this function will walk all the pgtable pages (including not only * high-level pgtable page, but also PUD entry that can be unshared * concurrently for VM_SHARED), the caller of this function should be @@ -1229,4 +1235,35 @@ bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); #define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) #endif +static inline bool __vma_shareable_lock(struct vm_area_struct *vma) +{ + return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data; +} + +/* + * Safe version of huge_pte_offset() to check the locks. See comments + * above huge_pte_offset(). 
+ */ +static inline pte_t * +hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz) +{ +#if defined(CONFIG_HUGETLB_PAGE) && \ + defined(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && defined(CONFIG_LOCKDEP) + struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; + + /* + * If pmd sharing possible, locking needed to safely walk the + * hugetlb pgtables. More information can be found at the comment + * above huge_pte_offset() in the same file. + * + * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP. + */ + if (__vma_shareable_lock(vma)) + WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) && + !lockdep_is_held( + &vma->vm_file->f_mapping->i_mmap_rwsem)); +#endif + return huge_pte_offset(vma->vm_mm, addr, sz); +} + #endif /* _LINUX_HUGETLB_H */ -- cgit v1.2.3 From d685c668b0695dff927c85e27ef27d4f404f16a3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Dec 2022 21:43:51 +0000 Subject: buffer: add b_folio as an alias of b_page Patch series "Start converting buffer_heads to use folios". I was hoping that filesystems would convert from buffer_heads to iomap, but that's not happening particularly quickly. So the buffer_head infrastructure needs to be converted from being page-based to being folio-based. This patch (of 12): Buffer heads point to the allocation (ie the folio), not the page. This is currently the same thing for all filesystems that use buffer heads, so this is a safe transitional step. Link: https://lkml.kernel.org/r/20221215214402.3522366-1-willy@infradead.org Link: https://lkml.kernel.org/r/20221215214402.3522366-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Jan Kara Signed-off-by: Andrew Morton --- include/linux/buffer_head.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 33fa5e94aa80..8f14dca5fed7 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -61,7 +61,10 @@ typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); struct buffer_head { unsigned long b_state; /* buffer state bitmap (see above) */ struct buffer_head *b_this_page;/* circular list of page's buffers */ - struct page *b_page; /* the page this bh is mapped to */ + union { + struct page *b_page; /* the page this bh is mapped to */ + struct folio *b_folio; /* the folio this bh is mapped to */ + }; sector_t b_blocknr; /* start block number */ size_t b_size; /* size of mapping */ -- cgit v1.2.3 From 6a6fe9ebd571a4092b7d5c1f11e4e1e15d296fa5 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 9 Dec 2022 10:06:18 +0800 Subject: mm: swap: convert mark_page_lazyfree() to folio_mark_lazyfree() mark_page_lazyfree() and the callers are converted to use folio, this rename and make it to take in a folio argument instead of calling page_folio(). 
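At call sites the change is mechanical; a hypothetical caller (not from this patch) now does:

	/* before: mark_page_lazyfree(page); */
	folio_mark_lazyfree(page_folio(page));

	/* callers that already track a folio pass it directly */
	folio_mark_lazyfree(folio);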
Link: https://lkml.kernel.org/r/20221209020618.190306-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Vishal Moola (Oracle) Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2787b84eaf12..93f1cebd8545 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -402,7 +402,7 @@ extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); extern void deactivate_page(struct page *page); -extern void mark_page_lazyfree(struct page *page); +void folio_mark_lazyfree(struct folio *folio); extern void swap_setup(void); extern void lru_cache_add_inactive_or_unevictable(struct page *page, -- cgit v1.2.3 From 98def236f63c66629fb6b2d4b69cecffc5b46539 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 5 Dec 2022 23:08:20 +0000 Subject: mm/damon/core: implement damos filter Patch series "implement DAMOS filtering for anon pages and/or specific memory cgroups" DAMOS let users do system operations in a data access pattern oriented way. The data access pattern, which is extracted by DAMON, is somewhat accurate more than what user space could know in many cases. However, in some situation, users could know something more than the kernel about the pattern or some special requirements for some types of memory or processes. For example, some users would have slow swap devices and knows latency-ciritical processes and therefore want to use DAMON-based proactive reclamation (DAMON_RECLAIM) for only non-anonymous pages of non-latency-critical processes. For such restriction, users could exclude the memory regions from the initial monitoring regions and use non-dynamic monitoring regions update monitoring operations set including fvaddr and paddr. They could also adjust the DAMOS target access pattern. For dynamically changing memory layout and access pattern, those would be not enough. To help the case, add an interface, namely DAMOS filters, which can be used to avoid the DAMOS actions be applied to specific types of memory, to DAMON kernel API (damon.h). At the moment, it supports filtering anonymous pages and/or specific memory cgroups in or out for each DAMOS scheme. This patchset adds the support for all DAMOS actions that 'paddr' monitoring operations set supports ('pageout', 'lru_prio', and 'lru_deprio'), and the functionality is exposed via DAMON kernel API (damon.h) the DAMON sysfs interface (/sys/kernel/mm/damon/admins/), and DAMON_RECLAIM module parameters. Patches Sequence ---------------- First patch implements DAMOS filter interface to DAMON kernel API. Second patch makes the physical address space monitoring operations set to support the filters from all supporting DAMOS actions. Third patch adds anonymous pages filter support to DAMON_RECLAIM, and the fourth patch documents the DAMON_RECLAIM's new feature. Fifth to seventh patches implement DAMON sysfs files for support of the filters, and eighth patch connects the file to use DAMOS filters feature. Ninth patch adds simple self test cases for DAMOS filters of the sysfs interface. Finally, following two patches (tenth and eleventh) document the new features and interfaces. This patch (of 11): DAMOS lets users do system operation in a data access pattern oriented way. 
The data access pattern, which is extracted by DAMON, is somewhat accurate more than what user space could know in many cases. However, in some situation, users could know something more than the kernel about the pattern or some special requirements for some types of memory or processes. For example, some users would have slow swap devices and knows latency-ciritical processes and therefore want to use DAMON-based proactive reclamation (DAMON_RECLAIM) for only non-anonymous pages of non-latency-critical processes. For such restriction, users could exclude the memory regions from the initial monitoring regions and use non-dynamic monitoring regions update monitoring operations set including fvaddr and paddr. They could also adjust the DAMOS target access pattern. For dynamically changing memory layout and access pattern, those would be not enough. To help the case, add an interface, namely DAMOS filters, which can be used to avoid the DAMOS actions be applied to specific types of memory, to DAMON kernel API (damon.h). At the moment, it supports filtering anonymous pages and/or specific memory cgroups in or out for each DAMOS scheme. Note that this commit adds only the interface to the DAMON kernel API. The impelmentation should be made in the monitoring operations sets, and following commits will add that. Link: https://lkml.kernel.org/r/20221205230830.144349-1-sj@kernel.org Link: https://lkml.kernel.org/r/20221205230830.144349-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- include/linux/damon.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index ad15a5b88e3a..7907918ad2e0 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -8,6 +8,7 @@ #ifndef _DAMON_H_ #define _DAMON_H_ +#include #include #include #include @@ -215,6 +216,39 @@ struct damos_stat { unsigned long qt_exceeds; }; +/** + * enum damos_filter_type - Type of memory for &struct damos_filter + * @DAMOS_FILTER_TYPE_ANON: Anonymous pages. + * @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages. + * @NR_DAMOS_FILTER_TYPES: Number of filter types. + */ +enum damos_filter_type { + DAMOS_FILTER_TYPE_ANON, + DAMOS_FILTER_TYPE_MEMCG, + NR_DAMOS_FILTER_TYPES, +}; + +/** + * struct damos_filter - DAMOS action target memory filter. + * @type: Type of the page. + * @matching: If the matching page should filtered out or in. + * @memcg_id: Memcg id of the question if @type is DAMOS_FILTER_MEMCG. + * @list: List head for siblings. + * + * Before applying the &damos->action to a memory region, DAMOS checks if each + * page of the region matches to this and avoid applying the action if so. + * Note that the check support is up to &struct damon_operations + * implementation. + */ +struct damos_filter { + enum damos_filter_type type; + bool matching; + union { + unsigned short memcg_id; + }; + struct list_head list; +}; + /** * struct damos_access_pattern - Target access pattern of the given scheme. * @min_sz_region: Minimum size of target regions. @@ -239,6 +273,7 @@ struct damos_access_pattern { * @action: &damo_action to be applied to the target regions. * @quota: Control the aggressiveness of this scheme. * @wmarks: Watermarks for automated (in)activation of this scheme. + * @filters: Additional set of &struct damos_filter for &action. * @stat: Statistics of this scheme. * @list: List head for siblings. 
* @@ -254,6 +289,10 @@ struct damos_access_pattern { * If all schemes that registered to a &struct damon_ctx are inactive, DAMON * stops monitoring and just repeatedly checks the watermarks. * + * Before applying the &action to a memory region, &struct damon_operations + * implementation could check pages of the region and skip &action to respect + * &filters + * * After applying the &action to each region, &stat_count and &stat_sz is * updated to reflect the number of regions and total size of regions that the * &action is applied. @@ -263,6 +302,7 @@ struct damos { enum damos_action action; struct damos_quota quota; struct damos_watermarks wmarks; + struct list_head filters; struct damos_stat stat; struct list_head list; }; @@ -516,6 +556,12 @@ static inline unsigned long damon_sz_region(struct damon_region *r) #define damon_for_each_scheme_safe(s, next, ctx) \ list_for_each_entry_safe(s, next, &(ctx)->schemes, list) +#define damos_for_each_filter(f, scheme) \ + list_for_each_entry(f, &(scheme)->filters, list) + +#define damos_for_each_filter_safe(f, next, scheme) \ + list_for_each_entry_safe(f, next, &(scheme)->filters, list) + #ifdef CONFIG_DAMON struct damon_region *damon_new_region(unsigned long start, unsigned long end); @@ -536,6 +582,11 @@ void damon_destroy_region(struct damon_region *r, struct damon_target *t); int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges, unsigned int nr_ranges); +struct damos_filter *damos_new_filter(enum damos_filter_type type, + bool matching); +void damos_add_filter(struct damos *s, struct damos_filter *f); +void damos_destroy_filter(struct damos_filter *f); + struct damos *damon_new_scheme(struct damos_access_pattern *pattern, enum damos_action action, struct damos_quota *quota, struct damos_watermarks *wmarks); -- cgit v1.2.3 From 44383cef54c0ce1201f884d83cc2b367bc5aa4f7 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 19 Dec 2022 19:09:18 +0100 Subject: kasan: allow sampling page_alloc allocations for HW_TAGS As Hardware Tag-Based KASAN is intended to be used in production, its performance impact is crucial. As page_alloc allocations tend to be big, tagging and checking all such allocations can introduce a significant slowdown. Add two new boot parameters that allow to alleviate that slowdown: - kasan.page_alloc.sample, which makes Hardware Tag-Based KASAN tag only every Nth page_alloc allocation with the order configured by the second added parameter (default: tag every such allocation). - kasan.page_alloc.sample.order, which makes sampling enabled by the first parameter only affect page_alloc allocations with the order equal or greater than the specified value (default: 3, see below). The exact performance improvement caused by using the new parameters depends on their values and the applied workload. The chosen default value for kasan.page_alloc.sample.order is 3, which matches both PAGE_ALLOC_COSTLY_ORDER and SKB_FRAG_PAGE_ORDER. This is done for two reasons: 1. PAGE_ALLOC_COSTLY_ORDER is "the order at which allocations are deemed costly to service", which corresponds to the idea that only large and thus costly allocations are supposed to sampled. 2. One of the workloads targeted by this patch is a benchmark that sends a large amount of data over a local loopback connection. Most multi-page data allocations in the networking subsystem have the order of SKB_FRAG_PAGE_ORDER (or PAGE_ALLOC_COSTLY_ORDER). 
When running a local loopback test on a testing MTE-enabled device in sync mode, enabling Hardware Tag-Based KASAN introduces a ~50% slowdown. Applying this patch and setting kasan.page_alloc.sampling to a value higher than 1 allows to lower the slowdown. The performance improvement saturates around the sampling interval value of 10 with the default sampling page order of 3. This lowers the slowdown to ~20%. The slowdown in real scenarios involving the network will likely be better. Enabling page_alloc sampling has a downside: KASAN misses bad accesses to a page_alloc allocation that has not been tagged. This lowers the value of KASAN as a security mitigation. However, based on measuring the number of page_alloc allocations of different orders during boot in a test build, sampling with the default kasan.page_alloc.sample.order value affects only ~7% of allocations. The rest ~93% of allocations are still checked deterministically. Link: https://lkml.kernel.org/r/129da0614123bb85ed4dd61ae30842b2dd7c903f.1671471846.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Evgenii Stepanov Cc: Jann Horn Cc: Mark Brand Cc: Peter Collingbourne Signed-off-by: Andrew Morton --- include/linux/kasan.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 96c9d56e5510..5ebbaf672009 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -120,12 +120,13 @@ static __always_inline void kasan_poison_pages(struct page *page, __kasan_poison_pages(page, order, init); } -void __kasan_unpoison_pages(struct page *page, unsigned int order, bool init); -static __always_inline void kasan_unpoison_pages(struct page *page, +bool __kasan_unpoison_pages(struct page *page, unsigned int order, bool init); +static __always_inline bool kasan_unpoison_pages(struct page *page, unsigned int order, bool init) { if (kasan_enabled()) - __kasan_unpoison_pages(page, order, init); + return __kasan_unpoison_pages(page, order, init); + return false; } void __kasan_cache_create_kmalloc(struct kmem_cache *cache); @@ -249,8 +250,11 @@ static __always_inline bool kasan_check_byte(const void *addr) static inline void kasan_unpoison_range(const void *address, size_t size) {} static inline void kasan_poison_pages(struct page *page, unsigned int order, bool init) {} -static inline void kasan_unpoison_pages(struct page *page, unsigned int order, - bool init) {} +static inline bool kasan_unpoison_pages(struct page *page, unsigned int order, + bool init) +{ + return false; +} static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {} static inline void kasan_poison_slab(struct slab *slab) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, -- cgit v1.2.3 From 831978e37e93bd3e36612917a4b193278950daff Mon Sep 17 00:00:00 2001 From: Vernon Yang Date: Wed, 21 Dec 2022 14:00:52 +0800 Subject: maple_tree: remove extra space and blank line Patch series "Clean up and refinement for maple tree", v2. This patchset cleans up and refines some maple tree code. A few small changes make the code easier to understand and for better readability. This patch (of 7): These extra space and blank lines are unnecessary, so drop them. 
Link: https://lkml.kernel.org/r/20221221060058.609003-1-vernon2gm@gmail.com Link: https://lkml.kernel.org/r/20221221060058.609003-2-vernon2gm@gmail.com Signed-off-by: Vernon Yang Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index e594db58a0f1..4ee5a969441c 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -517,7 +517,6 @@ static inline void mas_reset(struct ma_state *mas) * entry. * * Note: may return the zero entry. - * */ #define mas_for_each(__mas, __entry, __max) \ while (((__entry) = mas_find((__mas), (__max))) != NULL) @@ -639,7 +638,6 @@ static inline void mt_set_in_rcu(struct maple_tree *mt) } static inline unsigned int mt_height(const struct maple_tree *mt) - { return (mt->ma_flags & MT_FLAGS_HEIGHT_MASK) >> MT_FLAGS_HEIGHT_OFFSET; } -- cgit v1.2.3 From eabb305293835b191ffe60234587ae8bf5e4e9fd Mon Sep 17 00:00:00 2001 From: Vernon Yang Date: Wed, 21 Dec 2022 14:00:56 +0800 Subject: maple_tree: remove the redundant code The macros CONFIG_DEBUG_MAPLE_TREE_VERBOSE no one uses, functions mas_dup_tree() and mas_dup_store() are not implemented, just function declaration, so drop it. Link: https://lkml.kernel.org/r/20221221060058.609003-6-vernon2gm@gmail.com Signed-off-by: Vernon Yang Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 4ee5a969441c..815a27661517 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -12,7 +12,6 @@ #include #include /* #define CONFIG_MAPLE_RCU_DISABLED */ -/* #define CONFIG_DEBUG_MAPLE_TREE_VERBOSE */ /* * Allocated nodes are mutable until they have been inserted into the tree, @@ -483,9 +482,6 @@ static inline bool mas_is_paused(struct ma_state *mas) return mas->node == MAS_PAUSE; } -void mas_dup_tree(struct ma_state *oldmas, struct ma_state *mas); -void mas_dup_store(struct ma_state *mas, void *entry); - /* * This finds an empty area from the highest address to the lowest. * AKA "Topdown" version, -- cgit v1.2.3 From 318e9342fbbb6888d903d86e83865609901a1c65 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Wed, 21 Dec 2022 10:08:45 -0800 Subject: mm/memory: add vm_normal_folio() Patch series "Convert deactivate_page() to folio_deactivate()", v4. Deactivate_page() has already been converted to use folios. This patch series modifies the callers of deactivate_page() to use folios. It also introduces vm_normal_folio() to assist with folio conversions, and converts deactivate_page() to folio_deactivate() which takes in a folio. This patch (of 4): Introduce a wrapper function called vm_normal_folio(). This function calls vm_normal_page() and returns the folio of the page found, or null if no page is found. This function allows callers to get a folio from a pte, which will eventually allow them to completely replace their struct page variables with struct folio instead. 
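A minimal sketch of the wrapper described above (presumably placed next to vm_normal_page(); the body here is an assumption based on the description, not a hunk from this patch):

struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr,
			      pte_t pte)
{
	struct page *page = vm_normal_page(vma, addr, pte);

	/* return the folio of the page found, or NULL if no page was found */
	return page ? page_folio(page) : NULL;
}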
Link: https://lkml.kernel.org/r/20221221180848.20774-1-vishal.moola@gmail.com Link: https://lkml.kernel.org/r/20221221180848.20774-2-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Matthew Wilcox (Oracle) Cc: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 253b2d7489e6..8e14183dfc58 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1968,6 +1968,8 @@ static inline bool can_do_mlock(void) { return false; } extern int user_shm_lock(size_t, struct ucounts *); extern void user_shm_unlock(size_t, struct ucounts *); +struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr, + pte_t pte); struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, -- cgit v1.2.3 From 5a9e34747c9f731bbb6b7fd7521c4fec0d840593 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Wed, 21 Dec 2022 10:08:48 -0800 Subject: mm/swap: convert deactivate_page() to folio_deactivate() Deactivate_page() has already been converted to use folios, this change converts it to take in a folio argument instead of calling page_folio(). It also renames the function folio_deactivate() to be more consistent with other folio functions. [akpm@linux-foundation.org: fix left-over comments, per Yu Zhao] Link: https://lkml.kernel.org/r/20221221180848.20774-5-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/swap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 93f1cebd8545..87cecb8c0bdc 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -401,7 +401,7 @@ extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); -extern void deactivate_page(struct page *page); +void folio_deactivate(struct folio *folio); void folio_mark_lazyfree(struct folio *folio); extern void swap_setup(void); -- cgit v1.2.3 From 0b7b8704ddcee372099a2bc6781db6ab273a85d5 Mon Sep 17 00:00:00 2001 From: Hao Sun Date: Wed, 21 Dec 2022 22:42:45 +0800 Subject: mm: new primitive kvmemdup() Similar to kmemdup(), but support large amount of bytes with kvmalloc() and does *not* guarantee that the result will be physically contiguous. Use only in cases where kvmalloc() is needed and free it with kvfree(). Also adapt policy_unpack.c in case someone bisect into this. Link: https://lkml.kernel.org/r/20221221144245.27164-1-sunhao.th@gmail.com Signed-off-by: Hao Sun Suggested-by: Daniel Borkmann Cc: Nick Terrell Cc: John Johansen Cc: Paul Moore Cc: James Morris Cc: "Serge E. 
Hallyn" Signed-off-by: Andrew Morton --- include/linux/string.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index db28802ab0a6..c062c581a98b 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -177,6 +177,7 @@ extern char *kstrdup(const char *s, gfp_t gfp) __malloc; extern const char *kstrdup_const(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); +extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); extern char **argv_split(gfp_t gfp, const char *str, int *argcp); -- cgit v1.2.3 From b5054174ac7c7d8fae15deee7ddc0e20fd604f30 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 21 Dec 2022 21:24:54 +0000 Subject: mm: move FOLL_* defs to mm_types.h Move FOLL_* definitions to linux/mm_types.h to make them more accessible without having to drag in all of linux/mm.h and everything that drags in too[1]. Link: https://lkml.kernel.org/r/2161258.1671657894@warthog.procyon.org.uk Signed-off-by: David Howells Suggested-by: Matthew Wilcox Reviewed-by: John Hubbard Cc: Christoph Hellwig Cc: Al Viro Signed-off-by: Andrew Morton --- include/linux/mm.h | 75 ------------------------------------------------ include/linux/mm_types.h | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8e14183dfc58..d68579bf8484 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3057,81 +3057,6 @@ static inline vm_fault_t vmf_error(int err) struct page *follow_page(struct vm_area_struct *vma, unsigned long address, unsigned int foll_flags); -#define FOLL_WRITE 0x01 /* check pte is writable */ -#define FOLL_TOUCH 0x02 /* mark page accessed */ -#define FOLL_GET 0x04 /* do get_page on page */ -#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ -#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ -#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO - * and return without waiting upon it */ -#define FOLL_NOFAULT 0x80 /* do not fault in pages */ -#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ -#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ -#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ -#define FOLL_ANON 0x8000 /* don't do file mappings */ -#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */ -#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ -#define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */ -#define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ -#define FOLL_PCI_P2PDMA 0x100000 /* allow returning PCI P2PDMA pages */ -#define FOLL_INTERRUPTIBLE 0x200000 /* allow interrupts from generic signals */ - -/* - * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each - * other. Here is what they mean, and how to use them: - * - * FOLL_LONGTERM indicates that the page will be held for an indefinite time - * period _often_ under userspace control. This is in contrast to - * iov_iter_get_pages(), whose usages are transient. 
- * - * FIXME: For pages which are part of a filesystem, mappings are subject to the - * lifetime enforced by the filesystem and we need guarantees that longterm - * users like RDMA and V4L2 only establish mappings which coordinate usage with - * the filesystem. Ideas for this coordination include revoking the longterm - * pin, delaying writeback, bounce buffer page writeback, etc. As FS DAX was - * added after the problem with filesystems was found FS DAX VMAs are - * specifically failed. Filesystem pages are still subject to bugs and use of - * FOLL_LONGTERM should be avoided on those pages. - * - * FIXME: Also NOTE that FOLL_LONGTERM is not supported in every GUP call. - * Currently only get_user_pages() and get_user_pages_fast() support this flag - * and calls to get_user_pages_[un]locked are specifically not allowed. This - * is due to an incompatibility with the FS DAX check and - * FAULT_FLAG_ALLOW_RETRY. - * - * In the CMA case: long term pins in a CMA region would unnecessarily fragment - * that region. And so, CMA attempts to migrate the page before pinning, when - * FOLL_LONGTERM is specified. - * - * FOLL_PIN indicates that a special kind of tracking (not just page->_refcount, - * but an additional pin counting system) will be invoked. This is intended for - * anything that gets a page reference and then touches page data (for example, - * Direct IO). This lets the filesystem know that some non-file-system entity is - * potentially changing the pages' data. In contrast to FOLL_GET (whose pages - * are released via put_page()), FOLL_PIN pages must be released, ultimately, by - * a call to unpin_user_page(). - * - * FOLL_PIN is similar to FOLL_GET: both of these pin pages. They use different - * and separate refcounting mechanisms, however, and that means that each has - * its own acquire and release mechanisms: - * - * FOLL_GET: get_user_pages*() to acquire, and put_page() to release. - * - * FOLL_PIN: pin_user_pages*() to acquire, and unpin_user_pages to release. - * - * FOLL_PIN and FOLL_GET are mutually exclusive for a given function call. - * (The underlying pages may experience both FOLL_GET-based and FOLL_PIN-based - * calls applied to them, and that's perfectly OK. This is a constraint on the - * callers, not on the pages.) - * - * FOLL_PIN should be set internally by the pin_user_pages*() APIs, never - * directly by the caller. That's in order to help avoid mismatches when - * releasing pages: get_user_pages*() pages must be released via put_page(), - * while pin_user_pages*() pages must be released via unpin_user_page(). - * - * Please see Documentation/core-api/pin_user_pages.rst for more information. - */ - static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) { if (vm_fault & VM_FAULT_OOM) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9757067c3053..1118e381fcdc 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1085,4 +1085,79 @@ enum fault_flag { typedef unsigned int __bitwise zap_flags_t; +/* + * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each + * other. Here is what they mean, and how to use them: + * + * FOLL_LONGTERM indicates that the page will be held for an indefinite time + * period _often_ under userspace control. This is in contrast to + * iov_iter_get_pages(), whose usages are transient. 
+ * + * FIXME: For pages which are part of a filesystem, mappings are subject to the + * lifetime enforced by the filesystem and we need guarantees that longterm + * users like RDMA and V4L2 only establish mappings which coordinate usage with + * the filesystem. Ideas for this coordination include revoking the longterm + * pin, delaying writeback, bounce buffer page writeback, etc. As FS DAX was + * added after the problem with filesystems was found FS DAX VMAs are + * specifically failed. Filesystem pages are still subject to bugs and use of + * FOLL_LONGTERM should be avoided on those pages. + * + * FIXME: Also NOTE that FOLL_LONGTERM is not supported in every GUP call. + * Currently only get_user_pages() and get_user_pages_fast() support this flag + * and calls to get_user_pages_[un]locked are specifically not allowed. This + * is due to an incompatibility with the FS DAX check and + * FAULT_FLAG_ALLOW_RETRY. + * + * In the CMA case: long term pins in a CMA region would unnecessarily fragment + * that region. And so, CMA attempts to migrate the page before pinning, when + * FOLL_LONGTERM is specified. + * + * FOLL_PIN indicates that a special kind of tracking (not just page->_refcount, + * but an additional pin counting system) will be invoked. This is intended for + * anything that gets a page reference and then touches page data (for example, + * Direct IO). This lets the filesystem know that some non-file-system entity is + * potentially changing the pages' data. In contrast to FOLL_GET (whose pages + * are released via put_page()), FOLL_PIN pages must be released, ultimately, by + * a call to unpin_user_page(). + * + * FOLL_PIN is similar to FOLL_GET: both of these pin pages. They use different + * and separate refcounting mechanisms, however, and that means that each has + * its own acquire and release mechanisms: + * + * FOLL_GET: get_user_pages*() to acquire, and put_page() to release. + * + * FOLL_PIN: pin_user_pages*() to acquire, and unpin_user_pages to release. + * + * FOLL_PIN and FOLL_GET are mutually exclusive for a given function call. + * (The underlying pages may experience both FOLL_GET-based and FOLL_PIN-based + * calls applied to them, and that's perfectly OK. This is a constraint on the + * callers, not on the pages.) + * + * FOLL_PIN should be set internally by the pin_user_pages*() APIs, never + * directly by the caller. That's in order to help avoid mismatches when + * releasing pages: get_user_pages*() pages must be released via put_page(), + * while pin_user_pages*() pages must be released via unpin_user_page(). + * + * Please see Documentation/core-api/pin_user_pages.rst for more information. 
+ */ + +#define FOLL_WRITE 0x01 /* check pte is writable */ +#define FOLL_TOUCH 0x02 /* mark page accessed */ +#define FOLL_GET 0x04 /* do get_page on page */ +#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ +#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ +#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO + * and return without waiting upon it */ +#define FOLL_NOFAULT 0x80 /* do not fault in pages */ +#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ +#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ +#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ +#define FOLL_ANON 0x8000 /* don't do file mappings */ +#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */ +#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ +#define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */ +#define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ +#define FOLL_PCI_P2PDMA 0x100000 /* allow returning PCI P2PDMA pages */ +#define FOLL_INTERRUPTIBLE 0x200000 /* allow interrupts from generic signals */ + #endif /* _LINUX_MM_TYPES_H */ -- cgit v1.2.3 From 391655fe08d1f942359a11148aa9aaf3f99d6d6f Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 21 Dec 2022 21:18:59 -0700 Subject: mm: multi-gen LRU: rename lru_gen_struct to lru_gen_folio Patch series "mm: multi-gen LRU: memcg LRU", v3. Overview ======== An memcg LRU is a per-node LRU of memcgs. It is also an LRU of LRUs, since each node and memcg combination has an LRU of folios (see mem_cgroup_lruvec()). Its goal is to improve the scalability of global reclaim, which is critical to system-wide memory overcommit in data centers. Note that memcg reclaim is currently out of scope. Its memory bloat is a pointer to each lruvec and negligible to each pglist_data. In terms of traversing memcgs during global reclaim, it improves the best-case complexity from O(n) to O(1) and does not affect the worst-case complexity O(n). Therefore, on average, it has a sublinear complexity in contrast to the current linear complexity. The basic structure of an memcg LRU can be understood by an analogy to the active/inactive LRU (of folios): 1. It has the young and the old (generations), i.e., the counterparts to the active and the inactive; 2. The increment of max_seq triggers promotion, i.e., the counterpart to activation; 3. Other events trigger similar operations, e.g., offlining an memcg triggers demotion, i.e., the counterpart to deactivation. In terms of global reclaim, it has two distinct features: 1. Sharding, which allows each thread to start at a random memcg (in the old generation) and improves parallelism; 2. Eventual fairness, which allows direct reclaim to bail out at will and reduces latency without affecting fairness over some time. The commit message in patch 6 details the workflow: https://lore.kernel.org/r/20221222041905.2431096-7-yuzhao@google.com/ The following is a simple test to quickly verify its effectiveness. Test design: 1. Create multiple memcgs. 2. Each memcg contains a job (fio). 3. All jobs access the same amount of memory randomly. 4. The system does not experience global memory pressure. 5. Periodically write to the root memory.reclaim. Desired outcome: 1. All memcgs have similar pgsteal counts, i.e., stddev(pgsteal) over mean(pgsteal) is close to 0%. 2. 
The total pgsteal is close to the total requested through memory.reclaim, i.e., sum(pgsteal) over sum(requested) is close to 100%. Actual outcome [1]: MGLRU off MGLRU on stddev(pgsteal) / mean(pgsteal) 75% 20% sum(pgsteal) / sum(requested) 425% 95% #################################################################### MEMCGS=128 for ((memcg = 0; memcg < $MEMCGS; memcg++)); do mkdir /sys/fs/cgroup/memcg$memcg done start() { echo $BASHPID > /sys/fs/cgroup/memcg$memcg/cgroup.procs fio -name=memcg$memcg --numjobs=1 --ioengine=mmap \ --filename=/dev/zero --size=1920M --rw=randrw \ --rate=64m,64m --random_distribution=random \ --fadvise_hint=0 --time_based --runtime=10h \ --group_reporting --minimal } for ((memcg = 0; memcg < $MEMCGS; memcg++)); do start & done sleep 600 for ((i = 0; i < 600; i++)); do echo 256m >/sys/fs/cgroup/memory.reclaim sleep 6 done for ((memcg = 0; memcg < $MEMCGS; memcg++)); do grep "pgsteal " /sys/fs/cgroup/memcg$memcg/memory.stat done #################################################################### [1]: This was obtained from running the above script (touches less than 256GB memory) on an EPYC 7B13 with 512GB DRAM for over an hour. This patch (of 8): The new name lru_gen_folio will be more distinct from the coming lru_gen_memcg. Link: https://lkml.kernel.org/r/20221222041905.2431096-1-yuzhao@google.com Link: https://lkml.kernel.org/r/20221222041905.2431096-2-yuzhao@google.com Signed-off-by: Yu Zhao Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Michael Larabel Cc: Michal Hocko Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 4 ++-- include/linux/mmzone.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index ff3f3f23f649..177b8b1dd43c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -178,7 +178,7 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli int zone = folio_zonenum(folio); int delta = folio_nr_pages(folio); enum lru_list lru = type * LRU_INACTIVE_FILE; - struct lru_gen_struct *lrugen = &lruvec->lrugen; + struct lru_gen_folio *lrugen = &lruvec->lrugen; VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS); VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS); @@ -224,7 +224,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, int gen = folio_lru_gen(folio); int type = folio_is_file_lru(folio); int zone = folio_zonenum(folio); - struct lru_gen_struct *lrugen = &lruvec->lrugen; + struct lru_gen_folio *lrugen = &lruvec->lrugen; VM_WARN_ON_ONCE_FOLIO(gen != -1, folio); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index cd28a100d9e4..1686fcc4ed01 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -404,7 +404,7 @@ enum { * The number of pages in each generation is eventually consistent and therefore * can be transiently negative when reset_batch_size() is pending. 
*/ -struct lru_gen_struct { +struct lru_gen_folio { /* the aging increments the youngest generation number */ unsigned long max_seq; /* the eviction increments the oldest generation numbers */ @@ -461,7 +461,7 @@ struct lru_gen_mm_state { struct lru_gen_mm_walk { /* the lruvec under reclaim */ struct lruvec *lruvec; - /* unstable max_seq from lru_gen_struct */ + /* unstable max_seq from lru_gen_folio */ unsigned long max_seq; /* the next address within an mm to scan */ unsigned long next_addr; @@ -524,7 +524,7 @@ struct lruvec { unsigned long flags; #ifdef CONFIG_LRU_GEN /* evictable pages divided into generations */ - struct lru_gen_struct lrugen; + struct lru_gen_folio lrugen; /* to concurrently iterate lru_gen_mm_list */ struct lru_gen_mm_state mm_state; #endif -- cgit v1.2.3 From 6df1b2212950aae2b2188c6645ea18e2a9e3fdd5 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 21 Dec 2022 21:19:00 -0700 Subject: mm: multi-gen LRU: rename lrugen->lists[] to lrugen->folios[] lru_gen_folio will be chained into per-node lists by the coming lrugen->list. Link: https://lkml.kernel.org/r/20221222041905.2431096-3-yuzhao@google.com Signed-off-by: Yu Zhao Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Michael Larabel Cc: Michal Hocko Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 4 ++-- include/linux/mmzone.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 177b8b1dd43c..eb8a2435ee80 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -256,9 +256,9 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, lru_gen_update_size(lruvec, folio, -1, gen); /* for folio_rotate_reclaimable() */ if (reclaiming) - list_add_tail(&folio->lru, &lrugen->lists[gen][type][zone]); + list_add_tail(&folio->lru, &lrugen->folios[gen][type][zone]); else - list_add(&folio->lru, &lrugen->lists[gen][type][zone]); + list_add(&folio->lru, &lrugen->folios[gen][type][zone]); return true; } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1686fcc4ed01..6c96ee823dbd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -312,7 +312,7 @@ enum lruvec_flags { * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the * corresponding generation. The gen counter in folio->flags stores gen+1 while - * a page is on one of lrugen->lists[]. Otherwise it stores 0. + * a page is on one of lrugen->folios[]. Otherwise it stores 0. * * A page is added to the youngest generation on faulting. The aging needs to * check the accessed bit at least twice before handing this page over to the @@ -324,8 +324,8 @@ enum lruvec_flags { * rest of generations, if they exist, are considered inactive. See * lru_gen_is_active(). * - * PG_active is always cleared while a page is on one of lrugen->lists[] so that - * the aging needs not to worry about it. And it's set again when a page + * PG_active is always cleared while a page is on one of lrugen->folios[] so + * that the aging needs not to worry about it. And it's set again when a page * considered active is isolated for non-reclaiming purposes, e.g., migration. * See lru_gen_add_folio() and lru_gen_del_folio(). 
* @@ -412,7 +412,7 @@ struct lru_gen_folio { /* the birth time of each generation in jiffies */ unsigned long timestamps[MAX_NR_GENS]; /* the multi-gen LRU lists, lazily sorted on eviction */ - struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + struct list_head folios[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* the multi-gen LRU sizes, eventually consistent */ long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; /* the exponential moving average of refaulted */ -- cgit v1.2.3 From e4dde56cd208674ce899b47589f263499e5b8cdc Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Wed, 21 Dec 2022 21:19:04 -0700 Subject: mm: multi-gen LRU: per-node lru_gen_folio lists For each node, memcgs are divided into two generations: the old and the young. For each generation, memcgs are randomly sharded into multiple bins to improve scalability. For each bin, an RCU hlist_nulls is virtually divided into three segments: the head, the tail and the default. An onlining memcg is added to the tail of a random bin in the old generation. The eviction starts at the head of a random bin in the old generation. The per-node memcg generation counter, whose reminder (mod 2) indexes the old generation, is incremented when all its bins become empty. There are four operations: 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its current generation (old or young) and updates its "seg" to "head"; 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its current generation (old or young) and updates its "seg" to "tail"; 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old generation, updates its "gen" to "old" and resets its "seg" to "default"; 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the young generation, updates its "gen" to "young" and resets its "seg" to "default". The events that trigger the above operations are: 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; 2. The first attempt to reclaim an memcg below low, which triggers MEMCG_LRU_TAIL; 3. The first attempt to reclaim an memcg below reclaimable size threshold, which triggers MEMCG_LRU_TAIL; 4. The second attempt to reclaim an memcg below reclaimable size threshold, which triggers MEMCG_LRU_YOUNG; 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG; 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; 7. Offlining an memcg, which triggers MEMCG_LRU_OLD. Note that memcg LRU only applies to global reclaim, and the round-robin incrementing of their max_seq counters ensures the eventual fairness to all eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). 
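As a hedged illustration of the mapping between events and operations listed above, offlining an memcg (event 7) would translate into a call like the following on each of its per-node lruvecs; only the helper's declaration appears in this patch, so the call site shown is an assumption:

	/* event 7: the memcg is going offline -> operation 3 (MEMCG_LRU_OLD) */
	lru_gen_rotate_memcg(lruvec, MEMCG_LRU_OLD);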
Link: https://lkml.kernel.org/r/20221222041905.2431096-7-yuzhao@google.com Signed-off-by: Yu Zhao Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Michael Larabel Cc: Michal Hocko Cc: Mike Rapoport Cc: Roman Gushchin Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 10 ++++ include/linux/mm_inline.h | 17 +++++++ include/linux/mmzone.h | 117 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 142 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d3c8203cab6c..2e08b05bc6bf 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -794,6 +794,11 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg) percpu_ref_put(&objcg->refcnt); } +static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg) +{ + return !memcg || css_tryget(&memcg->css); +} + static inline void mem_cgroup_put(struct mem_cgroup *memcg) { if (memcg) @@ -1301,6 +1306,11 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg) { } +static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg) +{ + return true; +} + static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index eb8a2435ee80..acf03147fff8 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -122,6 +122,18 @@ static inline bool lru_gen_in_fault(void) return current->in_lru_fault; } +#ifdef CONFIG_MEMCG +static inline int lru_gen_memcg_seg(struct lruvec *lruvec) +{ + return READ_ONCE(lruvec->lrugen.seg); +} +#else +static inline int lru_gen_memcg_seg(struct lruvec *lruvec) +{ + return 0; +} +#endif + static inline int lru_gen_from_seq(unsigned long seq) { return seq % MAX_NR_GENS; @@ -297,6 +309,11 @@ static inline bool lru_gen_in_fault(void) return false; } +static inline int lru_gen_memcg_seg(struct lruvec *lruvec) +{ + return 0; +} + static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) { return false; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6c96ee823dbd..815c7c2edf45 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -367,6 +368,15 @@ struct page_vma_mapped_walk; #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) #define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) +/* see the comment on MEMCG_NR_GENS */ +enum { + MEMCG_LRU_NOP, + MEMCG_LRU_HEAD, + MEMCG_LRU_TAIL, + MEMCG_LRU_OLD, + MEMCG_LRU_YOUNG, +}; + #ifdef CONFIG_LRU_GEN enum { @@ -426,6 +436,14 @@ struct lru_gen_folio { atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; /* whether the multi-gen LRU is enabled */ bool enabled; +#ifdef CONFIG_MEMCG + /* the memcg generation this lru_gen_folio belongs to */ + u8 gen; + /* the list segment this lru_gen_folio belongs to */ + u8 seg; + /* per-node lru_gen_folio list for global reclaim */ + struct hlist_nulls_node list; +#endif }; enum { @@ -479,12 +497,87 @@ void lru_gen_init_lruvec(struct lruvec *lruvec); void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); #ifdef CONFIG_MEMCG + +/* + * For each node, memcgs are divided into two generations: the old and the + * young. For each generation, memcgs are randomly sharded into multiple bins + * to improve scalability. For each bin, the hlist_nulls is virtually divided + * into three segments: the head, the tail and the default. 
+ * + * An onlining memcg is added to the tail of a random bin in the old generation. + * The eviction starts at the head of a random bin in the old generation. The + * per-node memcg generation counter, whose reminder (mod MEMCG_NR_GENS) indexes + * the old generation, is incremented when all its bins become empty. + * + * There are four operations: + * 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its + * current generation (old or young) and updates its "seg" to "head"; + * 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its + * current generation (old or young) and updates its "seg" to "tail"; + * 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old + * generation, updates its "gen" to "old" and resets its "seg" to "default"; + * 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the + * young generation, updates its "gen" to "young" and resets its "seg" to + * "default". + * + * The events that trigger the above operations are: + * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; + * 2. The first attempt to reclaim an memcg below low, which triggers + * MEMCG_LRU_TAIL; + * 3. The first attempt to reclaim an memcg below reclaimable size threshold, + * which triggers MEMCG_LRU_TAIL; + * 4. The second attempt to reclaim an memcg below reclaimable size threshold, + * which triggers MEMCG_LRU_YOUNG; + * 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG; + * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; + * 7. Offlining an memcg, which triggers MEMCG_LRU_OLD. + * + * Note that memcg LRU only applies to global reclaim, and the round-robin + * incrementing of their max_seq counters ensures the eventual fairness to all + * eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). 
+ */ +#define MEMCG_NR_GENS 2 +#define MEMCG_NR_BINS 8 + +struct lru_gen_memcg { + /* the per-node memcg generation counter */ + unsigned long seq; + /* each memcg has one lru_gen_folio per node */ + unsigned long nr_memcgs[MEMCG_NR_GENS]; + /* per-node lru_gen_folio list for global reclaim */ + struct hlist_nulls_head fifo[MEMCG_NR_GENS][MEMCG_NR_BINS]; + /* protects the above */ + spinlock_t lock; +}; + +void lru_gen_init_pgdat(struct pglist_data *pgdat); + void lru_gen_init_memcg(struct mem_cgroup *memcg); void lru_gen_exit_memcg(struct mem_cgroup *memcg); -#endif +void lru_gen_online_memcg(struct mem_cgroup *memcg); +void lru_gen_offline_memcg(struct mem_cgroup *memcg); +void lru_gen_release_memcg(struct mem_cgroup *memcg); +void lru_gen_rotate_memcg(struct lruvec *lruvec, int op); + +#else /* !CONFIG_MEMCG */ + +#define MEMCG_NR_GENS 1 + +struct lru_gen_memcg { +}; + +static inline void lru_gen_init_pgdat(struct pglist_data *pgdat) +{ +} + +#endif /* CONFIG_MEMCG */ #else /* !CONFIG_LRU_GEN */ +static inline void lru_gen_init_pgdat(struct pglist_data *pgdat) +{ +} + static inline void lru_gen_init_lruvec(struct lruvec *lruvec) { } @@ -494,6 +587,7 @@ static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) } #ifdef CONFIG_MEMCG + static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) { } @@ -501,7 +595,24 @@ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) { } -#endif + +static inline void lru_gen_online_memcg(struct mem_cgroup *memcg) +{ +} + +static inline void lru_gen_offline_memcg(struct mem_cgroup *memcg) +{ +} + +static inline void lru_gen_release_memcg(struct mem_cgroup *memcg) +{ +} + +static inline void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) +{ +} + +#endif /* CONFIG_MEMCG */ #endif /* CONFIG_LRU_GEN */ @@ -1243,6 +1354,8 @@ typedef struct pglist_data { #ifdef CONFIG_LRU_GEN /* kswap mm walk data */ struct lru_gen_mm_walk mm_walk; + /* lru_gen_folio list */ + struct lru_gen_memcg memcg_lru; #endif CACHELINE_PADDING(_pad2_); -- cgit v1.2.3 From 1ef488edd6c4d447784710974f049628c2890481 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Dec 2022 16:56:16 +0100 Subject: mm/mprotect: drop pgprot_t parameter from change_protection() Being able to provide a custom protection opens the door for inconsistencies and BUGs: for example, accidentally allowing for more permissions than desired by other mechanisms (e.g., softdirty tracking). vma->vm_page_prot should be the single source of truth. Only PROT_NUMA is special: there is no way we can erroneously allow for more permissions when removing all permissions. Special-case using the MM_CP_PROT_NUMA flag. 
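A minimal sketch of the idea (the locals and their exact placement inside change_protection() are assumptions; only the dropped parameter is visible in the hunk below):

	/* vma->vm_page_prot is the single source of truth ... */
	pgprot_t newprot = vma->vm_page_prot;

	/* ... except for NUMA hinting, which removes all permissions */
	if (cp_flags & MM_CP_PROT_NUMA)
		newprot = PAGE_NONE;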
[david@redhat.com: PAGE_NONE might not be defined without CONFIG_NUMA_BALANCING] Link: https://lkml.kernel.org/r/5084ff1c-ebb3-f918-6a60-bacabf550a88@redhat.com Link: https://lkml.kernel.org/r/20221223155616.297723-3-david@redhat.com Signed-off-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Nadav Amit Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index d68579bf8484..329ed67edd76 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2134,8 +2134,7 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, pte_t pte); extern unsigned long change_protection(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, - unsigned long end, pgprot_t newprot, - unsigned long cp_flags); + unsigned long end, unsigned long cp_flags); extern int mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma, struct vm_area_struct **pprev, unsigned long start, unsigned long end, unsigned long newflags); -- cgit v1.2.3 From 3783e1721b650588938d28e4a084a1c9748361c8 Mon Sep 17 00:00:00 2001 From: Kele Huang Date: Sat, 24 Dec 2022 01:02:33 -0500 Subject: mm: fix comment of page table counter Commit af5b0f6a09e42 ("mm: consolidate page table accounting") consolidates page table accounting to a single counter in struct mm_struct {} as mm->pgtables_bytes. So the meanning of this counter should be the size of all page tables now. Link: https://lkml.kernel.org/r/20221224060233.417827-1-kele.huang@columbia.edu Signed-off-by: Kele Huang Cc: Arnd Bergmann Cc: Colin Cross Cc: David Hildenbrand Cc: Hugh Dickins Cc: Liam Howlett Cc: Matthew Wilcox (Oracle) Cc: Pasha Tatashin Cc: Peter Xu Cc: Vlastimil Babka Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1118e381fcdc..10b6eb311ede 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -647,7 +647,7 @@ struct mm_struct { atomic_t mm_count; #ifdef CONFIG_MMU - atomic_long_t pgtables_bytes; /* PTE page table pages */ + atomic_long_t pgtables_bytes; /* size of all page tables */ #endif int map_count; /* number of VMAs */ -- cgit v1.2.3 From cff61bbc717bfddd6e433fe142b8e70b21546a1d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Dec 2022 06:10:29 -1000 Subject: jbd2,ocfs2: move jbd2_journal_submit_inode_data_buffers to ocfs2 jbd2_journal_submit_inode_data_buffers is only used by ocfs2, so move it there to prepare for removing generic_writepages. 
Link: https://lkml.kernel.org/r/20221229161031.391878-5-hch@lst.de Signed-off-by: Christoph Hellwig Cc: Jan Kara Cc: Joel Becker Cc: Joseph Qi Cc: Konstantin Komarov Cc: Mark Fasheh Cc: Matthew Wilcox Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- include/linux/jbd2.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 2170e0cc279d..5962072a4b19 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1570,8 +1570,6 @@ extern int jbd2_journal_inode_ranged_write(handle_t *handle, extern int jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *inode, loff_t start_byte, loff_t length); -extern int jbd2_journal_submit_inode_data_buffers( - struct jbd2_inode *jinode); extern int jbd2_journal_finish_inode_data_buffers( struct jbd2_inode *jinode); extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, -- cgit v1.2.3 From c2ca7a59a4199059556b57cfdf98fcf46039ca6b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Dec 2022 06:10:31 -1000 Subject: mm: remove generic_writepages Now that all external callers are gone, just fold it into do_writepages. Link: https://lkml.kernel.org/r/20221229161031.391878-7-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Cc: Joel Becker Cc: Joseph Qi Cc: Konstantin Komarov Cc: Mark Fasheh Cc: Matthew Wilcox Cc: Theodore Ts'o Signed-off-by: Andrew Morton --- include/linux/writeback.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 06f9291b6fd5..2554b71765e9 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -369,8 +369,6 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb); typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, void *data); -int generic_writepages(struct address_space *mapping, - struct writeback_control *wbc); void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); int write_cache_pages(struct address_space *mapping, -- cgit v1.2.3 From becacb04fdd439d7d1f2a93739161706a2e3e947 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 30 Dec 2022 15:08:42 +0800 Subject: mm: memcg: add folio_memcg_check() Patch series "mm: convert page_idle/damon to use folios", v4. This patch (of 8): Convert page_memcg_check() into folio_memcg_check() and add a page_memcg_check() wrapper. The behaviour of page_memcg_check() is unchanged; tail pages always had a NULL ->memcg_data. Link: https://lkml.kernel.org/r/20221230070849.63358-1-wangkefeng.wang@huawei.com Link: https://lkml.kernel.org/r/20221230070849.63358-2-wangkefeng.wang@huawei.com Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Kefeng Wang Cc: David Hildenbrand Cc: SeongJae Park Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2e08b05bc6bf..26667bf16da5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -466,34 +466,34 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) } /* - * page_memcg_check - get the memory cgroup associated with a page - * @page: a pointer to the page struct + * folio_memcg_check - Get the memory cgroup associated with a folio. + * @folio: Pointer to the folio. 
* - * Returns a pointer to the memory cgroup associated with the page, - * or NULL. This function unlike page_memcg() can take any page - * as an argument. It has to be used in cases when it's not known if a page + * Returns a pointer to the memory cgroup associated with the folio, + * or NULL. This function unlike folio_memcg() can take any folio + * as an argument. It has to be used in cases when it's not known if a folio * has an associated memory cgroup pointer or an object cgroups vector or * an object cgroup. * - * For a non-kmem page any of the following ensures page and memcg binding + * For a non-kmem folio any of the following ensures folio and memcg binding * stability: * - * - the page lock + * - the folio lock * - LRU isolation - * - lock_page_memcg() + * - lock_folio_memcg() * - exclusive reference * - mem_cgroup_trylock_pages() * - * For a kmem page a caller should hold an rcu read lock to protect memcg - * associated with a kmem page from being released. + * For a kmem folio a caller should hold an rcu read lock to protect memcg + * associated with a kmem folio from being released. */ -static inline struct mem_cgroup *page_memcg_check(struct page *page) +static inline struct mem_cgroup *folio_memcg_check(struct folio *folio) { /* - * Because page->memcg_data might be changed asynchronously - * for slab pages, READ_ONCE() should be used here. + * Because folio->memcg_data might be changed asynchronously + * for slabs, READ_ONCE() should be used here. */ - unsigned long memcg_data = READ_ONCE(page->memcg_data); + unsigned long memcg_data = READ_ONCE(folio->memcg_data); if (memcg_data & MEMCG_DATA_OBJCGS) return NULL; @@ -508,6 +508,13 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page) return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } +static inline struct mem_cgroup *page_memcg_check(struct page *page) +{ + if (PageTail(page)) + return NULL; + return folio_memcg_check((struct folio *)page); +} + static inline struct mem_cgroup *get_mem_cgroup_from_objcg(struct obj_cgroup *objcg) { struct mem_cgroup *memcg; @@ -1170,6 +1177,11 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) return NULL; } +static inline struct mem_cgroup *folio_memcg_check(struct folio *folio) +{ + return NULL; +} + static inline struct mem_cgroup *page_memcg_check(struct page *page) { return NULL; -- cgit v1.2.3 From a79390f5d6a78647fd70856bd42b22d994de0ba2 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 4 Jan 2023 17:52:06 -0500 Subject: mm/mprotect: use long for page accountings and retval Switch to use type "long" for page accountings and retval across the whole procedure of change_protection(). The change should have shrinked the possible maximum page number to be half comparing to previous (ULONG_MAX / 2), but it shouldn't overflow on any system either because the maximum possible pages touched by change protection should be ULONG_MAX / PAGE_SIZE. Two reasons to switch from "unsigned long" to "long": 1. It suites better on count_vm_numa_events(), whose 2nd parameter takes a long type. 2. It paves way for returning negative (error) values in the future. Currently the only caller that consumes this retval is change_prot_numa(), where the unsigned long was converted to an int. Since at it, touching up the numa code to also take a long, so it'll avoid any possible overflow too during the int-size convertion. 
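A small sketch of the caller pattern this enables, modeled on the change_prot_numa() case mentioned above (the exact body is an assumption):

	long nr_updated;

	nr_updated = change_protection(tlb, vma, addr, end, MM_CP_PROT_NUMA);
	if (nr_updated > 0)
		/* count_vm_numa_events() takes a long, so no narrowing cast */
		count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);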
Link: https://lkml.kernel.org/r/20230104225207.1066932-3-peterx@redhat.com Signed-off-by: Peter Xu Acked-by: Mike Kravetz Acked-by: James Houghton Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Muchun Song Cc: Nadav Amit Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- include/linux/mm.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b6b10101bea7..e3aa336df900 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -248,7 +248,7 @@ void hugetlb_vma_lock_release(struct kref *kref); int pmd_huge(pmd_t pmd); int pud_huge(pud_t pud); -unsigned long hugetlb_change_protection(struct vm_area_struct *vma, +long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot, unsigned long cp_flags); @@ -437,7 +437,7 @@ static inline void move_hugetlb_state(struct folio *old_folio, { } -static inline unsigned long hugetlb_change_protection( +static inline long hugetlb_change_protection( struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot, unsigned long cp_flags) diff --git a/include/linux/mm.h b/include/linux/mm.h index 329ed67edd76..4ac5ea4b584c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2132,7 +2132,7 @@ static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma } bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, pte_t pte); -extern unsigned long change_protection(struct mmu_gather *tlb, +extern long change_protection(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long cp_flags); extern int mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma, -- cgit v1.2.3 From d1751118c88673fe5a948ad82277898e9e284c55 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 4 Jan 2023 17:52:07 -0500 Subject: mm/uffd: detect pgtable allocation failures Before this patch, when there's any pgtable allocation issues happened during change_protection(), the error will be ignored from the syscall. For shmem, there will be an error dumped into the host dmesg. Two issues with that: (1) Doing a trace dump when allocation fails is not anything close to grace. (2) The user should be notified with any kind of such error, so the user can trap it and decide what to do next, either by retrying, or stop the process properly, or anything else. For userfault users, this will change the API of UFFDIO_WRITEPROTECT when pgtable allocation failure happened. It should not normally break anyone, though. If it breaks, then in good ways. One man-page update will be on the way to introduce the new -ENOMEM for UFFDIO_WRITEPROTECT. Not marking stable so we keep the old behavior on the 5.19-till-now kernels. 
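From user space, the new failure mode could be trapped roughly like this (a hedged sketch assuming the usual <linux/userfaultfd.h> and <sys/ioctl.h> includes; the variable names and the reaction to the error are assumptions, only the -ENOMEM return is introduced by this patch):

	struct uffdio_writeprotect wp = {
		.range = { .start = start, .len = len },
		.mode  = UFFDIO_WRITEPROTECT_MODE_WP,
	};

	if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp) && errno == ENOMEM) {
		/* page table allocation failed: retry, back off, or stop */
	}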
[akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20230104225207.1066932-4-peterx@redhat.com Signed-off-by: Peter Xu Reported-by: James Houghton Acked-by: James Houghton Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: David Hildenbrand Cc: Mike Kravetz Cc: Muchun Song Cc: Nadav Amit Signed-off-by: Andrew Morton --- include/linux/userfaultfd_k.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 9df0b9a762cc..3767f18114ef 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -73,7 +73,7 @@ extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start, extern int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start, unsigned long len, bool enable_wp, atomic_t *mmap_changing); -extern void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *vma, +extern long uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *vma, unsigned long start, unsigned long len, bool enable_wp); /* mm helpers */ -- cgit v1.2.3 From 9eefefd835e451d340f5e95bc14ffd68b9b99268 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Thu, 5 Jan 2023 13:04:24 +0100 Subject: mm: remove an ambiguous sentence from kmap_local_folio() kdocs In the kdocs of kmap_local_folio() there is a an ambiguous sentence which suggests to use this API "only when really necessary". On the contrary, since kmap() and kmap_atomic() are deprecated, both kmap_local_folio(), as well as kmap_local_page(), must be preferred to the previous ones. Therefore, remove the above-mentioned sentence exactly how it has previously been done for the kmap_local_page() kdocs in commit 72f1c55adf70 ("highmem: delete a sentence from kmap_local_page() kdocs"). Link: https://lkml.kernel.org/r/20230105120424.30055-1-fmdefrancesco@gmail.com Signed-off-by: Fabio M. De Francesco Reviewed-by: Ira Weiny Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/highmem.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 44242268f53b..daeb0d8e753a 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -119,9 +119,8 @@ static inline void *kmap_local_page(struct page *page); * virtual address of the direct mapping. Only real highmem pages are * temporarily mapped. * - * While it is significantly faster than kmap() for the higmem case it - * comes with restrictions about the pointer validity. Only use when really - * necessary. + * While it is significantly faster than kmap() for the highmem case it + * comes with restrictions about the pointer validity. * * On HIGHMEM enabled systems mapping a highmem page has the side effect of * disabling migration in order to keep the virtual address stable across -- cgit v1.2.3 From 1f8549fce525bc95df40ea3ddbfc6e8e719d188d Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Thu, 5 Jan 2023 13:13:05 +0100 Subject: mm: fix spelling mistake in highmem.h Substitute "higmem" with "highmem" in highmem.h. Link: https://lkml.kernel.org/r/20230105121305.30714-1-fmdefrancesco@gmail.com Signed-off-by: Fabio M. 
De Francesco Suggested-by: "Matthew Wilcox (Oracle)" Reviewed-by: Ira Weiny Signed-off-by: Andrew Morton --- include/linux/highmem.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index daeb0d8e753a..d7097b8158f2 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -86,8 +86,8 @@ static inline void kmap_flush_unused(void); * virtual address of the direct mapping. Only real highmem pages are * temporarily mapped. * - * While it is significantly faster than kmap() for the higmem case it - * comes with restrictions about the pointer validity. + * While kmap_local_page() is significantly faster than kmap() for the highmem + * case it comes with restrictions about the pointer validity. * * On HIGHMEM enabled systems mapping a highmem page has the side effect of * disabling migration in order to keep the virtual address stable across -- cgit v1.2.3 From bbc61844b4645d54c147a82654ac974bb7be85de Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 4 Jan 2023 14:06:05 +0800 Subject: mm/kasan: simplify and refine kasan_cache code struct 'kasan_cache' has a member 'is_kmalloc' indicating whether its host kmem_cache is a kmalloc cache. With newly introduced is_kmalloc_cache() helper, 'is_kmalloc' and its related function can be replaced and removed. Also 'kasan_cache' is only needed by KASAN generic mode, and not by SW/HW tag modes, so refine its protection macro accordingly, suggested by Andrey Konoval. Link: https://lkml.kernel.org/r/20230104060605.930910-2-feng.tang@intel.com Signed-off-by: Feng Tang Reviewed-by: Andrey Konovalov Acked-by: Vlastimil Babka Acked-by: David Rientjes Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Christoph Lameter Cc: Dmitry Vyukov Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Pekka Enberg Cc: Roman Gushchin Cc: Vincenzo Frascino Signed-off-by: Andrew Morton --- include/linux/kasan.h | 22 +++++----------------- include/linux/slab_def.h | 2 +- include/linux/slub_def.h | 2 +- 3 files changed, 7 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5ebbaf672009..f7ef70661ce2 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -96,15 +96,6 @@ static inline bool kasan_has_integrated_init(void) } #ifdef CONFIG_KASAN - -struct kasan_cache { -#ifdef CONFIG_KASAN_GENERIC - int alloc_meta_offset; - int free_meta_offset; -#endif - bool is_kmalloc; -}; - void __kasan_unpoison_range(const void *addr, size_t size); static __always_inline void kasan_unpoison_range(const void *addr, size_t size) { @@ -129,13 +120,6 @@ static __always_inline bool kasan_unpoison_pages(struct page *page, return false; } -void __kasan_cache_create_kmalloc(struct kmem_cache *cache); -static __always_inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) -{ - if (kasan_enabled()) - __kasan_cache_create_kmalloc(cache); -} - void __kasan_poison_slab(struct slab *slab); static __always_inline void kasan_poison_slab(struct slab *slab) { @@ -255,7 +239,6 @@ static inline bool kasan_unpoison_pages(struct page *page, unsigned int order, { return false; } -static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {} static inline void kasan_poison_slab(struct slab *slab) {} static inline void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) {} @@ -306,6 +289,11 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {} #ifdef 
CONFIG_KASAN_GENERIC +struct kasan_cache { + int alloc_meta_offset; + int free_meta_offset; +}; + size_t kasan_metadata_size(struct kmem_cache *cache, bool in_object); slab_flags_t kasan_never_merge(void); void kasan_cache_create(struct kmem_cache *cache, unsigned int *size, diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 5834bad8ad78..a61e7d55d0d3 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -72,7 +72,7 @@ struct kmem_cache { int obj_offset; #endif /* CONFIG_DEBUG_SLAB */ -#ifdef CONFIG_KASAN +#ifdef CONFIG_KASAN_GENERIC struct kasan_cache kasan_info; #endif diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index aa0ee1678d29..f6df03f934e5 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -136,7 +136,7 @@ struct kmem_cache { unsigned int *random_seq; #endif -#ifdef CONFIG_KASAN +#ifdef CONFIG_KASAN_GENERIC struct kasan_cache kasan_info; #endif -- cgit v1.2.3 From e9adcfecf572fcfaa9f8525904cf49c709974f73 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Tue, 3 Jan 2023 16:27:32 -0800 Subject: mm: remove zap_page_range and create zap_vma_pages zap_page_range was originally designed to unmap pages within an address range that could span multiple vmas. While working on [1], it was discovered that all callers of zap_page_range pass a range entirely within a single vma. In addition, the mmu notification call within zap_page range does not correctly handle ranges that span multiple vmas. When crossing a vma boundary, a new mmu_notifier_range_init/end call pair with the new vma should be made. Instead of fixing zap_page_range, do the following: - Create a new routine zap_vma_pages() that will remove all pages within the passed vma. Most users of zap_page_range pass the entire vma and can use this new routine. - For callers of zap_page_range not passing the entire vma, instead call zap_page_range_single(). - Remove zap_page_range. 
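An illustrative caller conversion implied by the list above (not an actual hunk from this patch; call sites that pass the whole VMA look roughly like this):

	/* before */
	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start);

	/* after */
	zap_vma_pages(vma);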
[1] https://lore.kernel.org/linux-mm/20221114235507.294320-2-mike.kravetz@oracle.com/ Link: https://lkml.kernel.org/r/20230104002732.232573-1-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Suggested-by: Peter Xu Acked-by: Michal Hocko Acked-by: Peter Xu Acked-by: Heiko Carstens [s390] Reviewed-by: Christoph Hellwig Cc: Christian Borntraeger Cc: Christian Brauner Cc: Dave Hansen Cc: David Hildenbrand Cc: Eric Dumazet Cc: Matthew Wilcox Cc: Michael Ellerman Cc: Nadav Amit Cc: Palmer Dabbelt Cc: Rik van Riel Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 4ac5ea4b584c..eb5bfc77c2c2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1977,10 +1977,13 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); -void zap_page_range(struct vm_area_struct *vma, unsigned long address, - unsigned long size); void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details); +static inline void zap_vma_pages(struct vm_area_struct *vma) +{ + zap_page_range_single(vma, vma->vm_start, + vma->vm_end - vma->vm_start, NULL); +} void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); -- cgit v1.2.3 From fc4f4be9b5271e43eeb4c675d190fa9734de9ea3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 2 Jan 2023 17:08:54 +0100 Subject: mm/nommu: factor out check for NOMMU shared mappings into is_nommu_shared_mapping() Patch series "mm/nommu: don't use VM_MAYSHARE for MAP_PRIVATE mappings". Trying to reduce the confusion around VM_SHARED and VM_MAYSHARE first requires !CONFIG_MMU to stop using VM_MAYSHARE for MAP_PRIVATE mappings. CONFIG_MMU only sets VM_MAYSHARE for MAP_SHARED mappings. This paves the way for further VM_MAYSHARE and VM_SHARED cleanups: for example, renaming VM_MAYSHARE to VM_MAP_SHARED to make it clearer what it actually means. Let's first get the weird case out of the way and not use VM_MAYSHARE in MAP_PRIVATE mappings, using a new VM_MAYOVERLAY flag instead. This patch (of 3): We want to stop using VM_MAYSHARE in private mappings to pave the way for clarifying the semantics of VM_MAYSHARE vs. VM_SHARED and reduce the confusion. While CONFIG_MMU uses VM_MAYSHARE to represent MAP_SHARED, !CONFIG_MMU also sets VM_MAYSHARE for selected R/O private file mappings that are an effective overlay of a file mapping. Let's factor out all relevant VM_MAYSHARE checks in !CONFIG_MMU code into is_nommu_shared_mapping() first. Note that whenever VM_SHARED is set, VM_MAYSHARE must be set as well (unless there is a serious BUG). So there is no need to test for VM_SHARED manually. No functional change intended.
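As an illustration (hypothetical call site, not taken from the patch), a !CONFIG_MMU check is converted along these lines, using the new helper shown in the hunk below:

	/* Hedged sketch: example nommu-only check using the new helper. */
	static bool example_can_share(struct vm_area_struct *vma)
	{
		/* before: return vma->vm_flags & VM_MAYSHARE; */
		return is_nommu_shared_mapping(vma->vm_flags);
	}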
Link: https://lkml.kernel.org/r/20230102160856.500584-1-david@redhat.com Link: https://lkml.kernel.org/r/20230102160856.500584-2-david@redhat.com Signed-off-by: David Hildenbrand Cc: Arnd Bergmann Cc: David Hildenbrand Cc: Greg Kroah-Hartman Cc: Jens Axboe Cc: Nicolas Pitre Cc: Pavel Begunkov Signed-off-by: Andrew Morton --- include/linux/mm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index eb5bfc77c2c2..791bac40bf8e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1347,6 +1347,21 @@ static inline bool is_cow_mapping(vm_flags_t flags) return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; } +#ifndef CONFIG_MMU +static inline bool is_nommu_shared_mapping(vm_flags_t flags) +{ + /* + * NOMMU shared mappings are ordinary MAP_SHARED mappings and selected + * R/O MAP_PRIVATE file mappings that are an effective R/O overlay of + * a file mapping. R/O MAP_PRIVATE mappings might still modify + * underlying memory if ptrace is active, so this is only possible if + * ptrace does not apply. Note that there is no mprotect() to upgrade + * write permissions later. + */ + return flags & VM_MAYSHARE; +} +#endif + #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTION_IN_PAGE_FLAGS #endif -- cgit v1.2.3 From b6b7a8faf05c709cd9f63d3b7d9c66bd91bc3b0d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 2 Jan 2023 17:08:55 +0100 Subject: mm/nommu: don't use VM_MAYSHARE for MAP_PRIVATE mappings Let's stop using VM_MAYSHARE for MAP_PRIVATE mappings and use VM_MAYOVERLAY instead. Rewrite determine_vm_flags() to make the whole logic easier to digest, and to cleanly separate MAP_PRIVATE vs. MAP_SHARED. No functional change intended. Link: https://lkml.kernel.org/r/20230102160856.500584-3-david@redhat.com Signed-off-by: David Hildenbrand Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Jens Axboe Cc: Nicolas Pitre Cc: Pavel Begunkov Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 791bac40bf8e..8a8563359946 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -276,7 +276,12 @@ extern unsigned int kobjsize(const void *objp); #define VM_MAYSHARE 0x00000080 #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ +#ifdef CONFIG_MMU #define VM_UFFD_MISSING 0x00000200 /* missing pages tracking */ +#else /* CONFIG_MMU */ +#define VM_MAYOVERLAY 0x00000200 /* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */ +#define VM_UFFD_MISSING 0 +#endif /* CONFIG_MMU */ #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_UFFD_WP 0x00001000 /* wrprotect pages tracking */ @@ -1358,7 +1363,7 @@ static inline bool is_nommu_shared_mapping(vm_flags_t flags) * ptrace does not apply. Note that there is no mprotect() to upgrade * write permissions later. */ - return flags & VM_MAYSHARE; + return flags & (VM_MAYSHARE | VM_MAYOVERLAY); } #endif -- cgit v1.2.3 From 8788f6781486769d9598dcaedc3fe0eb12fc3e59 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 30 Dec 2022 14:52:51 -0700 Subject: mm: add vma_has_recency() Add vma_has_recency() to indicate whether a VMA may exhibit temporal locality that the LRU algorithm relies on. This function returns false for VMAs marked by VM_SEQ_READ or VM_RAND_READ. 
While the former flag indicates linear access, i.e., a special case of spatial locality, both flags indicate a lack of temporal locality, i.e., the reuse of an area within a relatively small duration. "Recency" is chosen over "locality" to avoid confusion between temporal and spatial localities. Before this patch, the active/inactive LRU only ignored the accessed bit from VMAs marked by VM_SEQ_READ. After this patch, the active/inactive LRU and MGLRU share the same logic: they both ignore the accessed bit if vma_has_recency() returns false. For the active/inactive LRU, the following fio test showed a [6, 8]% increase in IOPS when randomly accessing mapped files under memory pressure. kb=$(awk '/MemTotal/ { print $2 }' /proc/meminfo) kb=$((kb - 8*1024*1024)) modprobe brd rd_nr=1 rd_size=$kb dd if=/dev/zero of=/dev/ram0 bs=1M mkfs.ext4 /dev/ram0 mount /dev/ram0 /mnt/ swapoff -a fio --name=test --directory=/mnt/ --ioengine=mmap --numjobs=8 \ --size=8G --rw=randrw --time_based --runtime=10m \ --group_reporting The discussion that led to this patch is here [1]. Additional test results are available in that thread. [1] https://lore.kernel.org/r/Y31s%2FK8T85jh05wH@google.com/ Link: https://lkml.kernel.org/r/20221230215252.2628425-1-yuzhao@google.com Signed-off-by: Yu Zhao Cc: Alexander Viro Cc: Andrea Righi Cc: Johannes Weiner Cc: Michael Larabel Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index acf03147fff8..4abebf2615a3 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -594,4 +594,12 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, #endif } +static inline bool vma_has_recency(struct vm_area_struct *vma) +{ + if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)) + return false; + + return true; +} + #endif -- cgit v1.2.3 From 17e810229cb3068b692fa078bd9b3a6527e0866a Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Fri, 30 Dec 2022 14:52:52 -0700 Subject: mm: support POSIX_FADV_NOREUSE This patch adds POSIX_FADV_NOREUSE to vma_has_recency() so that the LRU algorithm can ignore access to mapped files marked by this flag. The advantages of POSIX_FADV_NOREUSE are: 1. Unlike MADV_SEQUENTIAL and MADV_RANDOM, it does not alter the default readahead behavior. 2. Unlike MADV_SEQUENTIAL and MADV_RANDOM, it does not split VMAs and therefore does not take mmap_lock. 3. Unlike MADV_COLD, setting it has a negligible cost, regardless of how many pages it affects. Its limitations are: 1. Like POSIX_FADV_RANDOM and POSIX_FADV_SEQUENTIAL, it currently does not support range. IOW, its scope is the entire file. 2. It currently does not ignore access through file descriptors. Specifically, for the active/inactive LRU, given a file page shared by two users and one of them having set POSIX_FADV_NOREUSE on the file, this page will be activated upon the second user accessing it. This corner case can be covered by checking POSIX_FADV_NOREUSE before calling folio_mark_accessed() on the read path. But it is considered not worth the effort. There have been a few attempts to support POSIX_FADV_NOREUSE, e.g., [1]. This time the goal is to fill a niche: a few desktop applications, e.g., large file transferring and video encoding/decoding, want fast file streaming with mmap() rather than direct IO. Among those applications, an SVT-AV1 regression was reported when running with MGLRU [2]. 
The following test can reproduce that regression. kb=$(awk '/MemTotal/ { print $2 }' /proc/meminfo) kb=$((kb - 8*1024*1024)) modprobe brd rd_nr=1 rd_size=$kb dd if=/dev/zero of=/dev/ram0 bs=1M mkfs.ext4 /dev/ram0 mount /dev/ram0 /mnt/ swapoff -a fallocate -l 8G /mnt/swapfile mkswap /mnt/swapfile swapon /mnt/swapfile wget http://ultravideo.cs.tut.fi/video/Bosphorus_3840x2160_120fps_420_8bit_YUV_Y4M.7z 7z e -o/mnt/ Bosphorus_3840x2160_120fps_420_8bit_YUV_Y4M.7z SvtAv1EncApp --preset 12 -w 3840 -h 2160 \ -i /mnt/Bosphorus_3840x2160.y4m For MGLRU, the following change showed a [9-11]% increase in FPS, which makes it on par with the active/inactive LRU. patch Source/App/EncApp/EbAppMain.c < #include 35d35 < #include /* _O_BINARY */ 117a118 > posix_fadvise(config->mmap.fd, 0, 0, POSIX_FADV_NOREUSE); EOF [1] https://lore.kernel.org/r/1308923350-7932-1-git-send-email-andrea@betterlinux.com/ [2] https://openbenchmarking.org/result/2209259-PTS-MGLRU8GB57 Link: https://lkml.kernel.org/r/20221230215252.2628425-2-yuzhao@google.com Signed-off-by: Yu Zhao Cc: Alexander Viro Cc: Andrea Righi Cc: Johannes Weiner Cc: Michael Larabel Signed-off-by: Andrew Morton --- include/linux/fs.h | 2 ++ include/linux/mm_inline.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c1769a2c5d70..d353c262d669 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -166,6 +166,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File supports DIRECT IO */ #define FMODE_CAN_ODIRECT ((__force fmode_t)0x400000) +#define FMODE_NOREUSE ((__force fmode_t)0x800000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x4000000) diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 4abebf2615a3..26dcbda07e92 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -599,6 +599,9 @@ static inline bool vma_has_recency(struct vm_area_struct *vma) if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)) return false; + if (vma->vm_file && (vma->vm_file->f_mode & FMODE_NOREUSE)) + return false; + return true; } -- cgit v1.2.3 From 02d65d6fb1aae151570c8bfd1bd77a8153d2e607 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 6 Jan 2023 15:52:51 -0600 Subject: mm: introduce folio_is_pfmemalloc Add a folio equivalent for page_is_pfmemalloc. This removes two instances of page_is_pfmemalloc(folio_page(folio, 0)) so the folio can be used directly. Link: https://lkml.kernel.org/r/20230106215251.599222-1-sidhartha.kumar@oracle.com Suggested-by: Matthew Wilcox Signed-off-by: Sidhartha Kumar Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: SeongJae Park Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/mm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8a8563359946..76c97cb8ee9a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1926,6 +1926,21 @@ static inline bool page_is_pfmemalloc(const struct page *page) return (uintptr_t)page->lru.next & BIT(1); } +/* + * Return true only if the folio has been allocated with + * ALLOC_NO_WATERMARKS and the low watermark was not + * met implying that the system is under some pressure. + */ +static inline bool folio_is_pfmemalloc(const struct folio *folio) +{ + /* + * lru.next has bit 1 set if the page is allocated from the + * pfmemalloc reserves. 
Callers may simply overwrite it if + * they do not need to preserve that information. + */ + return (uintptr_t)folio->lru.next & BIT(1); +} + /* * Only to be called by the page allocator on a freshly allocated * page. -- cgit v1.2.3 From 2c3e674465e73e2f7eb52c39dc5c5e97e78e68ea Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Thu, 22 Dec 2022 18:50:47 +0000 Subject: firmware: arm_scmi: Refactor device create/destroy helpers Refactor SCMI device create/destroy helpers: it is now possible to ask for the creation of all the currently requested devices for a whole protocol, not only for the creation of a single well-defined device. While at that, re-instate uniqueness checks on the creation of SCMI SystemPower devices. Signed-off-by: Cristian Marussi Link: https://lore.kernel.org/r/20221222185049.737625-8-cristian.marussi@arm.com Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 4f765bc788ff..0ce5746a4470 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -804,11 +804,6 @@ struct scmi_device { #define to_scmi_dev(d) container_of(d, struct scmi_device, dev) -struct scmi_device * -scmi_device_create(struct device_node *np, struct device *parent, int protocol, - const char *name); -void scmi_device_destroy(struct scmi_device *scmi_dev); - struct scmi_device_id { u8 protocol_id; const char *name; -- cgit v1.2.3 From 338a588e9db3c5ea7a35bb332cb3bdb532fd1f08 Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Mon, 16 Jan 2023 12:49:14 +0000 Subject: coresight: trace-id: Add API to dynamically assign Trace ID values The existing mechanism to assign Trace ID values to sources is limited and does not scale for larger multicore / multi trace source systems. The API introduces functions that reserve IDs based on availabilty represented by a coresight_trace_id_map structure. This records the used and free IDs in a bitmap. CPU bound sources such as ETMs use the coresight_trace_id_get_cpu_id coresight_trace_id_put_cpu_id pair of functions. The API will record the ID associated with the CPU. This ensures that the same ID will be re-used while perf events are active on the CPU. The put_cpu_id function will pend release of the ID until all perf cs_etm sessions are complete. For backward compatibility the functions will attempt to use the same CPU IDs as the legacy system would have used if these are still available. Non-cpu sources, such as the STM can use coresight_trace_id_get_system_id / coresight_trace_id_put_system_id. Signed-off-by: Mike Leach [ Fix checkpatch warning in drivers/hwtracing/coresight/coresight-trace-id.c ] Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230116124928.5440-2-mike.leach@linaro.org --- include/linux/coresight-pmu.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index 6c2fd6cc5a98..ffff4e6277e5 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -10,6 +10,16 @@ #define CORESIGHT_ETM_PMU_NAME "cs_etm" #define CORESIGHT_ETM_PMU_SEED 0x10 +/* + * The legacy Trace ID system based on fixed calculation from the cpu + * number. 
This has been replaced by drivers using a dynamic allocation + * system - but need to retain the legacy algorithm for backward comparibility + * in certain situations:- + * a) new perf running on older systems that generate the legacy mapping + * b) older tools that may not update at the same time as the kernel. + */ +#define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2)) + /* * Below are the definition of bit offsets for perf option, and works as * arbitrary values for all ETM versions. -- cgit v1.2.3 From 42708bac18cf7f09c058058cd4564f879c53b900 Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Mon, 16 Jan 2023 12:49:20 +0000 Subject: coresight: etmX.X: stm: Remove trace_id() callback CoreSight sources provide a callback (.trace_id) in the standard source ops which returns the ID to the core code. This was used to check that sources all had a unique Trace ID. Uniqueness is now gauranteed by the Trace ID allocation system, and the check code has been removed from the core. This patch removes the unneeded and unused .trace_id source ops from the ops structure and implementations in etm3x, etm4x and stm. Signed-off-by: Mike Leach Reviewed-by: Suzuki K Poulose Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230116124928.5440-8-mike.leach@linaro.org --- include/linux/coresight.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 1554021231f9..e241eb88dfb9 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -314,14 +314,11 @@ struct coresight_ops_link { * Operations available for sources. * @cpu_id: returns the value of the CPU number this component * is associated to. - * @trace_id: returns the value of the component's trace ID as known - * to the HW. * @enable: enables tracing for a source. * @disable: disables tracing for a source. */ struct coresight_ops_source { int (*cpu_id)(struct coresight_device *csdev); - int (*trace_id)(struct coresight_device *csdev); int (*enable)(struct coresight_device *csdev, struct perf_event *event, u32 mode); void (*disable)(struct coresight_device *csdev, -- cgit v1.2.3 From 206bb3858949b6509de75f7d3697303a073cbaa1 Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Mon, 16 Jan 2023 12:49:21 +0000 Subject: coresight: trace id: Remove legacy get trace ID function. Removes legacy coresight_get_trace_id() function now its use has been removed from the ETM code. Signed-off-by: Mike Leach Reviewed-by: Suzuki K Poulose Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230116124928.5440-9-mike.leach@linaro.org --- include/linux/coresight-pmu.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index ffff4e6277e5..624f4843453e 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -8,7 +8,6 @@ #define _LINUX_CORESIGHT_PMU_H #define CORESIGHT_ETM_PMU_NAME "cs_etm" -#define CORESIGHT_ETM_PMU_SEED 0x10 /* * The legacy Trace ID system based on fixed calculation from the cpu @@ -44,15 +43,4 @@ #define ETM4_CFG_BIT_RETSTK 12 #define ETM4_CFG_BIT_VMID_OPT 15 -static inline int coresight_get_trace_id(int cpu) -{ - /* - * A trace ID of value 0 is invalid, so let's start at some - * random value that fits in 7 bits and go from there. Since - * the common convention is to have data trace IDs be I(N) + 1, - * set instruction trace IDs as a function of the CPU number. 
- */ - return (CORESIGHT_ETM_PMU_SEED + (cpu * 2)); -} - #endif -- cgit v1.2.3 From aa19bb4c35834dd574b36d482cc44c78816e6fcd Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Mon, 16 Jan 2023 12:49:26 +0000 Subject: coresight: events: PERF_RECORD_AUX_OUTPUT_HW_ID used for Trace ID Use the perf_report_aux_output_id() call to output the CoreSight trace ID and associated CPU as a PERF_RECORD_AUX_OUTPUT_HW_ID record in the perf.data file. Signed-off-by: Mike Leach Reviewed-by: Suzuki K Poulose Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230116124928.5440-14-mike.leach@linaro.org --- include/linux/coresight-pmu.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index 624f4843453e..51ac441a37c3 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -7,6 +7,8 @@ #ifndef _LINUX_CORESIGHT_PMU_H #define _LINUX_CORESIGHT_PMU_H +#include + #define CORESIGHT_ETM_PMU_NAME "cs_etm" /* @@ -43,4 +45,16 @@ #define ETM4_CFG_BIT_RETSTK 12 #define ETM4_CFG_BIT_VMID_OPT 15 +/* + * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload. + * Used to associate a CPU with the CoreSight Trace ID. + * [07:00] - Trace ID - uses 8 bits to make value easy to read in file. + * [59:08] - Unused (SBZ) + * [63:60] - Version + */ +#define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0) +#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60) + +#define CS_AUX_HW_ID_CURR_VERSION 0 + #endif -- cgit v1.2.3 From 93c473948c588978cd55d9a3adad8b3e8057aa21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B3=20=C3=81gila=20Bitsch?= Date: Fri, 13 Jan 2023 01:53:19 +0100 Subject: usb: gadget: add WebUSB landing page support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a custom (non-USB IF) extension to the USB standard: https://wicg.github.io/webusb/ This specification is published under the W3C Community Contributor Agreement, which in particular allows implementing the specification without any royalties. The specification allows USB gadgets to announce a URL to a landing page and describes a JavaScript interface for websites to interact with the USB gadget, if the user allows it. It is currently supported by Chromium-based browsers, such as Chrome, Edge and Opera on all major operating systems including Linux. This patch adds optional support for Linux-based USB gadgets wishing to expose such a landing page. During device enumeration, a host recognizes that the announced USB version is at least 2.01, which means that there are BOS descriptors available. The device then announces WebUSB support using a platform device capability. This includes a vendor code under which the landing page URL can be retrieved using a vendor-specific request. Previously, the BOS descriptors would unconditionally include an LPM related descriptor, as BOS descriptors were only ever sent when the device was LPM capable. As this is no longer the case, this patch puts this descriptor behind an lpm_capable condition.
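For reference, the WebUSB platform capability data carried in the BOS descriptor could be filled in as sketched below; the values are illustrative only (the example vendor code is arbitrary) and the layout comes from the new webusb.h header added further down:

	/* Hedged sketch: example capability data, not the gadget core's actual code. */
	static const struct usb_webusb_cap_data example_webusb_cap = {
		.bcdVersion   = WEBUSB_VERSION_1_00,
		.bVendorCode  = 0x01,				/* arbitrary example vendor code */
		.iLandingPage = WEBUSB_LANDING_PAGE_PRESENT,	/* URL descriptor at fixed index 1 */
	};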
Usage is modeled after os_desc descriptors: echo 1 > webusb/use echo "https://www.kernel.org" > webusb/landingPage lsusb will report the device with the following lines: Platform Device Capability: bLength 24 bDescriptorType 16 bDevCapabilityType 5 bReserved 0 PlatformCapabilityUUID {3408b638-09a9-47a0-8bfd-a0768815b665} WebUSB: bcdVersion 1.00 bVendorCode 0 iLandingPage 1 https://www.kernel.org Signed-off-by: Jó Ágila Bitsch Link: https://lore.kernel.org/r/Y8Crf8P2qAWuuk/F@jo-einhundert Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 7 ++++ include/linux/usb/webusb.h | 83 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 include/linux/usb/webusb.h (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 43ac3fa760db..91d22c3ed458 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -474,6 +475,12 @@ struct usb_composite_dev { struct usb_configuration *os_desc_config; unsigned int use_os_string:1; + /* WebUSB */ + u16 bcd_webusb_version; + u8 b_webusb_vendor_code; + char landing_page[WEBUSB_URL_RAW_MAX_LENGTH]; + unsigned int use_webusb:1; + /* private: */ /* internals */ unsigned int suspended:1; diff --git a/include/linux/usb/webusb.h b/include/linux/usb/webusb.h new file mode 100644 index 000000000000..b430d84357f3 --- /dev/null +++ b/include/linux/usb/webusb.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * WebUSB descriptors and constants + * + * Copyright (C) 2023 Jó Ágila Bitsch + */ + +#ifndef __LINUX_USB_WEBUSB_H +#define __LINUX_USB_WEBUSB_H + +#include "uapi/linux/usb/ch9.h" + +/* + * little endian PlatformCapablityUUID for WebUSB + * 3408b638-09a9-47a0-8bfd-a0768815b665 + * to identify Platform Device Capability descriptors as referring to WebUSB + * + * the UUID above MUST be sent over the wire as the byte sequence: + * {0x38, 0xB6, 0x08, 0x34, 0xA9, 0x09, 0xA0, 0x47, 0x8B, 0xFD, 0xA0, 0x76, 0x88, 0x15, 0xB6, 0x65}. + */ +#define WEBUSB_UUID \ + UUID_INIT(0x38b60834, 0xa909, 0xa047, 0x8b, 0xfd, 0xa0, 0x76, 0x88, 0x15, 0xb6, 0x65) + +/* + * WebUSB Platform Capability data + * + * A device announces support for the + * WebUSB command set by including the following Platform Descriptor Data in its + * Binary Object Store associated with the WebUSB_UUID above. + * See: https://wicg.github.io/webusb/#webusb-platform-capability-descriptor + */ +struct usb_webusb_cap_data { + __le16 bcdVersion; +#define WEBUSB_VERSION_1_00 cpu_to_le16(0x0100) /* currently only version 1.00 is defined */ + u8 bVendorCode; + u8 iLandingPage; +#define WEBUSB_LANDING_PAGE_NOT_PRESENT 0 +#define WEBUSB_LANDING_PAGE_PRESENT 1 /* we chose the fixed index 1 for the URL descriptor */ +} __packed; + +#define USB_WEBUSB_CAP_DATA_SIZE 4 + +/* + * Get URL Request + * + * The request to fetch an URL is defined in https://wicg.github.io/webusb/#get-url as: + * bmRequestType: (USB_DIR_IN | USB_TYPE_VENDOR) = 11000000B + * bRequest: bVendorCode + * wValue: iLandingPage + * wIndex: GET_URL = 2 + * wLength: Descriptor Length (typically U8_MAX = 255) + * Data: URL Descriptor + */ +#define WEBUSB_GET_URL 2 + +/* + * This descriptor contains a single URL and is returned by the Get URL request. 
+ * + * See: https://wicg.github.io/webusb/#url-descriptor + */ +struct webusb_url_descriptor { + u8 bLength; +#define WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH 3 + u8 bDescriptorType; +#define WEBUSB_URL_DESCRIPTOR_TYPE 3 + u8 bScheme; +#define WEBUSB_URL_SCHEME_HTTP 0 +#define WEBUSB_URL_SCHEME_HTTPS 1 +#define WEBUSB_URL_SCHEME_NONE 255 + u8 URL[U8_MAX - WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH]; +} __packed; + +/* + * Buffer size to hold the longest URL that can be in an URL descriptor + * + * The descriptor can be U8_MAX bytes long. + * WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH bytes are used for a header. + * Since the longest prefix that might be stripped is "https://", we may accommodate an additional + * 8 bytes. + */ +#define WEBUSB_URL_RAW_MAX_LENGTH (U8_MAX - WEBUSB_URL_DESCRIPTOR_HEADER_LENGTH + 8) + +#endif /* __LINUX_USB_USBNET_H */ -- cgit v1.2.3 From 52af7863508e94f3e05e43a3f2f3943a71dcffab Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 17 Jan 2023 17:21:20 +0200 Subject: device property: Make fwnode_graph_for_each_endpoint() consistent Make fwnode_graph_for_each_endpoint() consistent with the rest of for_each_*() definitions in the file, i.e. use the form of for (iter = func(NULL); iter; \ iter = func(iter)) as it's done in all the rest of the similar macro definitions. Signed-off-by: Andy Shevchenko Acked-by: Rafael J. Wysocki Acked-by: Sakari Ailus Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/20230117152120.42531-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/property.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 1ffd4f9bb67b..0a29db15ff34 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -436,9 +436,9 @@ fwnode_graph_get_endpoint_by_id(const struct fwnode_handle *fwnode, unsigned int fwnode_graph_get_endpoint_count(struct fwnode_handle *fwnode, unsigned long flags); -#define fwnode_graph_for_each_endpoint(fwnode, child) \ - for (child = NULL; \ - (child = fwnode_graph_get_next_endpoint(fwnode, child)); ) +#define fwnode_graph_for_each_endpoint(fwnode, child) \ + for (child = fwnode_graph_get_next_endpoint(fwnode, NULL); child; \ + child = fwnode_graph_get_next_endpoint(fwnode, child)) int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); -- cgit v1.2.3 From 41000b03af9e15075061cdbeea461cfa5e12a8eb Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Thu, 24 Nov 2022 13:39:08 +0100 Subject: earlycon: Increase options size Now that the clock frequency is also part of the options, 16 bytes is too little. 
Without this patch dmesg does not show the whole options, Eg: earlycon: uart0 at MMIO32 0x00000000fedc9000 (options '115200n8,480000') instead of: '115200n8,48000000' Signed-off-by: Ricardo Ribalda Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20221123-serial-clk-v3-2-49c516980ae0@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index fd59f600094a..026294c6ccb8 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -781,7 +781,7 @@ static inline int uart_poll_timeout(struct uart_port *port) struct earlycon_device { struct console *con; struct uart_port port; - char options[16]; /* e.g., 115200n8 */ + char options[32]; /* e.g., 115200n8 */ unsigned int baud; }; -- cgit v1.2.3 From ef460db2a7c1df5eda2ea6011e7586e54e23b8c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 25 Nov 2022 15:05:04 +0200 Subject: serial: 8250: Use defined IER bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of literal 0x0f, add a define for enabling all IER bits the 8250 driver is interested in. Don't make the define for combined flags part of UAPI. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221125130509.8482-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial.h b/include/linux/serial.h index 3d6fe3ef92cf..ad6e1c37e2d5 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -12,6 +12,11 @@ #include #include +#define UART_IER_ALL_INTR (UART_IER_MSI | \ + UART_IER_RLSI | \ + UART_IER_THRI | \ + UART_IER_RDI) + /* Helper for dealing with UART_LCR_WLEN* defines */ #define UART_LCR_WLEN(x) ((x) - 5) -- cgit v1.2.3 From d9c1d3cbdeec94f077679b73ed5ce3a4fe4bf4b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 25 Nov 2022 15:05:05 +0200 Subject: serial: 8250: Name MSR literals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add UART_MSR_STATUS_BITS for CD, RI, DSR & CTS. Use names for the literal. Don't make the define for combined flags part of UAPI. Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20221125130509.8482-3-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial.h b/include/linux/serial.h index ad6e1c37e2d5..bfda927dde15 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -28,6 +28,11 @@ static inline bool uart_lsr_tx_empty(u16 lsr) return (lsr & UART_LSR_BOTH_EMPTY) == UART_LSR_BOTH_EMPTY; } +#define UART_MSR_STATUS_BITS (UART_MSR_DCD | \ + UART_MSR_RI | \ + UART_MSR_DSR | \ + UART_MSR_CTS) + /* * Counters of the input lines (CTS, DSR, RI, CD) interrupts */ -- cgit v1.2.3 From feb36abbedea2644bf31693aa287a33a3a9fbd7c Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Thu, 12 Jan 2023 09:01:31 +0100 Subject: tty: vt: remove struct uni_screen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It contains only lines with pointers to characters (u32s). So use simple clear 'u32 **lines' all over the code. This avoids zero-length arrays. 
It also makes the allocation less error-prone (size of the struct wasn't taken into account at all). Signed-off-by: Jiri Slaby (SUSE) Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230112080136.4929-6-jirislaby@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/console_struct.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index 1518568aaf0f..539f1cd45309 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -18,7 +18,6 @@ #include struct uni_pagedict; -struct uni_screen; #define NPAR 16 #define VC_TABSTOPS_COUNT 256U @@ -159,7 +158,7 @@ struct vc_data { struct vc_data **vc_display_fg; /* [!] Ptr to var holding fg console for this display */ struct uni_pagedict *uni_pagedict; struct uni_pagedict **uni_pagedict_loc; /* [!] Location of uni_pagedict variable for this console */ - struct uni_screen *vc_uni_screen; /* unicode screen content */ + u32 **vc_uni_lines; /* unicode screen content */ /* additional information is in vt_kern.h */ }; -- cgit v1.2.3 From 7de06d8455211d2d875bb174e361ccb6faa5ae3f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 29 Dec 2022 16:50:29 +0100 Subject: soc: qcom-geni-se: add more symbol definitions The following symbols will be used when adding support for SE DMA in the qcom geni serial driver. Signed-off-by: Bartosz Golaszewski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20221229155030.418800-14-brgl@bgdev.pl Signed-off-by: Greg Kroah-Hartman --- include/linux/qcom-geni-se.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h index f5672785c0c4..400213daa461 100644 --- a/include/linux/qcom-geni-se.h +++ b/include/linux/qcom-geni-se.h @@ -103,6 +103,7 @@ struct geni_se { #define SE_DMA_TX_FSM_RST 0xc58 #define SE_DMA_RX_IRQ_STAT 0xd40 #define SE_DMA_RX_IRQ_CLR 0xd44 +#define SE_DMA_RX_LEN_IN 0xd54 #define SE_DMA_RX_FSM_RST 0xd58 #define SE_HW_PARAM_0 0xe24 #define SE_HW_PARAM_1 0xe28 @@ -235,6 +236,8 @@ struct geni_se { #define RX_SBE BIT(2) #define RX_RESET_DONE BIT(3) #define RX_FLUSH_DONE BIT(4) +#define RX_DMA_PARITY_ERR BIT(5) +#define RX_DMA_BREAK GENMASK(8, 7) #define RX_GENI_GP_IRQ GENMASK(10, 5) #define RX_GENI_CANCEL_IRQ BIT(11) #define RX_GENI_GP_IRQ_EXT GENMASK(13, 12) -- cgit v1.2.3 From 163f080eb717d237f02d9a8c179b07ed31fdd6ad Mon Sep 17 00:00:00 2001 From: Christoph Niedermaier Date: Fri, 2 Dec 2022 11:41:25 +0100 Subject: serial: core: Add option to output RS485 RX_DURING_TX state via GPIO This patch provides a generic GPIO variable for outputting the state of RS485 RX_DURING_TX. The GPIO is defined by the devicetree property "rs485-rx-during-tx-gpios". To use it in a low level serial driver, the evaluation of this variable must be implemented there accordingly. 
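A low level serial driver that opts in could evaluate the new GPIO descriptor roughly as in this sketch (hypothetical driver code; it assumes the descriptor can be driven with the non-sleeping gpiod_set_value() from the driver's RS485 configuration path):

	/* Hedged sketch: reflect the RX-during-TX setting on the optional GPIO. */
	static void example_rs485_update_rx_during_tx(struct uart_port *port,
						      const struct serial_rs485 *rs485)
	{
		if (!port->rs485_rx_during_tx_gpio)
			return;

		gpiod_set_value(port->rs485_rx_during_tx_gpio,
				!!(rs485->flags & SER_RS485_RX_DURING_TX));
	}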
Signed-off-by: Christoph Niedermaier Link: https://lore.kernel.org/r/20221202104127.122761-2-cniedermaier@dh-electronics.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 026294c6ccb8..0b37a86bedc5 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -579,6 +579,7 @@ struct uart_port { struct serial_rs485 rs485; struct serial_rs485 rs485_supported; /* Supported mask for serial_rs485 */ struct gpio_desc *rs485_term_gpio; /* enable RS485 bus termination */ + struct gpio_desc *rs485_rx_during_tx_gpio; /* Output GPIO that sets the state of RS485 RX during TX */ struct serial_iso7816 iso7816; void *private_data; /* generic platform data pointer */ }; -- cgit v1.2.3 From b300fb26c59a749bf49559932fa8a85eb916b5a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 17 Jan 2023 11:03:52 +0200 Subject: tty: Convert ->carrier_raised() and callchains to bool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return boolean from ->carrier_raised() instead of 0 and 1. Make the return type change also to tty_port_carrier_raised() that makes the ->carrier_raised() call (+ cd variable in moxa into which its return value is stored). Also cleans up a few unnecessary constructs related to this change: return xx ? 1 : 0; -> return xx; if (xx) return 1; return 0; -> return xx; Reviewed-by: Jiri Slaby Acked-by: Ulf Hansson # For MMC Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230117090358.4796-7-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index fa3c3bdaa234..cf098459cb01 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -15,7 +15,7 @@ struct tty_struct; /** * struct tty_port_operations -- operations on tty_port - * @carrier_raised: return 1 if the carrier is raised on @port + * @carrier_raised: return true if the carrier is raised on @port * @dtr_rts: raise the DTR line if @raise is nonzero, otherwise lower DTR * @shutdown: called when the last close completes or a hangup finishes IFF the * port was initialized. Do not use to free resources. Turn off the device @@ -31,7 +31,7 @@ struct tty_struct; * the port itself. 
*/ struct tty_port_operations { - int (*carrier_raised)(struct tty_port *port); + bool (*carrier_raised)(struct tty_port *port); void (*dtr_rts)(struct tty_port *port, int raise); void (*shutdown)(struct tty_port *port); int (*activate)(struct tty_port *port, struct tty_struct *tty); @@ -230,7 +230,7 @@ static inline void tty_port_set_kopened(struct tty_port *port, bool val) struct tty_struct *tty_port_tty_get(struct tty_port *port); void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); -int tty_port_carrier_raised(struct tty_port *port); +bool tty_port_carrier_raised(struct tty_port *port); void tty_port_raise_dtr_rts(struct tty_port *port); void tty_port_lower_dtr_rts(struct tty_port *port); void tty_port_hangup(struct tty_port *port); -- cgit v1.2.3 From 5d420399073770134d2b03e004b2c0201c7fa26f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 17 Jan 2023 11:03:53 +0200 Subject: tty: Convert ->dtr_rts() to take bool argument MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the raise/on parameter in ->dtr_rts() to bool through the callchain. The parameter is used like bool. In USB serial, there remains a few implicit bool -> larger type conversions because some devices use u8 in their control messages. In moxa_tiocmget(), dtr variable was reused for line status which requires int so use a separate variable for status. Reviewed-by: Jiri Slaby Acked-by: Ulf Hansson # For MMC Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230117090358.4796-8-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index cf098459cb01..c44e489de0ff 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -16,7 +16,7 @@ struct tty_struct; /** * struct tty_port_operations -- operations on tty_port * @carrier_raised: return true if the carrier is raised on @port - * @dtr_rts: raise the DTR line if @raise is nonzero, otherwise lower DTR + * @dtr_rts: raise the DTR line if @raise is true, otherwise lower DTR * @shutdown: called when the last close completes or a hangup finishes IFF the * port was initialized. Do not use to free resources. Turn off the device * only. Called under the port mutex to serialize against @activate and @@ -32,7 +32,7 @@ struct tty_struct; */ struct tty_port_operations { bool (*carrier_raised)(struct tty_port *port); - void (*dtr_rts)(struct tty_port *port, int raise); + void (*dtr_rts)(struct tty_port *port, bool raise); void (*shutdown)(struct tty_port *port); int (*activate)(struct tty_port *port, struct tty_struct *tty); void (*destruct)(struct tty_port *port); -- cgit v1.2.3 From 0388a152fc5544be82e736343496f99c4eef8d62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 17 Jan 2023 11:03:54 +0200 Subject: tty/serial: Make ->dcd_change()+uart_handle_dcd_change() status bool active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert status parameter for ->dcd_change() and uart_handle_dcd_change() to bool which matches to how the parameter is used. Rename status to active to better describe what the parameter means. 
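An illustrative call site after the conversion (hypothetical driver code, not taken from this patch):

	/* Hedged sketch: DCD handling with the bool 'active' parameter. */
	static void example_check_modem_status(struct uart_port *port, unsigned int msr)
	{
		if (msr & UART_MSR_DDCD)
			uart_handle_dcd_change(port, msr & UART_MSR_DCD);
	}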
Acked-by: Rodolfo Giometti Reviewed-by: Jiri Slaby Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230117090358.4796-9-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 3 +-- include/linux/tty_ldisc.h | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 0b37a86bedc5..8c4187c4884a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -897,8 +897,7 @@ static inline bool uart_softcts_mode(struct uart_port *uport) * The following are helper functions for the low level drivers. */ -extern void uart_handle_dcd_change(struct uart_port *uport, - unsigned int status); +extern void uart_handle_dcd_change(struct uart_port *uport, bool active); extern void uart_handle_cts_change(struct uart_port *uport, unsigned int status); diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index dcb61ec11424..49dc172dedc7 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -170,7 +170,7 @@ int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass, * send, please arise a tasklet or workqueue to do the real data transfer. * Do not send data in this hook, it may lead to a deadlock. * - * @dcd_change: [DRV] ``void ()(struct tty_struct *tty, unsigned int status)`` + * @dcd_change: [DRV] ``void ()(struct tty_struct *tty, bool active)`` * * Tells the discipline that the DCD pin has changed its status. Used * exclusively by the %N_PPS (Pulse-Per-Second) line discipline. @@ -238,7 +238,7 @@ struct tty_ldisc_ops { void (*receive_buf)(struct tty_struct *tty, const unsigned char *cp, const char *fp, int count); void (*write_wakeup)(struct tty_struct *tty); - void (*dcd_change)(struct tty_struct *tty, unsigned int status); + void (*dcd_change)(struct tty_struct *tty, bool active); int (*receive_buf2)(struct tty_struct *tty, const unsigned char *cp, const char *fp, int count); void (*lookahead_buf)(struct tty_struct *tty, const unsigned char *cp, -- cgit v1.2.3 From 968d64578ec92968e8c79d766eb966efd1f68d7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 17 Jan 2023 11:03:55 +0200 Subject: serial: Make uart_handle_cts_change() status param bool active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert uart_handle_cts_change() to bool which is more appropriate than unsigned int. Rename status to active to better describe what the parameter means. While at it, make the comment about the active parameter easier to parse. Cleanup callsites from operations that are not necessary with bool. 
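Together with the previous patch, this lets a hypothetical call site drop the explicit 0/1 conversion, e.g.:

	/* Hedged sketch: the bool parameter makes the '? 1 : 0' dance unnecessary. */
	static void example_check_cts(struct uart_port *port, unsigned int msr)
	{
		/* before: uart_handle_cts_change(port, (msr & UART_MSR_CTS) ? 1 : 0); */
		if (msr & UART_MSR_DCTS)
			uart_handle_cts_change(port, msr & UART_MSR_CTS);
	}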
Reviewed-by: Jiri Slaby Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230117090358.4796-10-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8c4187c4884a..9e3e5e0d11b2 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -898,8 +898,7 @@ static inline bool uart_softcts_mode(struct uart_port *uport) */ extern void uart_handle_dcd_change(struct uart_port *uport, bool active); -extern void uart_handle_cts_change(struct uart_port *uport, - unsigned int status); +extern void uart_handle_cts_change(struct uart_port *uport, bool active); extern void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, unsigned int ch, unsigned int flag); -- cgit v1.2.3 From 87f22db4c251ff92d588c2cc710031a59d5e4ec0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 17 Jan 2023 11:03:56 +0200 Subject: tty: Return bool from tty_termios_hw_change() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change tty_termios_hw_change() to return bool. Reviewed-by: Jiri Slaby Reviewed-by: Johan Hovold Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230117090358.4796-11-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 730c3301d710..093935e97f42 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -453,7 +453,7 @@ unsigned char tty_get_char_size(unsigned int cflag); unsigned char tty_get_frame_size(unsigned int cflag); void tty_termios_copy_hw(struct ktermios *new, const struct ktermios *old); -int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b); +bool tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b); int tty_set_termios(struct tty_struct *tty, struct ktermios *kt); void tty_wakeup(struct tty_struct *tty); -- cgit v1.2.3 From 5701cb8bf50e1c723553344b3f731b308da8ea21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 17 Jan 2023 11:03:57 +0200 Subject: tty: Call ->dtr_rts() parameter active consistently MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert various parameter names for ->dtr_rts() and related functions from onoff, on, and raise to active. Reviewed-by: Jiri Slaby Acked-by: Ulf Hansson # For MMC Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230117090358.4796-12-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/tty_port.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty_port.h b/include/linux/tty_port.h index c44e489de0ff..edf685a24f7c 100644 --- a/include/linux/tty_port.h +++ b/include/linux/tty_port.h @@ -16,7 +16,7 @@ struct tty_struct; /** * struct tty_port_operations -- operations on tty_port * @carrier_raised: return true if the carrier is raised on @port - * @dtr_rts: raise the DTR line if @raise is true, otherwise lower DTR + * @dtr_rts: raise the DTR line if @active is true, otherwise lower DTR * @shutdown: called when the last close completes or a hangup finishes IFF the * port was initialized. Do not use to free resources. 
Turn off the device * only. Called under the port mutex to serialize against @activate and @@ -32,7 +32,7 @@ struct tty_struct; */ struct tty_port_operations { bool (*carrier_raised)(struct tty_port *port); - void (*dtr_rts)(struct tty_port *port, bool raise); + void (*dtr_rts)(struct tty_port *port, bool active); void (*shutdown)(struct tty_port *port); int (*activate)(struct tty_port *port, struct tty_struct *tty); void (*destruct)(struct tty_port *port); -- cgit v1.2.3 From 4747ab89b4a652f835494fcf8342aaa0efb9b0fd Mon Sep 17 00:00:00 2001 From: Matthew Gerlach Date: Sun, 15 Jan 2023 07:14:46 -0800 Subject: fpga: dfl: add basic support for DFHv1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Version 1 of the Device Feature Header (DFH) definition adds functionality to the Device Feature List (DFL) bus. A DFHv1 header may have one or more parameter blocks that further describes the HW to SW. Add support to the DFL bus to parse the MSI-X parameter. The location of a feature's register set is explicitly described in DFHv1 and can be relative to the base of the DFHv1 or an absolute address. Parse the location and pass the information to DFL driver. Signed-off-by: Matthew Gerlach Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230115151447.1353428-4-matthew.gerlach@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/dfl.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dfl.h b/include/linux/dfl.h index 431636a0dc78..0a7a00a0ee7f 100644 --- a/include/linux/dfl.h +++ b/include/linux/dfl.h @@ -27,11 +27,15 @@ enum dfl_id_type { * @id: id of the dfl device. * @type: type of DFL FIU of the device. See enum dfl_id_type. * @feature_id: feature identifier local to its DFL FIU type. + * @revision: revision of this dfl device feature. * @mmio_res: mmio resource of this dfl device. * @irqs: list of Linux IRQ numbers of this dfl device. * @num_irqs: number of IRQs supported by this dfl device. * @cdev: pointer to DFL FPGA container device this dfl device belongs to. * @id_entry: matched id entry in dfl driver's id table. + * @dfh_version: version of DFH for the device + * @param_size: size of the block parameters in bytes + * @params: pointer to block of parameters copied memory */ struct dfl_device { struct device dev; @@ -44,6 +48,9 @@ struct dfl_device { unsigned int num_irqs; struct dfl_fpga_cdev *cdev; const struct dfl_device_id *id_entry; + u8 dfh_version; + unsigned int param_size; + void *params; }; /** @@ -84,4 +91,5 @@ void dfl_driver_unregister(struct dfl_driver *dfl_drv); module_driver(__dfl_driver, dfl_driver_register, \ dfl_driver_unregister) +void *dfh_find_param(struct dfl_device *dfl_dev, int param_id, size_t *pcount); #endif /* __LINUX_DFL_H */ -- cgit v1.2.3 From ffc1e089725e3f8a15ddfdce283db42f7d0fa147 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Thu, 19 Jan 2023 16:19:15 +0100 Subject: VT: Add height parameter to con_font_get/set consw operations The current con_font_get/set API currently hardcodes a 32-pixel-tall limitation, which only dates from the old VGA hardware which could not handle taller fonts than that. This change just adds a vpitch parameter to release this constraint. Drivers which do not support vpitch != 32 can just return EINVAL when it is not 32, font loading tools will revert to trying 32 and succeed. This change makes the fbcon driver consider vpitch appropriately, thus making it able to load large fonts. 
Signed-off-by: Samuel Thibault Link: https://lore.kernel.org/r/20230119151934.932642243@ens-lyon.org Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 9cea254b34b8..9e52de94a583 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -59,8 +59,9 @@ struct consw { int (*con_switch)(struct vc_data *vc); int (*con_blank)(struct vc_data *vc, int blank, int mode_switch); int (*con_font_set)(struct vc_data *vc, struct console_font *font, - unsigned int flags); - int (*con_font_get)(struct vc_data *vc, struct console_font *font); + unsigned int vpitch, unsigned int flags); + int (*con_font_get)(struct vc_data *vc, struct console_font *font, + unsigned int vpitch); int (*con_font_default)(struct vc_data *vc, struct console_font *font, char *name); int (*con_resize)(struct vc_data *vc, unsigned int width, -- cgit v1.2.3 From e610e81464e4e52645a0f5d259b8a9e3632db6ff Mon Sep 17 00:00:00 2001 From: "Garmin.Chang" Date: Fri, 23 Dec 2022 16:05:53 +0800 Subject: soc: mediatek: pm-domains: Add support for mt8188 Add domain control data including bus protection data size change due to more protection steps in mt8188. Signed-off-by: Garmin.Chang Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20221223080553.9397-3-Garmin.Chang@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/infracfg.h | 121 ++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h index 50804ac748bd..07f67b3d8e97 100644 --- a/include/linux/soc/mediatek/infracfg.h +++ b/include/linux/soc/mediatek/infracfg.h @@ -140,6 +140,127 @@ #define MT8192_TOP_AXI_PROT_EN_MM_2_MDP_2ND BIT(13) #define MT8192_TOP_AXI_PROT_EN_VDNR_CAM BIT(21) +#define MT8188_TOP_AXI_PROT_EN_SET 0x2A0 +#define MT8188_TOP_AXI_PROT_EN_CLR 0x2A4 +#define MT8188_TOP_AXI_PROT_EN_STA 0x228 +#define MT8188_TOP_AXI_PROT_EN_1_SET 0x2A8 +#define MT8188_TOP_AXI_PROT_EN_1_CLR 0x2AC +#define MT8188_TOP_AXI_PROT_EN_1_STA 0x258 +#define MT8188_TOP_AXI_PROT_EN_2_SET 0x714 +#define MT8188_TOP_AXI_PROT_EN_2_CLR 0x718 +#define MT8188_TOP_AXI_PROT_EN_2_STA 0x724 + +#define MT8188_TOP_AXI_PROT_EN_MM_SET 0x2D4 +#define MT8188_TOP_AXI_PROT_EN_MM_CLR 0x2D8 +#define MT8188_TOP_AXI_PROT_EN_MM_STA 0x2EC +#define MT8188_TOP_AXI_PROT_EN_MM_2_SET 0xDCC +#define MT8188_TOP_AXI_PROT_EN_MM_2_CLR 0xDD0 +#define MT8188_TOP_AXI_PROT_EN_MM_2_STA 0xDD8 + +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_SET 0xB84 +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_CLR 0xB88 +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_STA 0xB90 +#define MT8188_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_SET 0xBCC +#define MT8188_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_CLR 0xBD0 +#define MT8188_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_STA 0xBD8 + +#define MT8188_TOP_AXI_PROT_EN_MFG1_STEP1 BIT(11) +#define MT8188_TOP_AXI_PROT_EN_2_MFG1_STEP2 BIT(7) +#define MT8188_TOP_AXI_PROT_EN_1_MFG1_STEP3 BIT(19) +#define MT8188_TOP_AXI_PROT_EN_2_MFG1_STEP4 BIT(5) +#define MT8188_TOP_AXI_PROT_EN_MFG1_STEP5 GENMASK(22, 21) +#define MT8188_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_MFG1_STEP6 BIT(17) + +#define MT8188_TOP_AXI_PROT_EN_PEXTP_MAC_P0_STEP1 BIT(2) +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_PEXTP_MAC_P0_STEP2 (BIT(8) | BIT(18) | BIT(30)) +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_ETHER_STEP1 BIT(24) +#define 
MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_HDMI_TX_STEP1 BIT(20) +#define MT8188_TOP_AXI_PROT_EN_2_ADSP_AO_STEP1 GENMASK(31, 29) +#define MT8188_TOP_AXI_PROT_EN_2_ADSP_AO_STEP2 (GENMASK(4, 3) | BIT(28)) +#define MT8188_TOP_AXI_PROT_EN_2_ADSP_INFRA_STEP1 (GENMASK(16, 14) | BIT(23) | \ + BIT(27)) +#define MT8188_TOP_AXI_PROT_EN_2_ADSP_INFRA_STEP2 (GENMASK(19, 17) | GENMASK(26, 25)) +#define MT8188_TOP_AXI_PROT_EN_2_ADSP_STEP1 GENMASK(11, 8) +#define MT8188_TOP_AXI_PROT_EN_2_ADSP_STEP2 GENMASK(22, 21) +#define MT8188_TOP_AXI_PROT_EN_2_AUDIO_STEP1 BIT(20) +#define MT8188_TOP_AXI_PROT_EN_2_AUDIO_STEP2 BIT(12) +#define MT8188_TOP_AXI_PROT_EN_2_AUDIO_ASRC_STEP1 BIT(24) +#define MT8188_TOP_AXI_PROT_EN_2_AUDIO_ASRC_STEP2 BIT(13) + +#define MT8188_TOP_AXI_PROT_EN_VPPSYS0_STEP1 BIT(10) +#define MT8188_TOP_AXI_PROT_EN_MM_2_VPPSYS0_STEP2 GENMASK(9, 8) +#define MT8188_TOP_AXI_PROT_EN_VPPSYS0_STEP3 BIT(23) +#define MT8188_TOP_AXI_PROT_EN_MM_2_VPPSYS0_STEP4 (BIT(1) | BIT(4) | BIT(11)) +#define MT8188_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_VPPSYS0_STEP5 (BIT(20)) +#define MT8188_TOP_AXI_PROT_EN_MM_VDOSYS0_STEP1 (GENMASK(18, 17) | GENMASK(21, 20)) +#define MT8188_TOP_AXI_PROT_EN_VDOSYS0_STEP2 BIT(6) +#define MT8188_TOP_AXI_PROT_EN_SUB_INFRA_VDNR_VDOSYS0_STEP3 BIT(21) +#define MT8188_TOP_AXI_PROT_EN_MM_VDOSYS1_STEP1 GENMASK(31, 30) +#define MT8188_TOP_AXI_PROT_EN_MM_VDOSYS1_STEP2 BIT(22) +#define MT8188_TOP_AXI_PROT_EN_MM_2_VDOSYS1_STEP3 BIT(10) +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_DP_TX_STEP1 BIT(23) +#define MT8188_TOP_AXI_PROT_EN_INFRA_VDNR_EDP_TX_STEP1 BIT(22) + +#define MT8188_TOP_AXI_PROT_EN_MM_VPPSYS1_STEP1 GENMASK(6, 5) +#define MT8188_TOP_AXI_PROT_EN_MM_VPPSYS1_STEP2 BIT(23) +#define MT8188_TOP_AXI_PROT_EN_MM_2_VPPSYS1_STEP3 BIT(18) +#define MT8188_TOP_AXI_PROT_EN_MM_2_WPE_STEP1 BIT(23) +#define MT8188_TOP_AXI_PROT_EN_MM_2_WPE_STEP2 BIT(21) +#define MT8188_TOP_AXI_PROT_EN_MM_VDEC0_STEP1 BIT(13) +#define MT8188_TOP_AXI_PROT_EN_MM_2_VDEC0_STEP2 BIT(13) +#define MT8188_TOP_AXI_PROT_EN_MM_VDEC1_STEP1 BIT(14) +#define MT8188_TOP_AXI_PROT_EN_MM_VDEC1_STEP2 BIT(29) +#define MT8188_TOP_AXI_PROT_EN_MM_VENC_STEP1 (BIT(9) | BIT(11)) +#define MT8188_TOP_AXI_PROT_EN_MM_VENC_STEP2 BIT(26) +#define MT8188_TOP_AXI_PROT_EN_MM_2_VENC_STEP3 BIT(2) +#define MT8188_TOP_AXI_PROT_EN_MM_IMG_VCORE_STEP1 (BIT(1) | BIT(3)) +#define MT8188_TOP_AXI_PROT_EN_MM_IMG_VCORE_STEP2 BIT(25) +#define MT8188_TOP_AXI_PROT_EN_MM_2_IMG_VCORE_STEP3 BIT(16) +#define MT8188_TOP_AXI_PROT_EN_MM_2_IMG_MAIN_STEP1 GENMASK(27, 26) +#define MT8188_TOP_AXI_PROT_EN_MM_2_IMG_MAIN_STEP2 GENMASK(25, 24) +#define MT8188_TOP_AXI_PROT_EN_MM_CAM_VCORE_STEP1 (BIT(2) | BIT(4)) +#define MT8188_TOP_AXI_PROT_EN_2_CAM_VCORE_STEP2 BIT(0) +#define MT8188_TOP_AXI_PROT_EN_1_CAM_VCORE_STEP3 BIT(22) +#define MT8188_TOP_AXI_PROT_EN_MM_CAM_VCORE_STEP4 BIT(24) +#define MT8188_TOP_AXI_PROT_EN_MM_2_CAM_VCORE_STEP5 BIT(17) +#define MT8188_TOP_AXI_PROT_EN_MM_2_CAM_MAIN_STEP1 GENMASK(31, 30) +#define MT8188_TOP_AXI_PROT_EN_2_CAM_MAIN_STEP2 BIT(2) +#define MT8188_TOP_AXI_PROT_EN_MM_2_CAM_MAIN_STEP3 GENMASK(29, 28) +#define MT8188_TOP_AXI_PROT_EN_2_CAM_MAIN_STEP4 BIT(1) + +#define MT8188_SMI_COMMON_CLAMP_EN_STA 0x3C0 +#define MT8188_SMI_COMMON_CLAMP_EN_SET 0x3C4 +#define MT8188_SMI_COMMON_CLAMP_EN_CLR 0x3C8 + +#define MT8188_SMI_COMMON_SMI_CLAMP_DIP_TO_VDO0 GENMASK(3, 1) +#define MT8188_SMI_COMMON_SMI_CLAMP_DIP_TO_VPP1 GENMASK(2, 1) +#define MT8188_SMI_COMMON_SMI_CLAMP_IPE_TO_VPP1 BIT(0) + +#define MT8188_SMI_COMMON_SMI_CLAMP_CAM_SUBA_TO_VPP0 GENMASK(3, 2) +#define 
MT8188_SMI_COMMON_SMI_CLAMP_CAM_SUBB_TO_VDO0 GENMASK(3, 2) + +#define MT8188_SMI_LARB10_RESET_ADDR 0xC +#define MT8188_SMI_LARB11A_RESET_ADDR 0xC +#define MT8188_SMI_LARB11C_RESET_ADDR 0xC +#define MT8188_SMI_LARB12_RESET_ADDR 0xC +#define MT8188_SMI_LARB11B_RESET_ADDR 0xC +#define MT8188_SMI_LARB15_RESET_ADDR 0xC +#define MT8188_SMI_LARB16B_RESET_ADDR 0xA0 +#define MT8188_SMI_LARB17B_RESET_ADDR 0xA0 +#define MT8188_SMI_LARB16A_RESET_ADDR 0xA0 +#define MT8188_SMI_LARB17A_RESET_ADDR 0xA0 + +#define MT8188_SMI_LARB10_RESET BIT(0) +#define MT8188_SMI_LARB11A_RESET BIT(0) +#define MT8188_SMI_LARB11C_RESET BIT(0) +#define MT8188_SMI_LARB12_RESET BIT(8) +#define MT8188_SMI_LARB11B_RESET BIT(0) +#define MT8188_SMI_LARB15_RESET BIT(0) +#define MT8188_SMI_LARB16B_RESET BIT(4) +#define MT8188_SMI_LARB17B_RESET BIT(4) +#define MT8188_SMI_LARB16A_RESET BIT(4) +#define MT8188_SMI_LARB17A_RESET BIT(4) + #define MT8186_TOP_AXI_PROT_EN_SET (0x2A0) #define MT8186_TOP_AXI_PROT_EN_CLR (0x2A4) #define MT8186_TOP_AXI_PROT_EN_STA (0x228) -- cgit v1.2.3 From cecafc0a830f7ea89bc11c644e2841f603fcc4e6 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Thu, 5 Jan 2023 21:01:27 +0800 Subject: KVM: MMU: Make the definition of 'INVALID_GPA' common KVM already has a 'GPA_INVALID' defined as (~(gpa_t)0) in kvm_types.h, and it is used by ARM code. We do not need another definition of 'INVALID_GPA' for X86 specifically. Instead of using the common 'GPA_INVALID' for X86, replace it with 'INVALID_GPA', and change the users of 'GPA_INVALID' so that the diff can be smaller. Also because the name 'INVALID_GPA' tells the user we are using an invalid GPA, while the name 'GPA_INVALID' is emphasizing the GPA is an invalid one. No functional change intended. Signed-off-by: Yu Zhang Reviewed-by: Paul Durrant Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20230105130127.866171-1-yu.c.zhang@linux.intel.com Signed-off-by: Oliver Upton --- include/linux/kvm_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 76de36e56cdf..2728d49bbdf6 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -40,7 +40,7 @@ typedef unsigned long gva_t; typedef u64 gpa_t; typedef u64 gfn_t; -#define GPA_INVALID (~(gpa_t)0) +#define INVALID_GPA (~(gpa_t)0) typedef unsigned long hva_t; typedef u64 hpa_t; -- cgit v1.2.3 From 344da544f177f919cf6919e5abcd388f27aa53db Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 16 Dec 2022 16:52:01 -0800 Subject: x86/nmi: Print reasons why backtrace NMIs are ignored Instrument nmi_trigger_cpumask_backtrace() to dump out diagnostics based on evidence accumulated by exc_nmi(). These diagnostics are dumped for CPUs that ignored an NMI backtrace request for more than 10 seconds. [ paulmck: Apply Ingo Molnar feedback. ] Signed-off-by: Paul E. McKenney Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. 
Peter Anvin" Cc: Reviewed-by: Ingo Molnar --- include/linux/nmi.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index f700ff2df074..048c0b9aa623 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -214,4 +214,12 @@ int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *); #include #endif +#ifdef CONFIG_NMI_CHECK_CPU +void nmi_backtrace_stall_snap(const struct cpumask *btp); +void nmi_backtrace_stall_check(const struct cpumask *btp); +#else +static inline void nmi_backtrace_stall_snap(const struct cpumask *btp) {} +static inline void nmi_backtrace_stall_check(const struct cpumask *btp) {} +#endif + #endif -- cgit v1.2.3 From 13e80244ca7e51d5eb7803f05e0579b11fc89048 Mon Sep 17 00:00:00 2001 From: Hanna Hawa Date: Wed, 28 Dec 2022 16:48:12 +0000 Subject: pinctrl: Add an API to get the pinctrl pins if initialized Add an API to get the pinctrl pins if it was initialized before driver probed. This API will be used in I2C core to get the device pinctrl information for recovery state change. Signed-off-by: Hanna Hawa Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Wolfram Sang --- include/linux/pinctrl/devinfo.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/devinfo.h b/include/linux/pinctrl/devinfo.h index 9e8b559e1253..bb6653af4f92 100644 --- a/include/linux/pinctrl/devinfo.h +++ b/include/linux/pinctrl/devinfo.h @@ -18,6 +18,8 @@ struct device; #ifdef CONFIG_PINCTRL +#include + /* The device core acts as a consumer toward pinctrl */ #include @@ -44,6 +46,14 @@ struct dev_pin_info { extern int pinctrl_bind_pins(struct device *dev); extern int pinctrl_init_done(struct device *dev); +static inline struct pinctrl *dev_pinctrl(struct device *dev) +{ + if (!dev->pins) + return NULL; + + return dev->pins->p; +} + #else /* Stubs if we're not using pinctrl */ @@ -58,5 +68,10 @@ static inline int pinctrl_init_done(struct device *dev) return 0; } +static inline struct pinctrl *dev_pinctrl(struct device *dev) +{ + return NULL; +} + #endif /* CONFIG_PINCTRL */ #endif /* PINCTRL_DEVINFO_H */ -- cgit v1.2.3 From e3e289fbc0b520cf469469e8cdba84a50424eb65 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 19 Nov 2022 07:48:15 +0000 Subject: uacce: supports device isolation feature UACCE adds the hardware error isolation feature. To improve service reliability, some uacce devices that frequently encounter hardware errors are isolated. Therefore, this feature is added. Users can configure the hardware error threshold by 'isolate_strategy' sysfs node. The user space can get the device isolated state by 'isolate' sysfs node. If the number of device errors exceeds the configured error threshold, the device will be isolated. It means the uacce device is unavailable. 
Signed-off-by: Kai Ye Link: https://lore.kernel.org/r/20221119074817.12063-2-yekai13@huawei.com Signed-off-by: Greg Kroah-Hartman --- include/linux/uacce.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uacce.h b/include/linux/uacce.h index 9ce88c28b0a8..0a81c3dfd26c 100644 --- a/include/linux/uacce.h +++ b/include/linux/uacce.h @@ -8,6 +8,7 @@ #define UACCE_NAME "uacce" #define UACCE_MAX_REGION 2 #define UACCE_MAX_NAME_SIZE 64 +#define UACCE_MAX_ERR_THRESHOLD 65535 struct uacce_queue; struct uacce_device; @@ -30,6 +31,9 @@ struct uacce_qfile_region { * @is_q_updated: check whether the task is finished * @mmap: mmap addresses of queue to user space * @ioctl: ioctl for user space users of the queue + * @get_isolate_state: get the device state after set the isolate strategy + * @isolate_err_threshold_write: stored the isolate error threshold to the device + * @isolate_err_threshold_read: read the isolate error threshold value from the device */ struct uacce_ops { int (*get_available_instances)(struct uacce_device *uacce); @@ -43,6 +47,9 @@ struct uacce_ops { struct uacce_qfile_region *qfr); long (*ioctl)(struct uacce_queue *q, unsigned int cmd, unsigned long arg); + enum uacce_dev_state (*get_isolate_state)(struct uacce_device *uacce); + int (*isolate_err_threshold_write)(struct uacce_device *uacce, u32 num); + u32 (*isolate_err_threshold_read)(struct uacce_device *uacce); }; /** @@ -57,6 +64,11 @@ struct uacce_interface { const struct uacce_ops *ops; }; +enum uacce_dev_state { + UACCE_DEV_NORMAL, + UACCE_DEV_ISOLATE, +}; + enum uacce_q_state { UACCE_Q_ZOMBIE = 0, UACCE_Q_INIT, -- cgit v1.2.3 From cd0ac51c5760d4eed4981be5de9cad0255976512 Mon Sep 17 00:00:00 2001 From: Kai Ye Date: Sat, 19 Nov 2022 07:48:17 +0000 Subject: crypto: hisilicon/qm - define the device isolation strategy Define the device isolation strategy by the device driver. The user configures a hardware error threshold value by uacce interface. If the number of hardware errors exceeds the value of setting error threshold in one hour. The device will not be available in user space. The VF device use the PF device isolation strategy. All the hardware errors are processed by PF driver. 
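As a rough illustration of the strategy described above (not the actual driver code), the PF driver could log each hardware error on isolate_data.qm_hw_errs and compare the count accumulated over the last hour against the user-configured threshold. The qm_hw_err type below is hypothetical.

struct qm_hw_err {
	struct list_head list;
	u64 timestamp;		/* time the error was logged */
};

static bool qm_hw_err_over_threshold(struct hisi_qm *qm)
{
	struct qm_err_isolate *isolate = &qm->isolate_data;
	struct qm_hw_err *err;
	u32 count = 0;

	/* A threshold of zero means isolation is disabled. */
	if (!isolate->err_threshold)
		return false;

	mutex_lock(&isolate->isolate_lock);
	list_for_each_entry(err, &isolate->qm_hw_errs, list)
		count++;	/* real code would also age out entries older than one hour */
	mutex_unlock(&isolate->isolate_lock);

	return count >= isolate->err_threshold;
}
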
Signed-off-by: Kai Ye Acked-by: Herbert Xu Link: https://lore.kernel.org/r/20221119074817.12063-4-yekai13@huawei.com Signed-off-by: Greg Kroah-Hartman --- include/linux/hisi_acc_qm.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index be3aedaa96dc..d08bff0f87f9 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -272,6 +272,20 @@ struct hisi_qm_poll_data { u16 *qp_finish_id; }; +/** + * struct qm_err_isolate + * @isolate_lock: protects device error log + * @err_threshold: user config error threshold which triggers isolation + * @is_isolate: device isolation state + * @uacce_hw_errs: index into qm device error list + */ +struct qm_err_isolate { + struct mutex isolate_lock; + u32 err_threshold; + bool is_isolate; + struct list_head qm_hw_errs; +}; + struct hisi_qm { enum qm_hw_ver ver; enum qm_fun_type fun_type; @@ -341,6 +355,7 @@ struct hisi_qm { struct qm_shaper_factor *factor; u32 mb_qos; u32 type_rate; + struct qm_err_isolate isolate_data; }; struct hisi_qp_status { -- cgit v1.2.3 From b3c71626a9333b0b29f9921a39cef30b5961766f Mon Sep 17 00:00:00 2001 From: Mao Jinlong Date: Tue, 17 Jan 2023 06:57:01 -0800 Subject: Coresight: Add coresight TPDM source driver Add driver to support Coresight device TPDM (Trace, Profiling and Diagnostics Monitor). TPDM is a monitor to collect data from different datasets. This change is to add probe/enable/disable functions for tpdm source. Signed-off-by: Tao Zhang Signed-off-by: Mao Jinlong Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20230120095301.30792-1-quic_jinlmao@quicinc.com --- include/linux/coresight.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index e241eb88dfb9..f19a47b9bb5a 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -61,6 +61,7 @@ enum coresight_dev_subtype_source { CORESIGHT_DEV_SUBTYPE_SOURCE_PROC, CORESIGHT_DEV_SUBTYPE_SOURCE_BUS, CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE, + CORESIGHT_DEV_SUBTYPE_SOURCE_OTHERS, }; enum coresight_dev_subtype_helper { -- cgit v1.2.3 From 6640727fc55b7f0b561e927bc9c5b9d4c459fd8e Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 8 Jan 2023 21:56:52 +0000 Subject: parport_pc: Let chipset drivers mask ECR bits on writes Provide an `ecr_writable' parameter to `__parport_pc_probe_port' so that callers can specify a mask of bits to modify on ECR writes. To avoid the need for separate bit set and bit clear masks always set bit 0 whenever a non-zero mask has been set, as all the currently known cases where a mask is required, that is Oxford Semiconductor devices, do require this bit to be set. If further cases are discovered where the bit is required to be clear, we can update code accordingly, but chances are very low as the bit is supposed to be read-only[1]. Skip ECR probing, which can be problematic as the Oxford Semiconductor OX12PCI840 part has been reported to lock up on setting bit 2, whenever a non-zero mask has been requested by a port subdriver, assuming that the ECR must be there if the subdriver has requested a specific way to access it. References: [1] "Extended Capabilities Port Protocol and ISA Interface Standard", Microsoft Corporation, Revision: 1.14, July 14, 1993, Table 14 "Extended Control Register" Signed-off-by: Maciej W. 
Rozycki Signed-off-by: Sudip Mukherjee Link: https://lore.kernel.org/r/20230108215656.6433-3-sudipm.mukherjee@gmail.com Signed-off-by: Greg Kroah-Hartman --- include/linux/parport_pc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/parport_pc.h b/include/linux/parport_pc.h index 3d6fc576d6a1..f1ec5c10c3b3 100644 --- a/include/linux/parport_pc.h +++ b/include/linux/parport_pc.h @@ -26,6 +26,9 @@ struct parport_pc_private { /* Whether or not there's an ECR. */ int ecr; + /* Bitmask of writable ECR bits. */ + unsigned char ecr_writable; + /* Number of PWords that FIFO will hold. */ int fifo_depth; -- cgit v1.2.3 From a86367803838b369fe5486ac18771d14723c258c Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 10 Nov 2022 18:23:07 +0800 Subject: drivers: base: transport_class: fix possible memory leak Current some drivers(like iscsi) call transport_register_device() failed, they don't call transport_destroy_device() to release the memory allocated in transport_setup_device(), because they don't know what was done, it should be internal thing to release the resource in register function. So fix this leak by calling destroy function inside register function. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221110102307.3492557-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- include/linux/transport_class.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/transport_class.h b/include/linux/transport_class.h index 63076fb835e3..2efc271a96fa 100644 --- a/include/linux/transport_class.h +++ b/include/linux/transport_class.h @@ -70,8 +70,14 @@ void transport_destroy_device(struct device *); static inline int transport_register_device(struct device *dev) { + int ret; + transport_setup_device(dev); - return transport_add_device(dev); + ret = transport_add_device(dev); + if (ret) + transport_destroy_device(dev); + + return ret; } static inline void -- cgit v1.2.3 From 8aaec1177521eed07240ec5ac2bc55b2a1c35b42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?joewu=20=28=E5=90=B3=E4=BB=B2=E6=8C=AF=29?= Date: Fri, 20 Jan 2023 09:17:02 -0800 Subject: Input: cros_ec_keyb - add 3 buttons for monitor function Add 3 extra buttons: 'brightness up', 'brightness down' and 'screen lock' to support monitor manipulating function. 
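For illustration, the new EC_MKBP_* codes added in the hunk below would typically be translated into standard input events in the keyboard driver along these lines; the table is a simplified, hypothetical sketch rather than the actual cros_ec_keyb structures.

static const struct {
	unsigned int ec_code;	/* EC_MKBP_* button reported by the EC */
	unsigned int key_code;	/* Linux input key code forwarded to userspace */
} cros_ec_monitor_buttons[] = {
	{ EC_MKBP_BRI_UP,      KEY_BRIGHTNESSUP },
	{ EC_MKBP_BRI_DOWN,    KEY_BRIGHTNESSDOWN },
	{ EC_MKBP_SCREEN_LOCK, KEY_SCREENLOCK },
};
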
Signed-off-by: Joe Wu Link: https://lore.kernel.org/r/e23628e2cb464d238eb1c33a9e1e516b@msi.com Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/cros_ec_commands.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index 5744a2d746aa..a2073ed43972 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -3471,6 +3471,9 @@ struct ec_response_get_next_event_v1 { #define EC_MKBP_VOL_UP 1 #define EC_MKBP_VOL_DOWN 2 #define EC_MKBP_RECOVERY 3 +#define EC_MKBP_BRI_UP 4 +#define EC_MKBP_BRI_DOWN 5 +#define EC_MKBP_SCREEN_LOCK 6 /* Switches */ #define EC_MKBP_LID_OPEN 0 -- cgit v1.2.3 From aca1d27ac38a61d7db4b56418386992cb96b63f0 Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Thu, 19 Jan 2023 14:03:57 -0500 Subject: efi: xen: Implement memory descriptor lookup based on hypercall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xen on x86 boots dom0 in EFI mode but without providing a memory map. This means that some consistency checks we would like to perform on configuration tables or other data structures in memory are not currently possible. Xen does, however, expose EFI memory descriptor info via a Xen hypercall, so let's wire that up instead. It turns out that the returned information is not identical to what Linux's efi_mem_desc_lookup would return: the address returned is the address passed to the hypercall, and the size returned is the number of bytes remaining in the configuration table. However, none of the callers of efi_mem_desc_lookup() currently care about this. In the future, Xen may gain a hypercall that returns the actual start address, which can be used instead. Co-developed-by: Ard Biesheuvel Signed-off-by: Ard Biesheuvel Signed-off-by: Demi Marie Obenour Tested-by: Marek Marczykowski-Górecki Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index f6b107da1cbc..4d7a44f60990 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -731,6 +731,7 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); extern int __init efi_uart_console_only (void); extern u64 efi_mem_desc_end(efi_memory_desc_t *md); extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); +extern int __efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md); extern void efi_mem_reserve(phys_addr_t addr, u64 size); extern int efi_mem_reserve_persistent(phys_addr_t addr, u64 size); extern void efi_initialize_iomem_resources(struct resource *code_resource, -- cgit v1.2.3 From c0fecaa44dc341d86e4ce96efcda9ea8b4c106af Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Thu, 19 Jan 2023 14:03:58 -0500 Subject: efi: Apply allowlist to EFI configuration tables when running under Xen MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As it turns out, Xen does not guarantee that EFI boot services data regions in memory are preserved, which means that EFI configuration tables pointing into such memory regions may be corrupted before the dom0 OS has had a chance to inspect them. 
This is causing problems for Qubes OS when it attempts to perform system firmware updates, which requires that the contents of the EFI System Resource Table are valid when the fwupd userspace program runs. However, other configuration tables such as the memory attributes table or the runtime properties table are equally affected, and so we need a comprehensive workaround that works for any table type. So when running under Xen, check the EFI memory descriptor covering the start of the table, and disregard the table if it does not reside in memory that is preserved by Xen. Co-developed-by: Ard Biesheuvel Signed-off-by: Ard Biesheuvel Signed-off-by: Demi Marie Obenour Tested-by: Marek Marczykowski-Górecki Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 4d7a44f60990..1a1adc8d3ba3 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1322,4 +1322,14 @@ struct linux_efi_initrd { /* Header of a populated EFI secret area */ #define EFI_SECRET_TABLE_HEADER_GUID EFI_GUID(0x1e74f542, 0x71dd, 0x4d66, 0x96, 0x3e, 0xef, 0x42, 0x87, 0xff, 0x17, 0x3b) +bool xen_efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table); + +static inline +bool efi_config_table_is_usable(const efi_guid_t *guid, unsigned long table) +{ + if (!IS_ENABLED(CONFIG_XEN_EFI)) + return true; + return xen_efi_config_table_is_usable(guid, table); +} + #endif /* _LINUX_EFI_H */ -- cgit v1.2.3 From 43651e60aa167974955b23f73a25eb79886ab6d0 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Mon, 16 Jan 2023 10:47:35 +0100 Subject: mtd: rawnand: Fix nand_chip kdoc Describe the continuous read nand_chip fields to avoid the following htmldocs warning: include/linux/mtd/rawnand.h:1325: warning: Function parameter or member 'cont_read' not described in 'nand_chip' Reported-by: Stephen Rothwell Fixes: 003fe4b9545b ("mtd: rawnand: Support for sequential cache reads") Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230116094735.11483-1-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 1b0936fe3c6e..f8d4be9c587a 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1260,6 +1260,10 @@ struct nand_secure_region { * @read_retries: The number of read retry modes supported * @secure_regions: Structure containing the secure regions info * @nr_secure_regions: Number of secure regions + * @cont_read: Sequential page read internals + * @cont_read.ongoing: Whether a continuous read is ongoing or not + * @cont_read.first_page: Start of the continuous read operation + * @cont_read.last_page: End of the continuous read operation * @controller: The hardware controller structure which is shared among multiple * independent devices * @ecc: The ECC controller structure -- cgit v1.2.3 From 8681e3663411ee6b64dd0714b23f5c86f64f7533 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:31:02 +0100 Subject: drm/simpledrm: Support the XB24/AB24 format Add XB24 and AB24 to the list of supported formats. The format helpers support conversion to these formats and they are documented in the simple-framebuffer device tree bindings. 
Reviewed-by: Thomas Zimmermann Signed-off-by: Thierry Reding Link: https://patchwork.freedesktop.org/patch/msgid/20230120173103.4002342-8-thierry.reding@gmail.com --- include/linux/platform_data/simplefb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/simplefb.h b/include/linux/platform_data/simplefb.h index 27ea99af6e1d..4f94d52ac99f 100644 --- a/include/linux/platform_data/simplefb.h +++ b/include/linux/platform_data/simplefb.h @@ -22,6 +22,7 @@ { "r8g8b8", 24, {16, 8}, {8, 8}, {0, 8}, {0, 0}, DRM_FORMAT_RGB888 }, \ { "x8r8g8b8", 32, {16, 8}, {8, 8}, {0, 8}, {0, 0}, DRM_FORMAT_XRGB8888 }, \ { "a8r8g8b8", 32, {16, 8}, {8, 8}, {0, 8}, {24, 8}, DRM_FORMAT_ARGB8888 }, \ + { "x8b8g8r8", 32, {0, 8}, {8, 8}, {16, 8}, {0, 0}, DRM_FORMAT_XBGR8888 }, \ { "a8b8g8r8", 32, {0, 8}, {8, 8}, {16, 8}, {24, 8}, DRM_FORMAT_ABGR8888 }, \ { "x2r10g10b10", 32, {20, 10}, {10, 10}, {0, 10}, {0, 0}, DRM_FORMAT_XRGB2101010 }, \ { "a2r10g10b10", 32, {20, 10}, {10, 10}, {0, 10}, {30, 2}, DRM_FORMAT_ARGB2101010 }, \ -- cgit v1.2.3 From 8786b095df02c1b881643146869a7d6f80411e6a Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 18 Jan 2023 22:55:12 +0100 Subject: i2c: gpio: support write-only sda/scl w/o pull-up There are slave devices that understand I2C but have read-only SDA and SCL. Examples are FD650 7-segment LED controller and its derivatives. Typical board designs don't even have a pull-up for both pins. Handle the new attributes for write-only SDA and missing pull-up on SDA/SCL. For either pin the open-drain and has-no-pullup properties are mutually-exclusive, what is documented in the DT property documentation. We don't add an extra warning here because the open-drain properties are marked deprecated anyway. Signed-off-by: Heiner Kallweit [wsa: switched to device properties] Signed-off-by: Wolfram Sang --- include/linux/platform_data/i2c-gpio.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/i2c-gpio.h b/include/linux/platform_data/i2c-gpio.h index a907774fd177..545639bcca72 100644 --- a/include/linux/platform_data/i2c-gpio.h +++ b/include/linux/platform_data/i2c-gpio.h @@ -16,16 +16,25 @@ * isn't actively driven high when setting the output value high. * gpio_get_value() must return the actual pin state even if the * pin is configured as an output. + * @sda_is_output_only: SDA output drivers can't be turned off. + * This is for clients that can only read SDA/SCL. + * @sda_has_no_pullup: SDA is used in a non-compliant way and has no pull-up. + * Therefore disable open-drain. * @scl_is_open_drain: SCL is set up as open drain. Same requirements * as for sda_is_open_drain apply. * @scl_is_output_only: SCL output drivers cannot be turned off. + * @scl_has_no_pullup: SCL is used in a non-compliant way and has no pull-up. + * Therefore disable open-drain. 
*/ struct i2c_gpio_platform_data { int udelay; int timeout; unsigned int sda_is_open_drain:1; + unsigned int sda_is_output_only:1; + unsigned int sda_has_no_pullup:1; unsigned int scl_is_open_drain:1; unsigned int scl_is_output_only:1; + unsigned int scl_has_no_pullup:1; }; #endif /* _LINUX_I2C_GPIO_H */ -- cgit v1.2.3 From 998101f2a78cf506022f2094461105cb3d00e19f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sat, 21 Jan 2023 20:24:16 +0100 Subject: fbdev: Remove unused struct fb_deferred_io .first_io field This optional callback was added in the commit 1f45f9dbb392 ("fb_defio: add first_io callback") but it was never used by a driver. Let's remove it since it's unlikely that will be used after a decade that was added. Signed-off-by: Javier Martinez Canillas Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230121192418.2814955-2-javierm@redhat.com --- include/linux/fb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 30183fd259ae..daf336385613 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -215,7 +215,6 @@ struct fb_deferred_io { struct mutex lock; /* mutex that protects the pageref list */ struct list_head pagereflist; /* list of pagerefs for touched pages */ /* callback */ - void (*first_io)(struct fb_info *info); void (*deferred_io)(struct fb_info *info, struct list_head *pagelist); }; #endif -- cgit v1.2.3 From b1cd16330c8c6bc73e769b7b63e46cec2e292d6c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 17 Nov 2022 23:34:19 +0300 Subject: KVM: account allocation in generic version of kvm_arch_alloc_vm() Account the allocation of VMs in the generic version of kvm_arch_alloc_vm(), the VM is tied to the current task/process. Note, x86 already accounts its allocation. 
Signed-off-by: Alexey Dobriyan Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/Y3aay2u2KQgiR0un@p183 [sean: reworded changelog] Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 109b18e2789c..c2bd15204c3f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1461,7 +1461,7 @@ int kvm_arch_create_vm_debugfs(struct kvm *kvm); */ static inline struct kvm *kvm_arch_alloc_vm(void) { - return kzalloc(sizeof(struct kvm), GFP_KERNEL); + return kzalloc(sizeof(struct kvm), GFP_KERNEL_ACCOUNT); } #endif -- cgit v1.2.3 From b9926482ab91cd6dc19e28ae4f0ae98c6c1217d7 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Tue, 20 Sep 2022 14:02:10 +0800 Subject: kvm_host.h: fix spelling typo in function declaration Make parameters in function declaration consistent with those in function definition for better cscope-ability Signed-off-by: Wang Liang Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20220920060210.4842-1-wangliangzz@126.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c2bd15204c3f..a7d6a6111f5e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1353,7 +1353,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); -void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible); +void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode); void kvm_flush_remote_tlbs(struct kvm *kvm); -- cgit v1.2.3 From e251c21372c07694f547afe3c9828f7f6ef01267 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:42:48 +0100 Subject: of: Introduce of_translate_dma_region() This function is similar to of_translate_dma_address() but also reads a length in addition to an address from a device tree property. Reviewed-by: Rob Herring Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20230120174251.4004100-2-thierry.reding@gmail.com Signed-off-by: Joerg Roedel --- include/linux/of_address.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_address.h b/include/linux/of_address.h index 265f26eeaf6b..376671594746 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -38,6 +38,8 @@ struct of_pci_range { /* Translate a DMA address from device space to CPU space */ extern u64 of_translate_dma_address(struct device_node *dev, const __be32 *in_addr); +extern const __be32 *of_translate_dma_region(struct device_node *dev, const __be32 *addr, + phys_addr_t *start, size_t *length); #ifdef CONFIG_OF_ADDRESS extern u64 of_translate_address(struct device_node *np, const __be32 *addr); -- cgit v1.2.3 From a5bf3cfce8cb77d9d24613ab52d520896f83dd48 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 20 Jan 2023 18:42:50 +0100 Subject: iommu: Implement of_iommu_get_resv_regions() This is an implementation that IOMMU drivers can use to obtain reserved memory regions from a device tree node. It uses the reserved-memory DT bindings to find the regions associated with a given device. 
If these regions are marked accordingly, identity mappings will be created for them in the IOMMU domain that the devices will be attached to. Cc: Frank Rowand Cc: devicetree@vger.kernel.org Reviewed-by: Rob Herring Acked-by: Robin Murphy Signed-off-by: Thierry Reding Link: https://lore.kernel.org/r/20230120174251.4004100-4-thierry.reding@gmail.com Signed-off-by: Joerg Roedel --- include/linux/of_iommu.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 55c1eb300a86..9a5e6b410dd2 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -12,6 +12,9 @@ extern const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id); +extern void of_iommu_get_resv_regions(struct device *dev, + struct list_head *list); + #else static inline const struct iommu_ops *of_iommu_configure(struct device *dev, @@ -21,6 +24,11 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev, return NULL; } +static inline void of_iommu_get_resv_regions(struct device *dev, + struct list_head *list) +{ +} + #endif /* CONFIG_OF_IOMMU */ #endif /* __OF_IOMMU_H */ -- cgit v1.2.3 From 1369459b2e219a6f4c861404c4f195cd81dcbb40 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:35:54 -0400 Subject: iommu: Add a gfp parameter to iommu_map() The internal mechanisms support this, but instead of exposting the gfp to the caller it wrappers it into iommu_map() and iommu_map_atomic() Fix this instead of adding more variants for GFP_KERNEL_ACCOUNT. Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Reviewed-by: Mathieu Poirier Link: https://lore.kernel.org/r/1-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 46e1347bfa22..d2020994f292 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -467,7 +467,7 @@ extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); + phys_addr_t paddr, size_t size, int prot, gfp_t gfp); extern int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, @@ -773,7 +773,7 @@ static inline struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) } static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot) + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { return -ENODEV; } -- cgit v1.2.3 From 4dc6376af596d9b2a46fa9baf94c9f2fa5a3d246 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:35:55 -0400 Subject: iommu: Remove iommu_map_atomic() There is only one call site and it can now just pass the GFP_ATOMIC to the normal iommu_map(). 
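A hedged usage sketch of the changed interface: callers now state their allocation context explicitly instead of choosing between two variants. The wrapper function and page size below are illustrative.

#include <linux/iommu.h>
#include <linux/sizes.h>

static int foo_map_one_page(struct iommu_domain *domain, unsigned long iova,
			    phys_addr_t paddr)
{
	/* Sleepable callers pass GFP_KERNEL; atomic callers pass GFP_ATOMIC. */
	return iommu_map(domain, iova, paddr, SZ_4K,
			 IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
}
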
Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d2020994f292..521cd79700f4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -468,8 +468,6 @@ extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); -extern int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern size_t iommu_unmap_fast(struct iommu_domain *domain, @@ -778,13 +776,6 @@ static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, return -ENODEV; } -static inline int iommu_map_atomic(struct iommu_domain *domain, - unsigned long iova, phys_addr_t paddr, - size_t size, int prot) -{ - return -ENODEV; -} - static inline size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { -- cgit v1.2.3 From f2b2c051be6262edc3c6fce50d3d4f01b59ba228 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 23 Jan 2023 16:35:56 -0400 Subject: iommu: Add a gfp parameter to iommu_map_sg() Follow the pattern for iommu_map() and remove iommu_map_sg_atomic(). This allows __iommu_dma_alloc_noncontiguous() to use a GFP_KERNEL allocation here, based on the provided gfp flags. Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v3-76b587fe28df+6e3-iommu_map_gfp_jgg@nvidia.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 521cd79700f4..d5c16dc33c87 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -474,10 +474,8 @@ extern size_t iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather); extern ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot); -extern ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, - unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot); + struct scatterlist *sg, unsigned int nents, + int prot, gfp_t gfp); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); extern void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token); @@ -791,14 +789,7 @@ static inline size_t iommu_unmap_fast(struct iommu_domain *domain, static inline ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot) -{ - return -ENODEV; -} - -static inline ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, - unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot) + unsigned int nents, int prot, gfp_t gfp) { return -ENODEV; } @@ -1109,7 +1100,8 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, static inline size_t iommu_map_sgtable(struct iommu_domain *domain, unsigned long iova, struct sg_table *sgt, int 
prot) { - return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot); + return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot, + GFP_KERNEL); } #ifdef CONFIG_IOMMU_DEBUGFS -- cgit v1.2.3 From 9cad72dfc5567c66ab0e5a0a2474c5f36c268694 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Sat, 21 Jan 2023 14:58:46 +0000 Subject: usb: gadget: Add support for RZ/V2M USB3DRD driver The RZ/V2M USB3.1 Gen1 Interface (USB) composed of a USB3.1 Gen1 Dual Role Device controller (USB3DRD), a USB3.1 Gen1 Host controller (USB3HOST), a USB3.1 Gen1 Peripheral controller (USB3PERI). The reset for both host and peri are located in USB3DRD block. The USB3DRD registers are mapped in the AXI address space of the Peripheral module. Add USB3DRD driver to handle reset for both host and peri modules. Signed-off-by: Biju Das Link: https://lore.kernel.org/r/20230121145853.4792-6-biju.das.jz@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/rzv2m_usb3drd.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 include/linux/usb/rzv2m_usb3drd.h (limited to 'include/linux') diff --git a/include/linux/usb/rzv2m_usb3drd.h b/include/linux/usb/rzv2m_usb3drd.h new file mode 100644 index 000000000000..4cc848de229f --- /dev/null +++ b/include/linux/usb/rzv2m_usb3drd.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __RZV2M_USB3DRD_H +#define __RZV2M_USB3DRD_H + +#include + +struct rzv2m_usb3drd { + void __iomem *reg; + int drd_irq; + struct device *dev; + struct reset_control *drd_rstc; +}; + +#if IS_ENABLED(CONFIG_USB_RZV2M_USB3DRD) +void rzv2m_usb3drd_reset(struct device *dev, bool host); +#else +static inline void rzv2m_usb3drd_reset(struct device *dev, bool host) { } +#endif + +#endif /* __RZV2M_USB3DRD_H */ -- cgit v1.2.3 From be234d00240cbb53d5a1fa0e58b2d14ff121dca2 Mon Sep 17 00:00:00 2001 From: "Nancy.Lin" Date: Fri, 13 Jan 2023 18:44:26 +0800 Subject: soc: mediatek: add mtk-mmsys ethdr and mdp_rdma components Add new mmsys component: ethdr_mixer and mdp_rdma. These components will use in mt8195 vdosys1. Signed-off-by: Nancy.Lin Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Tested-by: AngeloGioacchino Del Regno Tested-by: Bo-Chen Chen Link: https://lore.kernel.org/r/20230113104434.28023-4-nancy.lin@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-mmsys.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h index b85f66db33e1..3e4b080627c3 100644 --- a/include/linux/soc/mediatek/mtk-mmsys.h +++ b/include/linux/soc/mediatek/mtk-mmsys.h @@ -36,7 +36,16 @@ enum mtk_ddp_comp_id { DDP_COMPONENT_DSI1, DDP_COMPONENT_DSI2, DDP_COMPONENT_DSI3, + DDP_COMPONENT_ETHDR_MIXER, DDP_COMPONENT_GAMMA, + DDP_COMPONENT_MDP_RDMA0, + DDP_COMPONENT_MDP_RDMA1, + DDP_COMPONENT_MDP_RDMA2, + DDP_COMPONENT_MDP_RDMA3, + DDP_COMPONENT_MDP_RDMA4, + DDP_COMPONENT_MDP_RDMA5, + DDP_COMPONENT_MDP_RDMA6, + DDP_COMPONENT_MDP_RDMA7, DDP_COMPONENT_MERGE0, DDP_COMPONENT_MERGE1, DDP_COMPONENT_MERGE2, -- cgit v1.2.3 From 3dd20b715c4483ae5d8ddb22cbc8e116944094e4 Mon Sep 17 00:00:00 2001 From: "Nancy.Lin" Date: Fri, 13 Jan 2023 18:44:29 +0800 Subject: soc: mediatek: add mtk-mmsys config API for mt8195 vdosys1 Add four mmsys config APIs. The config APIs are used for config mmsys reg. Some mmsys regs need to be set according to the HW engine binding to the mmsys simultaneously. 1. 
mtk_mmsys_merge_async_config: config merge async width/height. async is used for cross-clock domain synchronization. 2. mtk_mmsys_hdr_confing: config hdr backend async width/height. 3. mtk_mmsys_mixer_in_config and mtk_mmsys_mixer_in_config: config mixer related settings. Signed-off-by: Nancy.Lin Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Tested-by: AngeloGioacchino Del Regno Tested-by: Bo-Chen Chen Link: https://lore.kernel.org/r/20230113104434.28023-7-nancy.lin@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-mmsys.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h index 3e4b080627c3..5a6753aed482 100644 --- a/include/linux/soc/mediatek/mtk-mmsys.h +++ b/include/linux/soc/mediatek/mtk-mmsys.h @@ -83,4 +83,13 @@ void mtk_mmsys_ddp_disconnect(struct device *dev, void mtk_mmsys_ddp_dpi_fmt_config(struct device *dev, u32 val); +void mtk_mmsys_merge_async_config(struct device *dev, int idx, int width, int height); + +void mtk_mmsys_hdr_config(struct device *dev, int be_width, int be_height); + +void mtk_mmsys_mixer_in_config(struct device *dev, int idx, bool alpha_sel, u16 alpha, + u8 mode, u32 biwidth); + +void mtk_mmsys_mixer_in_channel_swap(struct device *dev, int idx, bool channel_swap); + #endif /* __MTK_MMSYS_H */ -- cgit v1.2.3 From 8af1f6b5bccb61edc40c3c00cefa7be07c1e5a91 Mon Sep 17 00:00:00 2001 From: "Nancy.Lin" Date: Fri, 13 Jan 2023 18:44:30 +0800 Subject: soc: mediatek: add cmdq support of mtk-mmsys config API for mt8195 vdosys1 Add cmdq support for mtk-mmsys config API. The mmsys config register settings need to take effect with the other HW settings(like OVL_ADAPTOR...) at the same vblanking time. If we use CPU to write the mmsys reg, we can't guarantee all the settings can be written in the same vblanking time. Cmdq is used for this purpose. We prepare all the related HW settings in one cmdq packet. The first command in the packet is "wait stream done", and then following with all the HW settings. After the cmdq packet is flush to GCE HW. The GCE waits for the "stream done event" to coming and then starts flushing all the HW settings. This can guarantee all the settings flush in the same vblanking. 
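As a hedged sketch of the intended use (the function, variable names and the surrounding cmdq packet setup are illustrative, not taken from the driver): the caller first records a wait on the stream-done event, then appends the vdosys1 register writes to the same packet so that GCE applies everything within one vblank.

static void foo_record_vdosys1_config(struct device *mmsys_dev,
				      struct cmdq_pkt *cmdq_pkt,
				      u16 stream_done_event)
{
	/* GCE stalls here until the display stream-done event fires. */
	cmdq_pkt_wfe(cmdq_pkt, stream_done_event, true);

	/* These calls only record commands; nothing touches the hardware yet. */
	mtk_mmsys_merge_async_config(mmsys_dev, 0, 1920, 1080, cmdq_pkt);
	mtk_mmsys_hdr_config(mmsys_dev, 1920, 1080, cmdq_pkt);
	mtk_mmsys_mixer_in_config(mmsys_dev, 1, true, 0xff, 0, 1920, cmdq_pkt);
	mtk_mmsys_mixer_in_channel_swap(mmsys_dev, 1, false, cmdq_pkt);
}
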
Signed-off-by: Nancy.Lin Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: CK Hu Tested-by: AngeloGioacchino Del Regno Tested-by: Bo-Chen Chen Link: https://lore.kernel.org/r/20230113104434.28023-8-nancy.lin@mediatek.com Signed-off-by: Matthias Brugger --- include/linux/soc/mediatek/mtk-mmsys.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h index 5a6753aed482..dc2963a0a0f7 100644 --- a/include/linux/soc/mediatek/mtk-mmsys.h +++ b/include/linux/soc/mediatek/mtk-mmsys.h @@ -6,6 +6,10 @@ #ifndef __MTK_MMSYS_H #define __MTK_MMSYS_H +#include +#include +#include + enum mtk_ddp_comp_id; struct device; @@ -83,13 +87,16 @@ void mtk_mmsys_ddp_disconnect(struct device *dev, void mtk_mmsys_ddp_dpi_fmt_config(struct device *dev, u32 val); -void mtk_mmsys_merge_async_config(struct device *dev, int idx, int width, int height); +void mtk_mmsys_merge_async_config(struct device *dev, int idx, int width, + int height, struct cmdq_pkt *cmdq_pkt); -void mtk_mmsys_hdr_config(struct device *dev, int be_width, int be_height); +void mtk_mmsys_hdr_config(struct device *dev, int be_width, int be_height, + struct cmdq_pkt *cmdq_pkt); void mtk_mmsys_mixer_in_config(struct device *dev, int idx, bool alpha_sel, u16 alpha, - u8 mode, u32 biwidth); + u8 mode, u32 biwidth, struct cmdq_pkt *cmdq_pkt); -void mtk_mmsys_mixer_in_channel_swap(struct device *dev, int idx, bool channel_swap); +void mtk_mmsys_mixer_in_channel_swap(struct device *dev, int idx, bool channel_swap, + struct cmdq_pkt *cmdq_pkt); #endif /* __MTK_MMSYS_H */ -- cgit v1.2.3 From c24973ed795fec5c12d8a822a0de99a4b7bab394 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 19 Jan 2023 15:09:19 +0200 Subject: gpu: host1x: Implement job tracking using DMA fences In anticipation of removal of the intr API, implement job tracking using DMA fences instead. The main two things about this are making cdma_update schedule the work since fence completion can now be called from interrupt context, and some complication in ensuring the callback is not running when we free the fence. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index dc55d9d3b94f..db6cf6f34361 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -288,8 +289,9 @@ struct host1x_job { u32 syncpt_incrs; u32 syncpt_end; - /* Completion waiter ref */ - void *waiter; + /* Completion fence for job tracking */ + struct dma_fence *fence; + struct dma_fence_cb fence_cb; /* Maximum time to wait for this job */ unsigned int timeout; -- cgit v1.2.3 From d5179020f5ce44fd449790a9c12ef6c1a90a2ca7 Mon Sep 17 00:00:00 2001 From: Mikko Perttunen Date: Thu, 19 Jan 2023 15:09:21 +0200 Subject: gpu: host1x: External timeout/cancellation for fences Currently all fences have a 30 second timeout to ensure they are cleaned up if the fence never completes otherwise. However, this one size fits all solution doesn't actually fit in every case, such as syncpoint waiting where we want to be able to have timeouts longer than 30 seconds. As such, we want to be able to give control over fence cancellation to the caller (and maybe eventually get rid of the internal timeout altogether). 
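As context for the mechanism introduced next, a hedged sketch of how a waiter could use the resulting API: create a fence with the built-in timeout disabled, wait under the caller's own deadline, and cancel the fence explicitly if that deadline expires. Error handling and the return convention are illustrative.

#include <linux/dma-fence.h>
#include <linux/err.h>
#include <linux/host1x.h>

static int foo_wait_syncpt(struct host1x_syncpt *sp, u32 threshold, long timeout)
{
	struct dma_fence *fence;
	long ret;

	/* false: do not arm the internal timeout path for this fence. */
	fence = host1x_fence_create(sp, threshold, false);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	ret = dma_fence_wait_timeout(fence, true, timeout);
	if (ret == 0)
		host1x_fence_cancel(fence);	/* caller-controlled teardown */

	dma_fence_put(fence);

	if (ret < 0)
		return ret;
	return ret ? 0 : -EAGAIN;
}
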
Here we add this cancellation mechanism by essentially adding a function for entering the timeout path by function call, and changing the syncpoint wait function to use it. Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index db6cf6f34361..9a9de4b97a25 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -222,7 +222,9 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base); void host1x_syncpt_release_vblank_reservation(struct host1x_client *client, u32 syncpt_id); -struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold); +struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold, + bool timeout); +void host1x_fence_cancel(struct dma_fence *fence); /* * host1x channel -- cgit v1.2.3 From 1ddc7618294084fff8d673217a9479550990ee84 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 23 Jan 2023 12:59:45 +0530 Subject: bus: mhi: ep: Change state_lock to mutex state_lock, the spinlock type is meant to protect race against concurrent MHI state transitions. In mhi_ep_set_m0_state(), while the state_lock is being held, the channels are resumed in mhi_ep_resume_channels() if the previous state was M3. This causes sleeping in atomic bug, since mhi_ep_resume_channels() use mutex internally. Since the state_lock is supposed to be held throughout the state change, it is not ideal to drop the lock before calling mhi_ep_resume_channels(). So to fix this issue, let's change the type of state_lock to mutex. This would also allow holding the lock throughout all state transitions thereby avoiding any potential race. 
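A simplified, hypothetical sketch of the pattern this enables inside the M-state handlers (not the actual driver code): the mutex can now stay held across the whole transition, including the channel-resume path that sleeps.

static void mhi_ep_set_m0_state_sketch(struct mhi_ep_cntrl *mhi_cntrl)
{
	enum mhi_state old_state;

	mutex_lock(&mhi_cntrl->state_lock);
	old_state = mhi_cntrl->mhi_state;

	/* ... update MHISTATUS and notify the host of the M0 transition ... */

	/* May sleep: resuming channels takes per-channel mutexes internally. */
	if (old_state == MHI_STATE_M3)
		mhi_ep_resume_channels(mhi_cntrl);

	mutex_unlock(&mhi_cntrl->state_lock);
}
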
Cc: # 5.19 Fixes: e4b7b5f0f30a ("bus: mhi: ep: Add support for suspending and resuming channels") Reported-by: Dan Carpenter Reviewed-by: Jeffrey Hugo Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi_ep.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mhi_ep.h b/include/linux/mhi_ep.h index 478aece17046..f198a8ac7ee7 100644 --- a/include/linux/mhi_ep.h +++ b/include/linux/mhi_ep.h @@ -70,8 +70,8 @@ struct mhi_ep_db_info { * @cmd_ctx_cache_phys: Physical address of the host command context cache * @chdb: Array of channel doorbell interrupt info * @event_lock: Lock for protecting event rings - * @list_lock: Lock for protecting state transition and channel doorbell lists * @state_lock: Lock for protecting state transitions + * @list_lock: Lock for protecting state transition and channel doorbell lists * @st_transition_list: List of state transitions * @ch_db_list: List of queued channel doorbells * @wq: Dedicated workqueue for handling rings and state changes @@ -117,8 +117,8 @@ struct mhi_ep_cntrl { struct mhi_ep_db_info chdb[4]; struct mutex event_lock; + struct mutex state_lock; spinlock_t list_lock; - spinlock_t state_lock; struct list_head st_transition_list; struct list_head ch_db_list; -- cgit v1.2.3 From 206351c5c2d9906b0304c5b10d5162707d5d4bcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:35 +0200 Subject: mfd: intel-m10-bmc: Add missing includes to header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit linux/mfd/intel-m10-bmc.h is using: - pr_err(), thus include also linux/dev_printk.h - FIELD_GET(), this include also linux/bitfield.h - GENMASK(), thus include also linux/bits.h Signed-off-by: Ilpo Järvinen Reviewed-by: Russ Weight Reviewed-by: Xu Yilun Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-2-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index f0044b14136e..0d4db5d9d5af 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -7,6 +7,9 @@ #ifndef __MFD_INTEL_M10_BMC_H #define __MFD_INTEL_M10_BMC_H +#include +#include +#include #include #define M10BMC_LEGACY_BUILD_VER 0x300468 -- cgit v1.2.3 From 16e5d95a5c451027a2e7ef89dd146a1c6c74ca6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:36 +0200 Subject: mfd: intel-m10-bmc: Create m10bmc_platform_info for type specific info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BMC type specific info is currently set by a switch/case block. The size of this info is expected to grow as more dev types and features are added which would have made the switch block bloaty. Store type specific info into struct and place them into .driver_data instead because it makes things a bit cleaner. The m10bmc_type enum can be dropped as the differentiation is now fully handled by the platform info. The info member of struct intel_m10bmc that is added here is not used yet in this change but its addition logically still belongs to this change. The CSR map change that comes after this change needs to have the info member. 
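A hedged sketch of the resulting pattern (board, cell and table names are illustrative): each variant describes its MFD cells once, and the bus driver picks the description up from driver_data instead of switching on an enum.

static struct mfd_cell m10bmc_foo_subdevs[] = {
	{ .name = "foo-hwmon" },
	{ .name = "foo-secure-update" },
};

static const struct intel_m10bmc_platform_info m10bmc_foo_info = {
	.cells = m10bmc_foo_subdevs,
	.n_cells = ARRAY_SIZE(m10bmc_foo_subdevs),
};

static const struct spi_device_id m10bmc_spi_id[] = {
	{ "m10-foo", (kernel_ulong_t)&m10bmc_foo_info },
	{ }
};
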
Reviewed-by: Russ Weight Reviewed-by: Xu Yilun Signed-off-by: Ilpo Järvinen Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-3-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index 0d4db5d9d5af..f418cad88e64 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -121,14 +121,26 @@ /* Address of 4KB inverted bit vector containing staging area FLASH count */ #define STAGING_FLASH_COUNT 0x17ffb000 +/** + * struct intel_m10bmc_platform_info - Intel MAX 10 BMC platform specific information + * @cells: MFD cells + * @n_cells: MFD cells ARRAY_SIZE() + */ +struct intel_m10bmc_platform_info { + struct mfd_cell *cells; + int n_cells; +}; + /** * struct intel_m10bmc - Intel MAX 10 BMC parent driver data structure * @dev: this device * @regmap: the regmap used to access registers by m10bmc itself + * @info: the platform information for MAX10 BMC */ struct intel_m10bmc { struct device *dev; struct regmap *regmap; + const struct intel_m10bmc_platform_info *info; }; /* -- cgit v1.2.3 From 603aed8ffd4c9cb633c05a514cfb5e8ca6b0751d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:38 +0200 Subject: mfd: intel-m10-bmc: Split into core and spi specific parts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the common code from intel-m10-bmc driver into intel-m10-bmc-core and move the SPI bus parts into an interface specific file. intel-m10-bmc-core becomes the core MFD functions which can support multiple bus interface like SPI bus. Co-developed-by: Tianfei zhang Signed-off-by: Tianfei zhang Reviewed-by: Russ Weight Acked-by: Guenter Roeck # hwmon Reviewed-by: Xu Yilun Signed-off-by: Ilpo Järvinen Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-5-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index f418cad88e64..a80deb61b69a 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -174,4 +174,10 @@ m10bmc_raw_read(struct intel_m10bmc *m10bmc, unsigned int addr, #define m10bmc_sys_read(m10bmc, offset, val) \ m10bmc_raw_read(m10bmc, M10BMC_SYS_BASE + (offset), val) +/* + * MAX10 BMC Core support + */ +int m10bmc_dev_init(struct intel_m10bmc *m10bmc, const struct intel_m10bmc_platform_info *info); +extern const struct attribute_group *m10bmc_dev_groups[]; + #endif /* __MFD_INTEL_M10_BMC_H */ -- cgit v1.2.3 From 6052a005caf9cd484fe6368a31c736ac17ebaf66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:39 +0200 Subject: mfd: intel-m10-bmc: Support multiple CSR register layouts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are different addresses for the MAX10 CSR registers. Introducing a new data structure m10bmc_csr_map for the register definition of MAX10 CSR. Provide the csr_map for SPI. 
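To illustrate, the SPI-attached layout could be expressed roughly as follows using the existing register defines; this is an abbreviated sketch, not the exact table added to the SPI front-end.

static const struct m10bmc_csr_map m10bmc_spi_csr_map = {
	.base = M10BMC_SYS_BASE,
	.build_version = M10BMC_BUILD_VER,
	.fw_version = NIOS2_FW_VERSION,
	.mac_low = M10BMC_MAC_LOW,
	.mac_high = M10BMC_MAC_HIGH,
	.doorbell = M10BMC_DOORBELL,
	.auth_result = M10BMC_AUTH_RESULT,
	.rsu_update_counter = STAGING_FLASH_COUNT,
	/* ... remaining flash offsets (BMC/SR/PR prog, REH, magic) omitted ... */
};
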
Co-developed-by: Tianfei zhang Signed-off-by: Tianfei zhang Reviewed-by: Russ Weight Reviewed-by: Xu Yilun Signed-off-by: Ilpo Järvinen Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-6-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index a80deb61b69a..d569a72c7d4f 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -121,14 +121,39 @@ /* Address of 4KB inverted bit vector containing staging area FLASH count */ #define STAGING_FLASH_COUNT 0x17ffb000 +/** + * struct m10bmc_csr_map - Intel MAX 10 BMC CSR register map + */ +struct m10bmc_csr_map { + unsigned int base; + unsigned int build_version; + unsigned int fw_version; + unsigned int mac_low; + unsigned int mac_high; + unsigned int doorbell; + unsigned int auth_result; + unsigned int bmc_prog_addr; + unsigned int bmc_reh_addr; + unsigned int bmc_magic; + unsigned int sr_prog_addr; + unsigned int sr_reh_addr; + unsigned int sr_magic; + unsigned int pr_prog_addr; + unsigned int pr_reh_addr; + unsigned int pr_magic; + unsigned int rsu_update_counter; +}; + /** * struct intel_m10bmc_platform_info - Intel MAX 10 BMC platform specific information * @cells: MFD cells * @n_cells: MFD cells ARRAY_SIZE() + * @csr_map: the mappings for register definition of MAX10 BMC */ struct intel_m10bmc_platform_info { struct mfd_cell *cells; int n_cells; + const struct m10bmc_csr_map *csr_map; }; /** @@ -167,12 +192,17 @@ m10bmc_raw_read(struct intel_m10bmc *m10bmc, unsigned int addr, * The base of the system registers could be configured by HW developers, and * in HW SPEC, the base is not added to the addresses of the system registers. * - * This macro helps to simplify the accessing of the system registers. And if + * This function helps to simplify the accessing of the system registers. And if * the base is reconfigured in HW, SW developers could simply change the - * M10BMC_SYS_BASE accordingly. + * csr_map's base accordingly. */ -#define m10bmc_sys_read(m10bmc, offset, val) \ - m10bmc_raw_read(m10bmc, M10BMC_SYS_BASE + (offset), val) +static inline int m10bmc_sys_read(struct intel_m10bmc *m10bmc, unsigned int offset, + unsigned int *val) +{ + const struct m10bmc_csr_map *csr_map = m10bmc->info->csr_map; + + return m10bmc_raw_read(m10bmc, csr_map->base + offset, val); +} /* * MAX10 BMC Core support -- cgit v1.2.3 From bcababfc60ccc622268b2317a22fabd879fbc0a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:41 +0200 Subject: mfd: intel-m10-bmc: Prefix register defines with M10BMC_N3000 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prefix the M10BMC defines register defines with M10BMC_N3000 to make it more obvious these are related to some board type. All current non-N3000 board types have the same layout so they'll be reused. The less generic makes it more obvious they're not meant for the generic/interface agnostic code. 
Reviewed-by: Russ Weight Reviewed-by: Xu Yilun Signed-off-by: Ilpo Järvinen Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-8-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 66 +++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index d569a72c7d4f..470dc3773c01 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -12,38 +12,38 @@ #include #include -#define M10BMC_LEGACY_BUILD_VER 0x300468 -#define M10BMC_SYS_BASE 0x300800 -#define M10BMC_SYS_END 0x300fff -#define M10BMC_FLASH_BASE 0x10000000 -#define M10BMC_FLASH_END 0x1fffffff -#define M10BMC_MEM_END M10BMC_FLASH_END +#define M10BMC_N3000_LEGACY_BUILD_VER 0x300468 +#define M10BMC_N3000_SYS_BASE 0x300800 +#define M10BMC_N3000_SYS_END 0x300fff +#define M10BMC_N3000_FLASH_BASE 0x10000000 +#define M10BMC_N3000_FLASH_END 0x1fffffff +#define M10BMC_N3000_MEM_END M10BMC_N3000_FLASH_END #define M10BMC_STAGING_BASE 0x18000000 #define M10BMC_STAGING_SIZE 0x3800000 /* Register offset of system registers */ -#define NIOS2_FW_VERSION 0x0 -#define M10BMC_MAC_LOW 0x10 -#define M10BMC_MAC_BYTE4 GENMASK(7, 0) -#define M10BMC_MAC_BYTE3 GENMASK(15, 8) -#define M10BMC_MAC_BYTE2 GENMASK(23, 16) -#define M10BMC_MAC_BYTE1 GENMASK(31, 24) -#define M10BMC_MAC_HIGH 0x14 -#define M10BMC_MAC_BYTE6 GENMASK(7, 0) -#define M10BMC_MAC_BYTE5 GENMASK(15, 8) -#define M10BMC_MAC_COUNT GENMASK(23, 16) -#define M10BMC_TEST_REG 0x3c -#define M10BMC_BUILD_VER 0x68 -#define M10BMC_VER_MAJOR_MSK GENMASK(23, 16) -#define M10BMC_VER_PCB_INFO_MSK GENMASK(31, 24) -#define M10BMC_VER_LEGACY_INVALID 0xffffffff +#define NIOS2_N3000_FW_VERSION 0x0 +#define M10BMC_N3000_MAC_LOW 0x10 +#define M10BMC_N3000_MAC_BYTE4 GENMASK(7, 0) +#define M10BMC_N3000_MAC_BYTE3 GENMASK(15, 8) +#define M10BMC_N3000_MAC_BYTE2 GENMASK(23, 16) +#define M10BMC_N3000_MAC_BYTE1 GENMASK(31, 24) +#define M10BMC_N3000_MAC_HIGH 0x14 +#define M10BMC_N3000_MAC_BYTE6 GENMASK(7, 0) +#define M10BMC_N3000_MAC_BYTE5 GENMASK(15, 8) +#define M10BMC_N3000_MAC_COUNT GENMASK(23, 16) +#define M10BMC_N3000_TEST_REG 0x3c +#define M10BMC_N3000_BUILD_VER 0x68 +#define M10BMC_N3000_VER_MAJOR_MSK GENMASK(23, 16) +#define M10BMC_N3000_VER_PCB_INFO_MSK GENMASK(31, 24) +#define M10BMC_N3000_VER_LEGACY_INVALID 0xffffffff /* Secure update doorbell register, in system register region */ -#define M10BMC_DOORBELL 0x400 +#define M10BMC_N3000_DOORBELL 0x400 /* Authorization Result register, in system register region */ -#define M10BMC_AUTH_RESULT 0x404 +#define M10BMC_N3000_AUTH_RESULT 0x404 /* Doorbell register fields */ #define DRBL_RSU_REQUEST BIT(0) @@ -106,20 +106,20 @@ #define RSU_COMPLETE_TIMEOUT_MS (40 * 60 * 1000) /* Addresses for security related data in FLASH */ -#define BMC_REH_ADDR 0x17ffc004 -#define BMC_PROG_ADDR 0x17ffc000 -#define BMC_PROG_MAGIC 0x5746 +#define M10BMC_N3000_BMC_REH_ADDR 0x17ffc004 +#define M10BMC_N3000_BMC_PROG_ADDR 0x17ffc000 +#define M10BMC_N3000_BMC_PROG_MAGIC 0x5746 -#define SR_REH_ADDR 0x17ffd004 -#define SR_PROG_ADDR 0x17ffd000 -#define SR_PROG_MAGIC 0x5253 +#define M10BMC_N3000_SR_REH_ADDR 0x17ffd004 +#define M10BMC_N3000_SR_PROG_ADDR 0x17ffd000 +#define M10BMC_N3000_SR_PROG_MAGIC 0x5253 -#define PR_REH_ADDR 0x17ffe004 -#define PR_PROG_ADDR 0x17ffe000 -#define PR_PROG_MAGIC 0x5250 +#define M10BMC_N3000_PR_REH_ADDR 0x17ffe004 +#define M10BMC_N3000_PR_PROG_ADDR 0x17ffe000 
+#define M10BMC_N3000_PR_PROG_MAGIC 0x5250 /* Address of 4KB inverted bit vector containing staging area FLASH count */ -#define STAGING_FLASH_COUNT 0x17ffb000 +#define M10BMC_N3000_STAGING_FLASH_COUNT 0x17ffb000 /** * struct m10bmc_csr_map - Intel MAX 10 BMC CSR register map -- cgit v1.2.3 From 001a734a55d09aa1716eb2cd5ccab8b4d7a068a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:43 +0200 Subject: fpga: m10bmc-sec: Make rsu status type specific MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rsu status field moves from the doorbell register to the auth result register in the PMCI implementation of the MAX10 BMC. In order to prepare for that, refactor the sec update driver code to have type-specific ops that provide ->rsu_status(). Co-developed-by: Tianfei zhang Signed-off-by: Tianfei zhang Co-developed-by: Russ Weight Signed-off-by: Russ Weight Signed-off-by: Ilpo Järvinen Acked-by: Xu Yilun Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-10-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index 470dc3773c01..1f75b33240ad 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -91,7 +91,6 @@ #define HOST_STATUS_ABORT_RSU 0x2 #define rsu_prog(doorbell) FIELD_GET(DRBL_RSU_PROGRESS, doorbell) -#define rsu_stat(doorbell) FIELD_GET(DRBL_RSU_STATUS, doorbell) /* interval 100ms and timeout 5s */ #define NIOS_HANDSHAKE_INTERVAL_US (100 * 1000) -- cgit v1.2.3 From 869b9eddf0b38a22c27a400e2fa849d2ff2aa7e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:44 +0200 Subject: mfd: intel-m10-bmc: Add PMCI driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the mfd driver for the Platform Management Component Interface (PMCI) based interface of the Intel MAX10 BMC controller. PMCI is a software-visible interface connected to the card BMC, which provides the basic functionality of reading and writing BMC registers. Access to the registers is done indirectly via a hardware controller/bridge that handles read/write/clear commands and acknowledgments for the commands. Previously, intel-m10-bmc provided sysfs under /sys/bus/spi/devices/... which is generalized in this change because not all MAX10 BMCs appear under SPI anymore.
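The indirect register access described above typically reduces to a write-address / issue-command / poll-for-acknowledgment sequence. The sketch below shows the general shape only; the register offsets, bit layout and function name are invented for illustration and are not the real PMCI bridge interface:

#include <linux/bits.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/types.h>

#define EX_IND_ADDR		0x0	/* made-up bridge register offsets */
#define EX_IND_CMD		0x4
#define EX_IND_CMD_READ		BIT(0)
#define EX_IND_CMD_ACK		BIT(31)
#define EX_IND_RD_DATA		0x8

static int example_indirect_read(void __iomem *bridge, u32 addr, u32 *val)
{
	u32 cmd;
	int ret;

	writel(addr, bridge + EX_IND_ADDR);		/* which BMC register */
	writel(EX_IND_CMD_READ, bridge + EX_IND_CMD);	/* kick off the read */

	ret = readl_poll_timeout(bridge + EX_IND_CMD, cmd,
				 cmd & EX_IND_CMD_ACK, 10, 10000);
	if (ret)
		return ret;				/* no acknowledgment */

	*val = readl(bridge + EX_IND_RD_DATA);
	writel(0, bridge + EX_IND_CMD);			/* clear for the next command */

	return 0;
}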
Co-developed-by: Tianfei zhang Signed-off-by: Tianfei zhang Co-developed-by: Russ Weight Signed-off-by: Russ Weight Co-developed-by: Matthew Gerlach Signed-off-by: Matthew Gerlach Reviewed-by: Xu Yilun Signed-off-by: Ilpo Järvinen Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-11-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index 1f75b33240ad..810534b1bd12 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -120,6 +120,34 @@ /* Address of 4KB inverted bit vector containing staging area FLASH count */ #define M10BMC_N3000_STAGING_FLASH_COUNT 0x17ffb000 +#define M10BMC_N6000_INDIRECT_BASE 0x400 + +#define M10BMC_N6000_SYS_BASE 0x0 +#define M10BMC_N6000_SYS_END 0xfff + +#define M10BMC_N6000_DOORBELL 0x1c0 +#define M10BMC_N6000_AUTH_RESULT 0x1c4 + +#define M10BMC_N6000_BUILD_VER 0x0 +#define NIOS2_N6000_FW_VERSION 0x4 +#define M10BMC_N6000_MAC_LOW 0x20 +#define M10BMC_N6000_MAC_HIGH (M10BMC_N6000_MAC_LOW + 4) + +/* Addresses for security related data in FLASH */ +#define M10BMC_N6000_BMC_REH_ADDR 0x7ffc004 +#define M10BMC_N6000_BMC_PROG_ADDR 0x7ffc000 +#define M10BMC_N6000_BMC_PROG_MAGIC 0x5746 + +#define M10BMC_N6000_SR_REH_ADDR 0x7ffd004 +#define M10BMC_N6000_SR_PROG_ADDR 0x7ffd000 +#define M10BMC_N6000_SR_PROG_MAGIC 0x5253 + +#define M10BMC_N6000_PR_REH_ADDR 0x7ffe004 +#define M10BMC_N6000_PR_PROG_ADDR 0x7ffe000 +#define M10BMC_N6000_PR_PROG_MAGIC 0x5250 + +#define M10BMC_N6000_STAGING_FLASH_COUNT 0x7ff5000 + /** * struct m10bmc_csr_map - Intel MAX 10 BMC CSR register map */ -- cgit v1.2.3 From a77ad4bf792652340ab334956e69b46ec2fdaefb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:03 +0100 Subject: of: device: make of_device_uevent_modalias() take a const device * of_device_uevent_modalias() does not modify the device pointer passed to it, so mark it constant. In order to properly do this, a number of busses need to have a modalias function added as they were attempting to just point to of_device_uevent_modalias instead of their bus-specific modalias function. This is fine except if the prototype for a bus and device type modalias function diverges and then problems could happen. To prevent all of that, just wrap the call to of_device_uevent_modalias() directly for each bus and device type individually. 
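A sketch of the per-bus wrapper pattern described above, using the const signatures this series converges on; the bus name is hypothetical:

#include <linux/device.h>
#include <linux/of_device.h>

static int example_bus_uevent(const struct device *dev, struct kobj_uevent_env *env)
{
	/* wrap the OF helper instead of pointing .uevent at it directly */
	return of_device_uevent_modalias(dev, env);
}

static struct bus_type example_bus_type = {
	.name	= "example",
	.uevent	= example_bus_uevent,
};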
Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Chen-Yu Tsai Cc: Jernej Skrabec Cc: Samuel Holland Cc: David Airlie Cc: Daniel Vetter Cc: Benjamin Herrenschmidt Cc: Rob Herring Cc: Frank Rowand Cc: Liang He Cc: Thomas Gleixner Cc: Christophe JAILLET Cc: Thomas Zimmermann Cc: Dmitry Baryshkov Cc: Douglas Anderson Cc: Lyude Paul Cc: Corentin Labbe Cc: Zou Wei Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@lists.linux.dev Cc: dri-devel@lists.freedesktop.org Cc: devicetree@vger.kernel.org Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/of_device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index ab7d557d541d..f4b57614979d 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -36,7 +36,7 @@ extern ssize_t of_device_modalias(struct device *dev, char *str, ssize_t len); extern int of_device_request_module(struct device *dev); extern void of_device_uevent(const struct device *dev, struct kobj_uevent_env *env); -extern int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env); +extern int of_device_uevent_modalias(const struct device *dev, struct kobj_uevent_env *env); static inline struct device_node *of_cpu_device_node_get(int cpu) { @@ -83,7 +83,7 @@ static inline int of_device_request_module(struct device *dev) return -ENODEV; } -static inline int of_device_uevent_modalias(struct device *dev, +static inline int of_device_uevent_modalias(const struct device *dev, struct kobj_uevent_env *env) { return -ENODEV; -- cgit v1.2.3 From fa838c8ce53714eba788ea877520788bc72c27bd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:04 +0100 Subject: i3c: move dev_to_i3cdev() to use container_of_const() The driver core is changing to pass some pointers as const, so move dev_to_i3cdev() to use container_of_const() to handle this change. dev_to_i3cdev() now properly keeps the const-ness of the pointer passed into it, while as before it could be lost. Acked-by: Alexandre Belloni Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-3-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/i3c/device.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 1c997abe868c..68b558929aec 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -186,7 +186,14 @@ static inline struct i3c_driver *drv_to_i3cdrv(struct device_driver *drv) } struct device *i3cdev_to_dev(struct i3c_device *i3cdev); -struct i3c_device *dev_to_i3cdev(struct device *dev); + +/** + * dev_to_i3cdev() - Returns the I3C device containing @dev + * @dev: device object + * + * Return: a pointer to an I3C device object. 
+ */ +#define dev_to_i3cdev(__dev) container_of_const(__dev, struct i3c_device, dev) const struct i3c_device_id * i3c_device_match_id(struct i3c_device *i3cdev, -- cgit v1.2.3 From 34be683add6ccdbc9d589b79077ea7f48ff7ee0c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:05 +0100 Subject: platform/surface: aggregator: move to_ssam_device() to use container_of_const() The driver core is changing to pass some pointers as const, so move to_ssam_device() to use container_of_const() to handle this change. to_ssam_device() now properly keeps the const-ness of the pointer passed into it, while as before it could be lost. Reviewed-by: Maximilian Luz Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-4-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/surface_aggregator/device.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h index 46c45d1b6368..24151a0e2c96 100644 --- a/include/linux/surface_aggregator/device.h +++ b/include/linux/surface_aggregator/device.h @@ -229,10 +229,7 @@ static inline bool is_ssam_device(struct device *d) * Return: Returns a pointer to the &struct ssam_device wrapping the given * device @d. */ -static inline struct ssam_device *to_ssam_device(struct device *d) -{ - return container_of(d, struct ssam_device, dev); -} +#define to_ssam_device(d) container_of_const(d, struct ssam_device, dev) /** * to_ssam_device_driver() - Casts the given device driver to a SSAM client -- cgit v1.2.3 From 49b7fc1c25481c823e391b5617546b1ad4af0852 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:06 +0100 Subject: firewire: move fw_device() and fw_unit() to use container_of_const() The driver core is changing to pass some pointers as const, so move fw_device() and fw_unit() functions to use container_of_const() to handle this change. fw_device() and fw_unit() now properly keeps the const-ness of the pointer passed into it, while as before it could be lost. This also required turning fw_parent_device() into a macro to preserve the const-ness of the pointer passed into it if necessary. 
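The point of these conversions is that constness survives the cast. The standalone C11 sketch below mimics the mechanism with _Generic for the fw_device() case; it is a simplified stand-in, not the kernel's actual container_of_const() definition:

#include <stddef.h>

struct device { int id; };
struct fw_device { int node_id; struct device device; };

/* Pick a const or non-const conversion depending on the pointer handed in. */
#define fw_device(dev) _Generic((dev),					\
	const struct device *: (const struct fw_device *)		\
		((const char *)(dev) - offsetof(struct fw_device, device)), \
	struct device *: (struct fw_device *)				\
		((char *)(dev) - offsetof(struct fw_device, device)))

int main(void)
{
	struct fw_device fw = { .node_id = 1 };
	const struct device *d = &fw.device;
	const struct fw_device *back = fw_device(d);	/* const is preserved */

	return back->node_id == 1 ? 0 : 1;
}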
Cc: Stefan Richter Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-5-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/firewire.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 980019053e54..03ac31ada5e6 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -208,10 +208,7 @@ struct fw_device { struct fw_attribute_group attribute_group; }; -static inline struct fw_device *fw_device(struct device *dev) -{ - return container_of(dev, struct fw_device, device); -} +#define fw_device(dev) container_of_const(dev, struct fw_device, device) static inline int fw_device_is_shutdown(struct fw_device *device) { @@ -229,10 +226,7 @@ struct fw_unit { struct fw_attribute_group attribute_group; }; -static inline struct fw_unit *fw_unit(struct device *dev) -{ - return container_of(dev, struct fw_unit, device); -} +#define fw_unit(dev) container_of_const(dev, struct fw_unit, device) static inline struct fw_unit *fw_unit_get(struct fw_unit *unit) { @@ -246,10 +240,7 @@ static inline void fw_unit_put(struct fw_unit *unit) put_device(&unit->device); } -static inline struct fw_device *fw_parent_device(struct fw_unit *unit) -{ - return fw_device(unit->device.parent); -} +#define fw_parent_device(unit) fw_device(unit->device.parent) struct ieee1394_device_id; -- cgit v1.2.3 From 162736b0d71a9630f7c99dda7cefd5600fa03d69 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:07 +0100 Subject: driver core: make struct device_type.uevent() take a const * The uevent() callback in struct device_type should not be modifying the device that is passed into it, so mark it as a const * and propagate the function signature changes out into all relevant subsystems that use this callback. Cc: Alan Stern Cc: Andreas Noever Cc: Andy Shevchenko Cc: Bard Liao Cc: Chaitanya Kulkarni Cc: Dan Williams Cc: Dmitry Torokhov Cc: Frank Rowand Cc: Ira Weiny Cc: Jason Gunthorpe Cc: Jens Axboe Cc: Jilin Yuan Cc: Jiri Slaby Cc: Len Brown Cc: Mark Gross Cc: "Martin K. Petersen" Cc: "Matthew Wilcox (Oracle)" Cc: Maximilian Luz Cc: Michael Jamet Cc: Ming Lei Cc: Pierre-Louis Bossart Cc: Rob Herring Cc: Sakari Ailus Cc: Sanyog Kale Cc: Sean Young Cc: Stefan Richter Cc: Thomas Gleixner Cc: Won Chung Cc: Yehezkel Bernat Acked-by: Rafael J. 
Wysocki Acked-by: Mika Westerberg # for Thunderbolt Acked-by: Mauro Carvalho Chehab Acked-by: Alexandre Belloni Acked-by: Heikki Krogerus Acked-by: Wolfram Sang Acked-by: Vinod Koul Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-6-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/acpi.h | 4 ++-- include/linux/device.h | 2 +- include/linux/i3c/device.h | 2 +- include/linux/soundwire/sdw_type.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 5e6a876e17ba..564b62f13bd0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -723,7 +723,7 @@ const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, const void *acpi_device_get_match_data(const struct device *dev); extern bool acpi_driver_match_device(struct device *dev, const struct device_driver *drv); -int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *); +int acpi_device_uevent_modalias(const struct device *, struct kobj_uevent_env *); int acpi_device_modalias(struct device *, char *, int); struct platform_device *acpi_create_platform_device(struct acpi_device *, @@ -958,7 +958,7 @@ static inline union acpi_object *acpi_evaluate_dsm(acpi_handle handle, return NULL; } -static inline int acpi_device_uevent_modalias(struct device *dev, +static inline int acpi_device_uevent_modalias(const struct device *dev, struct kobj_uevent_env *env) { return -ENODEV; diff --git a/include/linux/device.h b/include/linux/device.h index 44e3acae7b36..dad0614aad96 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -88,7 +88,7 @@ int subsys_virtual_register(struct bus_type *subsys, struct device_type { const char *name; const struct attribute_group **groups; - int (*uevent)(struct device *dev, struct kobj_uevent_env *env); + int (*uevent)(const struct device *dev, struct kobj_uevent_env *env); char *(*devnode)(struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid); void (*release)(struct device *dev); diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index 68b558929aec..ce115ef08fec 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -303,7 +303,7 @@ int i3c_device_do_priv_xfers(struct i3c_device *dev, int i3c_device_do_setdasa(struct i3c_device *dev); -void i3c_device_get_info(struct i3c_device *dev, struct i3c_device_info *info); +void i3c_device_get_info(const struct i3c_device *dev, struct i3c_device_info *info); struct i3c_ibi_payload { unsigned int len; diff --git a/include/linux/soundwire/sdw_type.h b/include/linux/soundwire/sdw_type.h index 52eb66cd11bc..d8c27f1e5559 100644 --- a/include/linux/soundwire/sdw_type.h +++ b/include/linux/soundwire/sdw_type.h @@ -21,7 +21,7 @@ static inline int is_sdw_slave(const struct device *dev) int __sdw_register_driver(struct sdw_driver *drv, struct module *owner); void sdw_unregister_driver(struct sdw_driver *drv); -int sdw_slave_uevent(struct device *dev, struct kobj_uevent_env *env); +int sdw_slave_uevent(const struct device *dev, struct kobj_uevent_env *env); /** * module_sdw_driver() - Helper macro for registering a Soundwire driver -- cgit v1.2.3 From a9b12f8b4e3309c4c25d39e7ab818943b9c48c1c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:08 +0100 Subject: driver core: make struct device_type.devnode() take a const * The devnode() callback in struct device_type should not be modifying the device that is passed 
into it, so mark it as a const * and propagate the function signature changes out into all relevant subsystems that use this callback. Cc: Jens Axboe Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Cc: Ben Widawsky Cc: Jeremy Kerr Cc: Joel Stanley Cc: Alistar Popple Cc: Eddie James Cc: Jonathan Cameron Cc: Jilin Yuan Cc: Heikki Krogerus Cc: Alan Stern Cc: Andy Shevchenko Cc: Thomas Gleixner Cc: Jason Gunthorpe Cc: "Rafael J. Wysocki" Cc: Won Chung Acked-by: Dan Williams Acked-by: Alexander Shishkin Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-7-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index dad0614aad96..318861000b83 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -89,7 +89,7 @@ struct device_type { const char *name; const struct attribute_group **groups; int (*uevent)(const struct device *dev, struct kobj_uevent_env *env); - char *(*devnode)(struct device *dev, umode_t *mode, + char *(*devnode)(const struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid); void (*release)(struct device *dev); -- cgit v1.2.3 From 42bb5be8936f40a1d0e618766645e7fd0cbfe591 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:09 +0100 Subject: driver core: device_get_devnode() should take a const * device_get_devnode() should take a constant * to struct device as it does not modify it in any way, so modify the function definition to do this and move it out of device.h as it does not need to be exposed to the whole kernel tree. Cc: Thomas Gleixner Cc: Jason Gunthorpe Cc: Ira Weiny Cc: Dan Williams Cc: Won Chung Acked-by: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-8-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 318861000b83..90aaf308c259 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -907,8 +907,6 @@ int device_rename(struct device *dev, const char *new_name); int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order); int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid); -const char *device_get_devnode(struct device *dev, umode_t *mode, kuid_t *uid, - kgid_t *gid, const char **tmp); int device_is_dependent(struct device *dev, void *target); static inline bool device_supports_offline(struct device *dev) -- cgit v1.2.3 From 8afdae83e318f3cfb6eb1ee2ce289356936a663c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:12 +0100 Subject: drivers: hv: move device_to_hv_device to use container_of_const() The driver core is changing to pass some pointers as const, so move device_to_hv_device() to use container_of_const() to handle this change. device_to_hv_device() now properly keeps the const-ness of the pointer passed into it, while as before it could be lost. Cc: "K. Y. 
Srinivasan" Cc: Haiyang Zhang Cc: Dexuan Cui Acked-by: Wei Liu Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-11-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 85f7c5a63aa6..8430e27f3c3f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1309,10 +1309,7 @@ struct hv_device { }; -static inline struct hv_device *device_to_hv_device(struct device *d) -{ - return container_of(d, struct hv_device, device); -} +#define device_to_hv_device(d) container_of_const(d, struct hv_device, device) static inline struct hv_driver *drv_to_hv_drv(struct device_driver *d) { -- cgit v1.2.3 From af3011b6637cd7e94b2420455d41ba9e66c03aed Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:13 +0100 Subject: virtio: move dev_to_virtio() to use container_of_const() The driver core is changing to pass some pointers as const, so move dev_to_virtio() to use container_of_const() to handle this change. dev_to_virtio() now properly keeps the const-ness of the pointer passed into it, while as before it could be lost. Cc: "Michael S. Tsirkin" Cc: Jason Wang Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-12-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/virtio.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index dcab9c7e8784..2b472514c49b 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -127,10 +127,7 @@ struct virtio_device { void *priv; }; -static inline struct virtio_device *dev_to_virtio(struct device *_dev) -{ - return container_of(_dev, struct virtio_device, dev); -} +#define dev_to_virtio(_dev) container_of_const(_dev, struct virtio_device, dev) void virtio_add_status(struct virtio_device *dev, unsigned int status); int register_virtio_device(struct virtio_device *dev); -- cgit v1.2.3 From deaf8b21c803b3932071a7cf9b11f36e13000f5c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:15 +0100 Subject: mcb: move to_mcb_device() to use container_of_const() The driver core is changing to pass some pointers as const, so move to_mcb_device() to use container_of_const() to handle this change. to_mcb_device() now properly keeps the const-ness of the pointer passed into it, while as before it could be lost. 
Cc: Johannes Thumshirn Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-14-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/mcb.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mcb.h b/include/linux/mcb.h index f6efb16f9d1b..1e5893138afe 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -76,10 +76,7 @@ struct mcb_device { struct device *dma_dev; }; -static inline struct mcb_device *to_mcb_device(struct device *dev) -{ - return container_of(dev, struct mcb_device, dev); -} +#define to_mcb_device(__dev) container_of_const(__dev, struct mcb_device, dev) /** * struct mcb_driver - MEN Chameleon Bus device driver -- cgit v1.2.3 From 2a81ada32f0e584fc0c943e0d3a8c9f4fae411d6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:17 +0100 Subject: driver core: make struct bus_type.uevent() take a const * The uevent() callback in struct bus_type should not be modifying the device that is passed into it, so mark it as a const * and propagate the function signature changes out into all relevant subsystems that use this callback. Acked-by: Rafael J. Wysocki Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-16-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 2 +- include/linux/spi/spi.h | 2 +- include/linux/ssb/ssb.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 7b4a48b5159b..87e4d029c915 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -90,7 +90,7 @@ struct bus_type { const struct attribute_group **drv_groups; int (*match)(struct device *dev, struct device_driver *drv); - int (*uevent)(struct device *dev, struct kobj_uevent_env *env); + int (*uevent)(const struct device *dev, struct kobj_uevent_env *env); int (*probe)(struct device *dev); void (*sync_state)(struct device *dev); void (*remove)(struct device *dev); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 9a32495fbb1f..2edf7f09239e 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -223,7 +223,7 @@ struct spi_device { static_assert((SPI_MODE_KERNEL_MASK & SPI_MODE_USER_MASK) == 0, "SPI_MODE_USER_MASK & SPI_MODE_KERNEL_MASK must not overlap"); -static inline struct spi_device *to_spi_device(struct device *dev) +static inline struct spi_device *to_spi_device(const struct device *dev) { return dev ? container_of(dev, struct spi_device, dev) : NULL; } diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index f9b53acb4e02..1f326da289d3 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -285,7 +285,7 @@ struct ssb_device { /* Go from struct device to struct ssb_device. */ static inline -struct ssb_device * dev_to_ssb_dev(struct device *dev) +struct ssb_device * dev_to_ssb_dev(const struct device *dev) { struct __ssb_dev_wrapper *wrap; wrap = container_of(dev, struct __ssb_dev_wrapper, dev); -- cgit v1.2.3 From 56d5f362ad0f8f60bc7c6fd5d7bc2b528d6963f5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 11 Jan 2023 12:30:18 +0100 Subject: kobject: kset_uevent_ops: make uevent() callback take a const * The uevent() callback in struct kset_uevent_ops does not modify the kobject passed into it, so make the pointer const to enforce this restriction. 
When doing so, fix up all existing uevent() callbacks to have the correct signature to preserve the build. Cc: Christine Caulfield Cc: David Teigland Cc: Bob Peterson Cc: Andreas Gruenbacher Acked-by: Rafael J. Wysocki Acked-by: Hans de Goede Link: https://lore.kernel.org/r/20230111113018.459199-17-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 58a5b75612e3..bdab370a24f4 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -137,7 +137,7 @@ struct kobj_uevent_env { struct kset_uevent_ops { int (* const filter)(const struct kobject *kobj); const char *(* const name)(const struct kobject *kobj); - int (* const uevent)(struct kobject *kobj, struct kobj_uevent_env *env); + int (* const uevent)(const struct kobject *kobj, struct kobj_uevent_env *env); }; struct kobj_attribute { -- cgit v1.2.3 From 37fe46051dc94c589b616018ba9d2fd4bacfbd8a Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 13 Jan 2023 20:13:43 +0300 Subject: dmaengine: Fix dma_slave_config.dst_addr description The dst_addr member of the dma_slave_config structure has been mistakenly marked as ignored if the *source* address belongs to the memory. That is relevant to the src_addr field of the structure, while the dst_addr field contains a destination device address that should be ignored if the destination is the CPU memory. Correct the @dst_addr description accordingly. Link: https://lore.kernel.org/r/20230113171409.30470-2-Sergey.Semin@baikalelectronics.ru Tested-by: Manivannan Sadhasivam Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Acked-by: Vinod Koul --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c923f4e60f24..0c020682d894 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -394,7 +394,7 @@ enum dma_slave_buswidth { * should be read (RX), if the source is memory this argument is * ignored. * @dst_addr: this is the physical address where DMA slave data - * should be written (TX), if the source is memory this argument + * should be written (TX), if the destination is memory this argument * is ignored. * @src_addr_width: this is the width in bytes of the source (RX) * register where DMA data shall be read. If the source -- cgit v1.2.3 From 002bbaa2f60e07d465f92a163365569481d34108 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 13 Jan 2023 20:13:45 +0300 Subject: dmaengine: dw-edma: Convert ll/dt phys address to PCI bus/DMA address The dw_edma_region.paddr field should be a memory base address visible by the DW eDMA controller. If the DMA engine is embedded in the DW PCIe Host/Endpoint controller, the address should belong to the Local CPU/ Application memory. If eDMA is remotely accessible across the PCI bus via PCI memory IOs, the address should be part of the PCI bus memory space. The latter case hasn't been well covered in the corresponding glue-driver. Since pci_dev.resource[] contains resources defined in the CPU memory space, they need to be converted to the PCI bus address space. Convert the LL, DT and CSRs PCI memory ranges with pci_bus_address(). In addition, extend the dw_edma_region.paddr field size. 
The field normally contains a memory range base address to be set in the DW eDMA Linked-List pointer register or as a base address of the Linked-List data buffer. In accordance with [1] the LL range is supposed to be created in the Local CPU/Application memory, but depending on the DW eDMA utilization the memory can be created as a part of the PCI bus address space (as in the case of the DW PCIe Endpoint prototype kit). In the former case dw_edma_region.paddr should be a dma_addr_t, while in the latter one it should be a pci_bus_addr_t. Since the corresponding CSRs are always 64 bits wide, convert dw_edma_region.paddr to be u64, and let the client make sure it has a valid address visible by the DW eDMA controller. For instance, the DW eDMA PCIe glue-driver initializes the field with addresses from the PCI bus memory space. [1] DesignWare Cores PCI Express Controller Databook - DWC PCIe Root Port, v.5.40a, March 2019, p.1103 Link: https://lore.kernel.org/r/20230113171409.30470-4-Sergey.Semin@baikalelectronics.ru Fixes: 41aaff2a2ac0 ("dmaengine: Add Synopsys eDMA IP PCIe glue-logic") Tested-by: Manivannan Sadhasivam Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Acked-by: Vinod Koul --- include/linux/dma/edma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index 7d8062e9c544..a864978ddd27 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -18,7 +18,7 @@ struct dw_edma; struct dw_edma_region { - phys_addr_t paddr; + u64 paddr; void __iomem *vaddr; size_t sz; }; -- cgit v1.2.3 From 993d57bbaacf7ad7a742e51cb717e21e0eb4ef72 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 13 Jan 2023 20:13:49 +0300 Subject: dmaengine: dw-edma: Add CPU to PCI bus address translation Since 9575632052ba ("dmaengine: make slave address physical"), the source and destination addresses of the DMA slave device have been converted to physical addresses in the CPU address space. It's the DMA device driver's responsibility to convert them to the DMA bus address space. In case of the DW eDMA device, the source or destination peripheral (slave) devices reside in PCI bus space. Thus we need to perform the PCI Host/Endpoint windows- based (i.e. DT "ranges" property) address translation; otherwise the eDMA transactions won't work as expected (or can be even harmful) if the CPU and PCI address spaces don't match. Note 1: Even though the DMA interleaved template has both source and destination addresses declared as dma_addr_t, only the CPU memory range should be mapped to be seen by the DMA device since it's a subject of the DMA getting towards the system side. The device part must not be mapped since the slave device resides in the PCI bus space, which isn't affected by IOMMUs or iATU translations. DW PCIe eDMA generates corresponding MWr/MRd TLPs on its own. Note 2: This functionality is mainly required for the remote eDMA setup since the CPU address must be manually translated into the PCI bus space before being written to LLI.{SAR,DAR}. If eDMA is embedded in the locally accessible DW PCIe Root Port/Endpoint, software-based translation isn't required since hardware will translate it via the Outbound iATU as long as the DMA_BYPASS flag is cleared. 
If DMA_BYPASS is set or there is no Outbound iATU entry that contains the SAR or DAR (for Read and Write channel respectively), there won't be any translation performed but DMA will proceed with the corresponding source/destination address as-is. Link: https://lore.kernel.org/r/20230113171409.30470-8-Sergey.Semin@baikalelectronics.ru Tested-by: Manivannan Sadhasivam Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Acked-by: Vinod Koul --- include/linux/dma/edma.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index a864978ddd27..07a23ecc834f 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -23,8 +23,23 @@ struct dw_edma_region { size_t sz; }; +/** + * struct dw_edma_core_ops - platform-specific eDMA methods + * @irq_vector: Get IRQ number of the passed eDMA channel. Note the + * method accepts the channel id in the end-to-end + * numbering with the eDMA write channels being placed + * first in the row. + * @pci_address: Get PCIe bus address corresponding to the passed CPU + * address. Note there is no need in specifying this + * function if the address translation is performed by + * the DW PCIe RP/EP controller with the DW eDMA device in + * subject and DMA_BYPASS isn't set for all the outbound + * iATU windows. That will be done by the controller + * automatically. + */ struct dw_edma_core_ops { int (*irq_vector)(struct device *dev, unsigned int nr); + u64 (*pci_address)(struct device *dev, phys_addr_t cpu_addr); }; enum dw_edma_map_format { -- cgit v1.2.3 From 18b1746bddf5e7f6b2618966596d9517172a5cd7 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Thu, 26 Jan 2023 00:28:03 +0200 Subject: RDMA/mlx5: Remove implicit ODP cache entry Implicit ODP mkey doesn't have unique properties. It shares the same properties as the order 18 cache entry. There is no need to devote a special entry for that. Link: https://lore.kernel.org/r/20230125222807.6921-3-michaelgur@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d476255c9a3f..f79c20d50eb4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -734,7 +734,6 @@ enum { enum { MKEY_CACHE_LAST_STD_ENTRY = 20, - MLX5_IMR_MTT_CACHE_ENTRY, MLX5_IMR_KSM_CACHE_ENTRY, MAX_MKEY_CACHE_ENTRIES }; -- cgit v1.2.3 From da27f796a832122ee533c7685438dad1c4e338dd Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Fri, 27 Jan 2023 13:46:51 -0500 Subject: ipc,namespace: batch free ipc_namespace structures Instead of waiting for an RCU grace period between each ipc_namespace structure that is being freed, wait an RCU grace period for every batch of ipc_namespace structures. Thanks to Al Viro for the suggestion of the helper function. 
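A minimal sketch of the batching idea, not the actual ipc/namespace code: the structures to be freed are collected on a list and a single grace period covers the whole batch.

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_ns {
	struct example_ns *next;
};

static void example_free_batch(struct example_ns *head)
{
	synchronize_rcu();	/* one wait for every namespace in the batch */

	while (head) {
		struct example_ns *next = head->next;

		kfree(head);
		head = next;
	}
}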
This speeds up the run time of the test case that allocates ipc_namespaces in a loop from 6 minutes, to a little over 1 second: real 0m1.192s user 0m0.038s sys 0m1.152s Signed-off-by: Rik van Riel Reported-by: Chris Mason Tested-by: Giuseppe Scrivano Suggested-by: Al Viro Signed-off-by: Al Viro --- include/linux/mount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 62475996fac6..ec55a031aa8c 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -88,6 +88,7 @@ extern void mnt_drop_write(struct vfsmount *mnt); extern void mnt_drop_write_file(struct file *file); extern void mntput(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt); +extern void mnt_make_shortterm(struct vfsmount *mnt); extern struct vfsmount *mnt_clone_internal(const struct path *path); extern bool __mnt_is_readonly(struct vfsmount *mnt); extern bool mnt_may_suid(struct vfsmount *mnt); -- cgit v1.2.3 From d35d0c9de762e003129dfc1240df7152a4bc31e8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Jan 2023 15:12:46 +0200 Subject: leds: Add missing includes and forward declarations in leds.h Add missing includes and forward declarations to leds.h. While at it, replace headers by forward declarations and vise versa. Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230103131256.33894-2-andriy.shevchenko@linux.intel.com --- include/linux/leds.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 31cb74b90ffc..4df46a85e5c2 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -10,17 +10,21 @@ #include #include -#include -#include #include #include #include #include +#include #include -struct device; -struct led_pattern; +struct attribute_group; struct device_node; +struct fwnode_handle; +struct gpio_desc; +struct kernfs_node; +struct led_pattern; +struct platform_device; + /* * LED Core */ @@ -529,7 +533,6 @@ struct led_properties { const char *label; }; -struct gpio_desc; typedef int (*gpio_blink_set_t)(struct gpio_desc *desc, int state, unsigned long *delay_on, unsigned long *delay_off); -- cgit v1.2.3 From 156a5bb89ca6f3edd2be0bfd0de15e575442927e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Jan 2023 15:12:47 +0200 Subject: leds: Move led_init_default_state_get() to the global header There are users inside and outside LED framework that have implemented a local copy of led_init_default_state_get(). In order to deduplicate that, as the first step move the declaration from LED header to the global one. 
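For context, a sketch of how a LED driver consumes the helper once it is globally visible; the wrapper is assumed to be called from a driver's probe path and the LEDS_DEFSTATE_* values are the ones the LED core already defines:

#include <linux/leds.h>

static void example_led_apply_default(struct fwnode_handle *fwnode,
				      struct led_classdev *cdev)
{
	switch (led_init_default_state_get(fwnode)) {
	case LEDS_DEFSTATE_ON:
		led_set_brightness(cdev, cdev->max_brightness);
		break;
	case LEDS_DEFSTATE_KEEP:
		/* leave whatever the bootloader/hardware configured */
		break;
	default:
		led_set_brightness(cdev, LED_OFF);
		break;
	}
}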
Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230103131256.33894-3-andriy.shevchenko@linux.intel.com --- include/linux/leds.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 4df46a85e5c2..d71201a968b6 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -82,6 +82,8 @@ struct led_init_data { bool devname_mandatory; }; +enum led_default_state led_init_default_state_get(struct fwnode_handle *fwnode); + struct led_hw_trigger_type { int dummy; }; -- cgit v1.2.3 From acf63c458b55ecfb2015b33dd6ba3cc8fbc1c5d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 16 Jan 2023 12:08:45 +0200 Subject: fpga: m10bmc-sec: Add support for N6000 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for PMCI-based flash access path and N6000 sec update support. Access to flash staging area is different for N6000 from that of the SPI interfaced counterparts. Introduce intel_m10bmc_flash_bulk_ops to allow interface specific differentiations for the flash access path for sec update and make m10bmc_sec_read/write() in sec update driver to use the new operations. The .flash_mutex serializes read/read. Flash update (erase+write) must use ->lock/unlock_write() to prevent reads during update (reads would timeout on setting flash MUX as BMC will prevent it). Create a type specific RSU status reg handler for N6000 because the field has moved from doorbell to auth result register. If a failure is detected while altering the flash MUX, it seems safer to try to set it back and doesn't seem harmful. Likely there are enough troubles in that case anyway so setting it back fails too (which is harmless sans the small extra delay) or just confirms that the value wasn't changed. 
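Based on the intel_m10bmc_flash_bulk_ops interface added below, the flash write path can be sketched roughly as follows; the wrapper name and the chosen error codes are illustrative:

#include <linux/mfd/intel-m10-bmc.h>

static int example_sec_flash_write(struct intel_m10bmc *m10bmc,
				   const u8 *buf, u32 offset, u32 size)
{
	const struct intel_m10bmc_flash_bulk_ops *ops = m10bmc->flash_bulk_ops;
	int ret;

	if (!ops)
		return -EOPNOTSUPP;

	/* take the flash for erase+write; concurrent reads now return -EBUSY */
	ret = ops->lock_write(m10bmc);
	if (ret)
		return ret;

	ret = ops->write(m10bmc, buf, offset, size);

	ops->unlock_write(m10bmc);
	return ret;
}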
Co-developed-by: Tianfei zhang Signed-off-by: Tianfei zhang Co-developed-by: Russ Weight Signed-off-by: Russ Weight Acked-by: Xu Yilun Signed-off-by: Ilpo Järvinen Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230116100845.6153-12-ilpo.jarvinen@linux.intel.com --- include/linux/mfd/intel-m10-bmc.h | 51 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel-m10-bmc.h b/include/linux/mfd/intel-m10-bmc.h index 810534b1bd12..1812ebfa11a8 100644 --- a/include/linux/mfd/intel-m10-bmc.h +++ b/include/linux/mfd/intel-m10-bmc.h @@ -127,6 +127,7 @@ #define M10BMC_N6000_DOORBELL 0x1c0 #define M10BMC_N6000_AUTH_RESULT 0x1c4 +#define AUTH_RESULT_RSU_STATUS GENMASK(23, 16) #define M10BMC_N6000_BUILD_VER 0x0 #define NIOS2_N6000_FW_VERSION 0x4 @@ -148,6 +149,35 @@ #define M10BMC_N6000_STAGING_FLASH_COUNT 0x7ff5000 +#define M10BMC_N6000_FLASH_MUX_CTRL 0x1d0 +#define M10BMC_N6000_FLASH_MUX_SELECTION GENMASK(2, 0) +#define M10BMC_N6000_FLASH_MUX_IDLE 0 +#define M10BMC_N6000_FLASH_MUX_NIOS 1 +#define M10BMC_N6000_FLASH_MUX_HOST 2 +#define M10BMC_N6000_FLASH_MUX_PFL 4 +#define get_flash_mux(mux) FIELD_GET(M10BMC_N6000_FLASH_MUX_SELECTION, mux) + +#define M10BMC_N6000_FLASH_NIOS_REQUEST BIT(4) +#define M10BMC_N6000_FLASH_HOST_REQUEST BIT(5) + +#define M10BMC_N6000_FLASH_CTRL 0x40 +#define M10BMC_N6000_FLASH_WR_MODE BIT(0) +#define M10BMC_N6000_FLASH_RD_MODE BIT(1) +#define M10BMC_N6000_FLASH_BUSY BIT(2) +#define M10BMC_N6000_FLASH_FIFO_SPACE GENMASK(13, 4) +#define M10BMC_N6000_FLASH_READ_COUNT GENMASK(25, 16) + +#define M10BMC_N6000_FLASH_ADDR 0x44 +#define M10BMC_N6000_FLASH_FIFO 0x800 +#define M10BMC_N6000_READ_BLOCK_SIZE 0x800 +#define M10BMC_N6000_FIFO_MAX_BYTES 0x800 +#define M10BMC_N6000_FIFO_WORD_SIZE 4 +#define M10BMC_N6000_FIFO_MAX_WORDS (M10BMC_N6000_FIFO_MAX_BYTES / \ + M10BMC_N6000_FIFO_WORD_SIZE) + +#define M10BMC_FLASH_INT_US 1 +#define M10BMC_FLASH_TIMEOUT_US 10000 + /** * struct m10bmc_csr_map - Intel MAX 10 BMC CSR register map */ @@ -183,16 +213,37 @@ struct intel_m10bmc_platform_info { const struct m10bmc_csr_map *csr_map; }; +struct intel_m10bmc; + +/** + * struct intel_m10bmc_flash_bulk_ops - device specific operations for flash R/W + * @read: read a block of data from flash + * @write: write a block of data to flash + * @lock_write: locks flash access for erase+write + * @unlock_write: unlock flash access + * + * Write must be protected with @lock_write and @unlock_write. While the flash + * is locked, @read returns -EBUSY. 
+ */ +struct intel_m10bmc_flash_bulk_ops { + int (*read)(struct intel_m10bmc *m10bmc, u8 *buf, u32 addr, u32 size); + int (*write)(struct intel_m10bmc *m10bmc, const u8 *buf, u32 offset, u32 size); + int (*lock_write)(struct intel_m10bmc *m10bmc); + void (*unlock_write)(struct intel_m10bmc *m10bmc); +}; + /** * struct intel_m10bmc - Intel MAX 10 BMC parent driver data structure * @dev: this device * @regmap: the regmap used to access registers by m10bmc itself * @info: the platform information for MAX10 BMC + * @flash_bulk_ops: optional device specific operations for flash R/W */ struct intel_m10bmc { struct device *dev; struct regmap *regmap; const struct intel_m10bmc_platform_info *info; + const struct intel_m10bmc_flash_bulk_ops *flash_bulk_ops; }; /* -- cgit v1.2.3 From 79729f26b074a5d2722c27fa76cc45ef721e65cd Mon Sep 17 00:00:00 2001 From: Evgeniy Baskov Date: Tue, 22 Nov 2022 14:12:31 +0300 Subject: efi/libstub: Add memory attribute protocol definitions EFI_MEMORY_ATTRIBUTE_PROTOCOL serves as a better alternative to DXE services for setting memory attributes in the EFI Boot Services environment. This protocol is better since it is a part of the UEFI specification itself and not the UEFI PI specification like DXE services. Add EFI_MEMORY_ATTRIBUTE_PROTOCOL definitions. Support mixed mode properly for its calls. Tested-by: Mario Limonciello Signed-off-by: Evgeniy Baskov Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 1a1adc8d3ba3..507390dda8b9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -391,6 +391,7 @@ void efi_native_runtime_setup(void); #define EFI_RT_PROPERTIES_TABLE_GUID EFI_GUID(0xeb66918a, 0x7eef, 0x402a, 0x84, 0x2e, 0x93, 0x1d, 0x21, 0xc3, 0x8a, 0xe9) #define EFI_DXE_SERVICES_TABLE_GUID EFI_GUID(0x05ad34ba, 0x6f02, 0x4214, 0x95, 0x2e, 0x4d, 0xa0, 0x39, 0x8e, 0x2b, 0xb9) #define EFI_SMBIOS_PROTOCOL_GUID EFI_GUID(0x03583ff6, 0xcb36, 0x4940, 0x94, 0x7e, 0xb9, 0xb3, 0x9f, 0x4a, 0xfa, 0xf7) +#define EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID EFI_GUID(0xf4560cf6, 0x40ec, 0x4b4a, 0xa1, 0x92, 0xbf, 0x1d, 0x57, 0xd0, 0xb1, 0x89) #define EFI_IMAGE_SECURITY_DATABASE_GUID EFI_GUID(0xd719b2cb, 0x3d3a, 0x4596, 0xa3, 0xbc, 0xda, 0xd0, 0x0e, 0x67, 0x65, 0x6f) #define EFI_SHIM_LOCK_GUID EFI_GUID(0x605dab50, 0xe046, 0x4300, 0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23) -- cgit v1.2.3 From dbd7a2a941b8cbf9e5f79a777ed9fe0090eebb61 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 23 Jan 2023 07:37:45 -0800 Subject: PM / devfreq: Fix build issues with devfreq disabled The existing no-op shims for when PM_DEVFREQ (or an individual governor) is disabled only do half the job. The governor-specific config/tuning structs need to be available to avoid compile errors in drivers using devfreq.
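The underlying pattern is the usual one for optional subsystems, sketched here in generic form: keep the data types visible unconditionally and stub out only the functions, so a driver that fills in a tuning struct still compiles when the feature is off. CONFIG_EXAMPLE and the names are placeholders:

#include <linux/device.h>
#include <linux/errno.h>

/* always visible, so drivers can embed and initialize it unconditionally */
struct example_tuning {
	unsigned int upthreshold;
	unsigned int downdifferential;
};

#if IS_ENABLED(CONFIG_EXAMPLE)
int example_enable(struct device *dev, struct example_tuning *t);
#else
static inline int example_enable(struct device *dev, struct example_tuning *t)
{
	return -ENODEV;		/* no-op shim: same signature, harmless result */
}
#endif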
Fixes: 6563f60f14cb ("drm/msm/gpu: Add devfreq tuning debugfs") Signed-off-by: Rob Clark Acked-by: MyungJoo Ham Acked-by: Chanwoo Choi Patchwork: https://patchwork.freedesktop.org/patch/519801/ Link: https://lore.kernel.org/r/20230123153745.3185032-1-robdclark@gmail.com --- include/linux/devfreq.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 4dc7cda4fd46..7fd704bb8f3d 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -273,8 +273,8 @@ void devm_devfreq_unregister_notifier(struct device *dev, struct devfreq *devfreq_get_devfreq_by_node(struct device_node *node); struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, const char *phandle_name, int index); +#endif /* CONFIG_PM_DEVFREQ */ -#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) /** * struct devfreq_simple_ondemand_data - ``void *data`` fed to struct devfreq * and devfreq_add_device @@ -292,9 +292,7 @@ struct devfreq_simple_ondemand_data { unsigned int upthreshold; unsigned int downdifferential; }; -#endif -#if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE) enum devfreq_parent_dev_type { DEVFREQ_PARENT_DEV, CPUFREQ_PARENT_DEV, @@ -337,9 +335,8 @@ struct devfreq_passive_data { struct notifier_block nb; struct list_head cpu_data_list; }; -#endif -#else /* !CONFIG_PM_DEVFREQ */ +#if !defined(CONFIG_PM_DEVFREQ) static inline struct devfreq *devfreq_add_device(struct device *dev, struct devfreq_dev_profile *profile, const char *governor_name, -- cgit v1.2.3 From b3c650ad9bb88ecf36b9aeacf9e7eb7478258da7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 21 Nov 2022 17:15:41 +0100 Subject: pwm: Move pwm_capture() dummy to restore order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the dummy pwm_capture() to make the declaration order of all dummies match the declaration order of the real functions.
Signed-off-by: Geert Uytterhoeven Acked-by: Uwe Kleine-König Signed-off-by: Thierry Reding --- include/linux/pwm.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 161e91167b9c..7b7b93b6fb81 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -440,13 +440,6 @@ static inline int pwm_config(struct pwm_device *pwm, int duty_ns, return -EINVAL; } -static inline int pwm_capture(struct pwm_device *pwm, - struct pwm_capture *result, - unsigned long timeout) -{ - return -EINVAL; -} - static inline int pwm_enable(struct pwm_device *pwm) { might_sleep(); @@ -458,6 +451,13 @@ static inline void pwm_disable(struct pwm_device *pwm) might_sleep(); } +static inline int pwm_capture(struct pwm_device *pwm, + struct pwm_capture *result, + unsigned long timeout) +{ + return -EINVAL; +} + static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data) { return -EINVAL; -- cgit v1.2.3 From 724ef01569519d1e7a95231688b6a5f8eaba29f2 Mon Sep 17 00:00:00 2001 From: Mario Kicherer Date: Thu, 26 Jan 2023 15:40:50 +0100 Subject: mtd: spinand: Add support for AllianceMemory AS5F34G04SND Add support for AllianceMemory AS5F34G04SND SPI NAND flash Datasheet: - https://www.alliancememory.com/wp-content/uploads/pdf/flash/AllianceMemory_SPI_NAND_Flash_July2020_Rev1.0.pdf Signed-off-by: Mario Kicherer Reviewed-by: Dhruva Gole Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230126144050.2656358-1-dev@kicherer.org --- include/linux/mtd/spinand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 6d3392a7edc6..01be9f0f008a 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -260,6 +260,7 @@ struct spinand_manufacturer { }; /* SPI NAND manufacturers */ +extern const struct spinand_manufacturer alliancememory_spinand_manufacturer; extern const struct spinand_manufacturer ato_spinand_manufacturer; extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; -- cgit v1.2.3 From 246a1c4c6b7ffba88a2553d2b88f7b6280f253a2 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Mon, 23 Jan 2023 20:09:54 +0100 Subject: ata: pata_parport: add driver (PARIDE replacement) The pata_parport is a libata-based replacement of the old PARIDE subsystem - driver for parallel port IDE devices. It uses the original paride low-level protocol drivers but does not need the high-level drivers (pd, pcd, pf, pt, pg). The IDE devices behind parallel port adapters are handled by the ATA layer. This will allow paride and its high-level drivers to be removed. Unfortunately, libata drivers cannot sleep so pata_parport claims parport before activating the ata host and keeps it claimed (and protocol connected) until the ata host is removed. This means that no devices can be chained (neither other pata_parport devices nor a printer). paride and pata_parport are mutually exclusive because the compiled protocol drivers are incompatible. 
Tested with: - Imation SuperDisk LS-120 and HP C4381A (EPAT) - Freecom Parallel CD (FRPW) - Toshiba Mobile CD-RW 2793008 w/Freecom Parallel Cable rev.903 (FRIQ) - Backpack CD-RW 222011 and CD-RW 19350 (BPCK6) The following bugs in low-level protocol drivers were found and will be fixed later: Note: EPP-32 mode is buggy in EPAT - and also in all other protocol drivers - they don't handle non-multiple-of-4 block transfers correctly. This causes problems with LS-120 drive. There is also another bug in EPAT: EPP modes don't work unless a 4-bit or 8-bit mode is used first (probably some initialization missing?). Once the device is initialized, EPP works until power cycle. So after device power on, you have to: echo "parport0 epat 0" >/sys/bus/pata_parport/new_device echo pata_parport.0 >/sys/bus/pata_parport/delete_device echo "parport0 epat 4" >/sys/bus/pata_parport/new_device (autoprobe will initialize correctly as it tries the slowest modes first but you'll get the broken EPP-32 mode) Note: EPP modes are buggy in FRPW, only modes 0 and 1 work. Signed-off-by: Ondrej Zary Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Acked-by: Jens Axboe Signed-off-by: Damien Le Moal --- include/linux/pata_parport.h | 111 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 include/linux/pata_parport.h (limited to 'include/linux') diff --git a/include/linux/pata_parport.h b/include/linux/pata_parport.h new file mode 100644 index 000000000000..58781846f282 --- /dev/null +++ b/include/linux/pata_parport.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * pata_parport.h (c) 1997-8 Grant R. Guenther + * Under the terms of the GPL. + * + * This file defines the interface for parallel port IDE adapter chip drivers. + */ + +#ifndef LINUX_PATA_PARPORT_H +#define LINUX_PATA_PARPORT_H + +#include + +#define PI_PCD 1 /* dummy for paride protocol modules */ + +struct pi_adapter { + struct device dev; + struct pi_protocol *proto; /* adapter protocol */ + int port; /* base address of parallel port */ + int mode; /* transfer mode in use */ + int delay; /* adapter delay setting */ + int devtype; /* dummy for paride protocol modules */ + char *device; /* dummy for paride protocol modules */ + int unit; /* unit number for chained adapters */ + int saved_r0; /* saved port state */ + int saved_r2; /* saved port state */ + unsigned long private; /* for protocol module */ + struct pardevice *pardev; /* pointer to pardevice */ +}; + +typedef struct pi_adapter PIA; /* for paride protocol modules */ + +/* registers are addressed as (cont,regr) + * cont: 0 for command register file, 1 for control register(s) + * regr: 0-7 for register number. + */ + +/* macros and functions exported to the protocol modules */ +#define delay_p (pi->delay ? 
udelay(pi->delay) : (void)0) +#define out_p(offs, byte) do { outb(byte, pi->port + offs); delay_p; } while (0) +#define in_p(offs) (delay_p, inb(pi->port + offs)) + +#define w0(byte) out_p(0, byte) +#define r0() in_p(0) +#define w1(byte) out_p(1, byte) +#define r1() in_p(1) +#define w2(byte) out_p(2, byte) +#define r2() in_p(2) +#define w3(byte) out_p(3, byte) +#define w4(byte) out_p(4, byte) +#define r4() in_p(4) +#define w4w(data) do { outw(data, pi->port + 4); delay_p; } while (0) +#define w4l(data) do { outl(data, pi->port + 4); delay_p; } while (0) +#define r4w() (delay_p, inw(pi->port + 4)) +#define r4l() (delay_p, inl(pi->port + 4)) + +static inline u16 pi_swab16(char *b, int k) +{ + union { u16 u; char t[2]; } r; + + r.t[0] = b[2 * k + 1]; r.t[1] = b[2 * k]; + return r.u; +} + +static inline u32 pi_swab32(char *b, int k) +{ + union { u32 u; char f[4]; } r; + + r.f[0] = b[4 * k + 1]; r.f[1] = b[4 * k]; + r.f[2] = b[4 * k + 3]; r.f[3] = b[4 * k + 2]; + return r.u; +} + +struct pi_protocol { + char name[8]; + + int max_mode; + int epp_first; /* modes >= this use 8 ports */ + + int default_delay; + int max_units; /* max chained units probed for */ + + void (*write_regr)(struct pi_adapter *pi, int cont, int regr, int val); + int (*read_regr)(struct pi_adapter *pi, int cont, int regr); + void (*write_block)(struct pi_adapter *pi, char *buf, int count); + void (*read_block)(struct pi_adapter *pi, char *buf, int count); + + void (*connect)(struct pi_adapter *pi); + void (*disconnect)(struct pi_adapter *pi); + + int (*test_port)(struct pi_adapter *pi); + int (*probe_unit)(struct pi_adapter *pi); + int (*test_proto)(struct pi_adapter *pi, char *scratch, int verbose); + void (*log_adapter)(struct pi_adapter *pi, char *scratch, int verbose); + + int (*init_proto)(struct pi_adapter *pi); + void (*release_proto)(struct pi_adapter *pi); + struct module *owner; + struct device_driver driver; + struct scsi_host_template sht; +}; + +#define PATA_PARPORT_SHT ATA_PIO_SHT + +int pata_parport_register_driver(struct pi_protocol *pr); +void pata_parport_unregister_driver(struct pi_protocol *pr); +/* defines for old paride protocol modules */ +#define paride_register pata_parport_register_driver +#define paride_unregister pata_parport_unregister_driver + +#endif /* LINUX_PATA_PARPORT_H */ -- cgit v1.2.3 From e1d5d71d189f290343fb1f18eecf77335c5d1ef3 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Fri, 27 Jan 2023 00:14:53 +0100 Subject: usb: uvc: move media/v4l2-uvc.h to usb/uvc.h Since the headerfile is only used in usb devices it is better placed with the other usb files. 
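Most of the header shown below is a table mapping UVC format GUIDs to V4L2 fourccs. Consumers typically resolve a format with a linear memcmp() scan over that table; a rough sketch, with an illustrative helper name rather than the in-tree API:

static u32 example_fourcc_by_guid(const u8 guid[16])
{
	unsigned int i;

	/* uvc_fmts is the GUID/fourcc table defined in the header below */
	for (i = 0; i < ARRAY_SIZE(uvc_fmts); i++) {
		if (!memcmp(guid, uvc_fmts[i].guid, 16))
			return uvc_fmts[i].fcc;
	}

	return 0;	/* unknown format */
}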
Reviewed-by: Laurent Pinchart Reviewed-by: Daniel Scally Tested-by: Daniel Scally Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20230126231456.3402323-3-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/uvc.h | 367 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 367 insertions(+) create mode 100644 include/linux/usb/uvc.h (limited to 'include/linux') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h new file mode 100644 index 000000000000..b010a36fc1d9 --- /dev/null +++ b/include/linux/usb/uvc.h @@ -0,0 +1,367 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * v4l2 uvc internal API header + * + * Some commonly needed functions for uvc drivers + */ + +#ifndef __LINUX_V4L2_UVC_H +#define __LINUX_V4L2_UVC_H + +/* ------------------------------------------------------------------------ + * GUIDs + */ +#define UVC_GUID_UVC_CAMERA \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01} +#define UVC_GUID_UVC_OUTPUT \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02} +#define UVC_GUID_UVC_MEDIA_TRANSPORT_INPUT \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03} +#define UVC_GUID_UVC_PROCESSING \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01} +#define UVC_GUID_UVC_SELECTOR \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02} +#define UVC_GUID_EXT_GPIO_CONTROLLER \ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x03} + +#define UVC_GUID_FORMAT_MJPEG \ + { 'M', 'J', 'P', 'G', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_YUY2 \ + { 'Y', 'U', 'Y', '2', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_YUY2_ISIGHT \ + { 'Y', 'U', 'Y', '2', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_NV12 \ + { 'N', 'V', '1', '2', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_YV12 \ + { 'Y', 'V', '1', '2', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_I420 \ + { 'I', '4', '2', '0', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_UYVY \ + { 'U', 'Y', 'V', 'Y', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y800 \ + { 'Y', '8', '0', '0', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y8 \ + { 'Y', '8', ' ', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y10 \ + { 'Y', '1', '0', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y12 \ + { 'Y', '1', '2', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y16 \ + { 'Y', '1', '6', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_BY8 \ + { 'B', 'Y', '8', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_BA81 \ + { 'B', 'A', '8', '1', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 
0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_GBRG \ + { 'G', 'B', 'R', 'G', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_GRBG \ + { 'G', 'R', 'B', 'G', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_RGGB \ + { 'R', 'G', 'G', 'B', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_BG16 \ + { 'B', 'G', '1', '6', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_GB16 \ + { 'G', 'B', '1', '6', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_RG16 \ + { 'R', 'G', '1', '6', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_GR16 \ + { 'G', 'R', '1', '6', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_RGBP \ + { 'R', 'G', 'B', 'P', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_BGR3 \ + { 0x7d, 0xeb, 0x36, 0xe4, 0x4f, 0x52, 0xce, 0x11, \ + 0x9f, 0x53, 0x00, 0x20, 0xaf, 0x0b, 0xa7, 0x70} +#define UVC_GUID_FORMAT_BGR4 \ + { 0x7e, 0xeb, 0x36, 0xe4, 0x4f, 0x52, 0xce, 0x11, \ + 0x9f, 0x53, 0x00, 0x20, 0xaf, 0x0b, 0xa7, 0x70} +#define UVC_GUID_FORMAT_M420 \ + { 'M', '4', '2', '0', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} + +#define UVC_GUID_FORMAT_H264 \ + { 'H', '2', '6', '4', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_H265 \ + { 'H', '2', '6', '5', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y8I \ + { 'Y', '8', 'I', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Y12I \ + { 'Y', '1', '2', 'I', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_Z16 \ + { 'Z', '1', '6', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_RW10 \ + { 'R', 'W', '1', '0', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_INVZ \ + { 'I', 'N', 'V', 'Z', 0x90, 0x2d, 0x58, 0x4a, \ + 0x92, 0x0b, 0x77, 0x3f, 0x1f, 0x2c, 0x55, 0x6b} +#define UVC_GUID_FORMAT_INZI \ + { 'I', 'N', 'Z', 'I', 0x66, 0x1a, 0x42, 0xa2, \ + 0x90, 0x65, 0xd0, 0x18, 0x14, 0xa8, 0xef, 0x8a} +#define UVC_GUID_FORMAT_INVI \ + { 'I', 'N', 'V', 'I', 0xdb, 0x57, 0x49, 0x5e, \ + 0x8e, 0x3f, 0xf4, 0x79, 0x53, 0x2b, 0x94, 0x6f} +#define UVC_GUID_FORMAT_CNF4 \ + { 'C', ' ', ' ', ' ', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} + +#define UVC_GUID_FORMAT_D3DFMT_L8 \ + {0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} +#define UVC_GUID_FORMAT_KSMEDIA_L8_IR \ + {0x32, 0x00, 0x00, 0x00, 0x02, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} + +#define UVC_GUID_FORMAT_HEVC \ + { 'H', 'E', 'V', 'C', 0x00, 0x00, 0x10, 0x00, \ + 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} + +/* ------------------------------------------------------------------------ + * Video formats + */ + +struct uvc_format_desc { + char *name; + u8 guid[16]; + u32 fcc; +}; + +static struct uvc_format_desc uvc_fmts[] = { + { + .name = "YUV 4:2:2 (YUYV)", + .guid = UVC_GUID_FORMAT_YUY2, + .fcc = V4L2_PIX_FMT_YUYV, + }, + { 
+ .name = "YUV 4:2:2 (YUYV)", + .guid = UVC_GUID_FORMAT_YUY2_ISIGHT, + .fcc = V4L2_PIX_FMT_YUYV, + }, + { + .name = "YUV 4:2:0 (NV12)", + .guid = UVC_GUID_FORMAT_NV12, + .fcc = V4L2_PIX_FMT_NV12, + }, + { + .name = "MJPEG", + .guid = UVC_GUID_FORMAT_MJPEG, + .fcc = V4L2_PIX_FMT_MJPEG, + }, + { + .name = "YVU 4:2:0 (YV12)", + .guid = UVC_GUID_FORMAT_YV12, + .fcc = V4L2_PIX_FMT_YVU420, + }, + { + .name = "YUV 4:2:0 (I420)", + .guid = UVC_GUID_FORMAT_I420, + .fcc = V4L2_PIX_FMT_YUV420, + }, + { + .name = "YUV 4:2:0 (M420)", + .guid = UVC_GUID_FORMAT_M420, + .fcc = V4L2_PIX_FMT_M420, + }, + { + .name = "YUV 4:2:2 (UYVY)", + .guid = UVC_GUID_FORMAT_UYVY, + .fcc = V4L2_PIX_FMT_UYVY, + }, + { + .name = "Greyscale 8-bit (Y800)", + .guid = UVC_GUID_FORMAT_Y800, + .fcc = V4L2_PIX_FMT_GREY, + }, + { + .name = "Greyscale 8-bit (Y8 )", + .guid = UVC_GUID_FORMAT_Y8, + .fcc = V4L2_PIX_FMT_GREY, + }, + { + .name = "Greyscale 8-bit (D3DFMT_L8)", + .guid = UVC_GUID_FORMAT_D3DFMT_L8, + .fcc = V4L2_PIX_FMT_GREY, + }, + { + .name = "IR 8-bit (L8_IR)", + .guid = UVC_GUID_FORMAT_KSMEDIA_L8_IR, + .fcc = V4L2_PIX_FMT_GREY, + }, + { + .name = "Greyscale 10-bit (Y10 )", + .guid = UVC_GUID_FORMAT_Y10, + .fcc = V4L2_PIX_FMT_Y10, + }, + { + .name = "Greyscale 12-bit (Y12 )", + .guid = UVC_GUID_FORMAT_Y12, + .fcc = V4L2_PIX_FMT_Y12, + }, + { + .name = "Greyscale 16-bit (Y16 )", + .guid = UVC_GUID_FORMAT_Y16, + .fcc = V4L2_PIX_FMT_Y16, + }, + { + .name = "BGGR Bayer (BY8 )", + .guid = UVC_GUID_FORMAT_BY8, + .fcc = V4L2_PIX_FMT_SBGGR8, + }, + { + .name = "BGGR Bayer (BA81)", + .guid = UVC_GUID_FORMAT_BA81, + .fcc = V4L2_PIX_FMT_SBGGR8, + }, + { + .name = "GBRG Bayer (GBRG)", + .guid = UVC_GUID_FORMAT_GBRG, + .fcc = V4L2_PIX_FMT_SGBRG8, + }, + { + .name = "GRBG Bayer (GRBG)", + .guid = UVC_GUID_FORMAT_GRBG, + .fcc = V4L2_PIX_FMT_SGRBG8, + }, + { + .name = "RGGB Bayer (RGGB)", + .guid = UVC_GUID_FORMAT_RGGB, + .fcc = V4L2_PIX_FMT_SRGGB8, + }, + { + .name = "RGB565", + .guid = UVC_GUID_FORMAT_RGBP, + .fcc = V4L2_PIX_FMT_RGB565, + }, + { + .name = "BGR 8:8:8 (BGR3)", + .guid = UVC_GUID_FORMAT_BGR3, + .fcc = V4L2_PIX_FMT_BGR24, + }, + { + .name = "BGRA/X 8:8:8:8 (BGR4)", + .guid = UVC_GUID_FORMAT_BGR4, + .fcc = V4L2_PIX_FMT_XBGR32, + }, + { + .name = "H.264", + .guid = UVC_GUID_FORMAT_H264, + .fcc = V4L2_PIX_FMT_H264, + }, + { + .name = "H.265", + .guid = UVC_GUID_FORMAT_H265, + .fcc = V4L2_PIX_FMT_HEVC, + }, + { + .name = "Greyscale 8 L/R (Y8I)", + .guid = UVC_GUID_FORMAT_Y8I, + .fcc = V4L2_PIX_FMT_Y8I, + }, + { + .name = "Greyscale 12 L/R (Y12I)", + .guid = UVC_GUID_FORMAT_Y12I, + .fcc = V4L2_PIX_FMT_Y12I, + }, + { + .name = "Depth data 16-bit (Z16)", + .guid = UVC_GUID_FORMAT_Z16, + .fcc = V4L2_PIX_FMT_Z16, + }, + { + .name = "Bayer 10-bit (SRGGB10P)", + .guid = UVC_GUID_FORMAT_RW10, + .fcc = V4L2_PIX_FMT_SRGGB10P, + }, + { + .name = "Bayer 16-bit (SBGGR16)", + .guid = UVC_GUID_FORMAT_BG16, + .fcc = V4L2_PIX_FMT_SBGGR16, + }, + { + .name = "Bayer 16-bit (SGBRG16)", + .guid = UVC_GUID_FORMAT_GB16, + .fcc = V4L2_PIX_FMT_SGBRG16, + }, + { + .name = "Bayer 16-bit (SRGGB16)", + .guid = UVC_GUID_FORMAT_RG16, + .fcc = V4L2_PIX_FMT_SRGGB16, + }, + { + .name = "Bayer 16-bit (SGRBG16)", + .guid = UVC_GUID_FORMAT_GR16, + .fcc = V4L2_PIX_FMT_SGRBG16, + }, + { + .name = "Depth data 16-bit (Z16)", + .guid = UVC_GUID_FORMAT_INVZ, + .fcc = V4L2_PIX_FMT_Z16, + }, + { + .name = "Greyscale 10-bit (Y10 )", + .guid = UVC_GUID_FORMAT_INVI, + .fcc = V4L2_PIX_FMT_Y10, + }, + { + .name = "IR:Depth 26-bit (INZI)", + .guid = UVC_GUID_FORMAT_INZI, + 
.fcc = V4L2_PIX_FMT_INZI, + }, + { + .name = "4-bit Depth Confidence (Packed)", + .guid = UVC_GUID_FORMAT_CNF4, + .fcc = V4L2_PIX_FMT_CNF4, + }, + { + .name = "HEVC", + .guid = UVC_GUID_FORMAT_HEVC, + .fcc = V4L2_PIX_FMT_HEVC, + }, +}; + +static inline struct uvc_format_desc *uvc_format_by_guid(const u8 guid[16]) +{ + unsigned int len = ARRAY_SIZE(uvc_fmts); + unsigned int i; + + for (i = 0; i < len; ++i) { + if (memcmp(guid, uvc_fmts[i].guid, 16) == 0) + return &uvc_fmts[i]; + } + + return NULL; +} + +#endif /* __LINUX_V4L2_UVC_H */ -- cgit v1.2.3 From 466be4c9a6f0b7810991b4ac6c3e55345ea63954 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Fri, 27 Jan 2023 00:14:54 +0100 Subject: usb: uvc: move uvc_fmts and uvc_format_by_guid to own compile unit The media driver USB_VIDEO_CLASS and USB_F_UVC are using the same function uvc_format_by_guid. Since the function is inline, every user will get a copy of the used uvc_fmts array and the function. This patch moves the code to an own compile unit and add this dependency as UVC_COMMON to both users. Reviewed-by: Laurent Pinchart Reviewed-by: Daniel Scally Tested-by: Daniel Scally Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20230126231456.3402323-4-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/uvc.h | 210 +----------------------------------------------- 1 file changed, 1 insertion(+), 209 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index b010a36fc1d9..8cebb46602a1 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -148,220 +148,12 @@ { 'H', 'E', 'V', 'C', 0x00, 0x00, 0x10, 0x00, \ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} -/* ------------------------------------------------------------------------ - * Video formats - */ - struct uvc_format_desc { char *name; u8 guid[16]; u32 fcc; }; -static struct uvc_format_desc uvc_fmts[] = { - { - .name = "YUV 4:2:2 (YUYV)", - .guid = UVC_GUID_FORMAT_YUY2, - .fcc = V4L2_PIX_FMT_YUYV, - }, - { - .name = "YUV 4:2:2 (YUYV)", - .guid = UVC_GUID_FORMAT_YUY2_ISIGHT, - .fcc = V4L2_PIX_FMT_YUYV, - }, - { - .name = "YUV 4:2:0 (NV12)", - .guid = UVC_GUID_FORMAT_NV12, - .fcc = V4L2_PIX_FMT_NV12, - }, - { - .name = "MJPEG", - .guid = UVC_GUID_FORMAT_MJPEG, - .fcc = V4L2_PIX_FMT_MJPEG, - }, - { - .name = "YVU 4:2:0 (YV12)", - .guid = UVC_GUID_FORMAT_YV12, - .fcc = V4L2_PIX_FMT_YVU420, - }, - { - .name = "YUV 4:2:0 (I420)", - .guid = UVC_GUID_FORMAT_I420, - .fcc = V4L2_PIX_FMT_YUV420, - }, - { - .name = "YUV 4:2:0 (M420)", - .guid = UVC_GUID_FORMAT_M420, - .fcc = V4L2_PIX_FMT_M420, - }, - { - .name = "YUV 4:2:2 (UYVY)", - .guid = UVC_GUID_FORMAT_UYVY, - .fcc = V4L2_PIX_FMT_UYVY, - }, - { - .name = "Greyscale 8-bit (Y800)", - .guid = UVC_GUID_FORMAT_Y800, - .fcc = V4L2_PIX_FMT_GREY, - }, - { - .name = "Greyscale 8-bit (Y8 )", - .guid = UVC_GUID_FORMAT_Y8, - .fcc = V4L2_PIX_FMT_GREY, - }, - { - .name = "Greyscale 8-bit (D3DFMT_L8)", - .guid = UVC_GUID_FORMAT_D3DFMT_L8, - .fcc = V4L2_PIX_FMT_GREY, - }, - { - .name = "IR 8-bit (L8_IR)", - .guid = UVC_GUID_FORMAT_KSMEDIA_L8_IR, - .fcc = V4L2_PIX_FMT_GREY, - }, - { - .name = "Greyscale 10-bit (Y10 )", - .guid = UVC_GUID_FORMAT_Y10, - .fcc = V4L2_PIX_FMT_Y10, - }, - { - .name = "Greyscale 12-bit (Y12 )", - .guid = UVC_GUID_FORMAT_Y12, - .fcc = V4L2_PIX_FMT_Y12, - }, - { - .name = "Greyscale 16-bit (Y16 )", - .guid = UVC_GUID_FORMAT_Y16, - .fcc = V4L2_PIX_FMT_Y16, - }, - { - .name = "BGGR Bayer (BY8 )", - .guid = UVC_GUID_FORMAT_BY8, - 
.fcc = V4L2_PIX_FMT_SBGGR8, - }, - { - .name = "BGGR Bayer (BA81)", - .guid = UVC_GUID_FORMAT_BA81, - .fcc = V4L2_PIX_FMT_SBGGR8, - }, - { - .name = "GBRG Bayer (GBRG)", - .guid = UVC_GUID_FORMAT_GBRG, - .fcc = V4L2_PIX_FMT_SGBRG8, - }, - { - .name = "GRBG Bayer (GRBG)", - .guid = UVC_GUID_FORMAT_GRBG, - .fcc = V4L2_PIX_FMT_SGRBG8, - }, - { - .name = "RGGB Bayer (RGGB)", - .guid = UVC_GUID_FORMAT_RGGB, - .fcc = V4L2_PIX_FMT_SRGGB8, - }, - { - .name = "RGB565", - .guid = UVC_GUID_FORMAT_RGBP, - .fcc = V4L2_PIX_FMT_RGB565, - }, - { - .name = "BGR 8:8:8 (BGR3)", - .guid = UVC_GUID_FORMAT_BGR3, - .fcc = V4L2_PIX_FMT_BGR24, - }, - { - .name = "BGRA/X 8:8:8:8 (BGR4)", - .guid = UVC_GUID_FORMAT_BGR4, - .fcc = V4L2_PIX_FMT_XBGR32, - }, - { - .name = "H.264", - .guid = UVC_GUID_FORMAT_H264, - .fcc = V4L2_PIX_FMT_H264, - }, - { - .name = "H.265", - .guid = UVC_GUID_FORMAT_H265, - .fcc = V4L2_PIX_FMT_HEVC, - }, - { - .name = "Greyscale 8 L/R (Y8I)", - .guid = UVC_GUID_FORMAT_Y8I, - .fcc = V4L2_PIX_FMT_Y8I, - }, - { - .name = "Greyscale 12 L/R (Y12I)", - .guid = UVC_GUID_FORMAT_Y12I, - .fcc = V4L2_PIX_FMT_Y12I, - }, - { - .name = "Depth data 16-bit (Z16)", - .guid = UVC_GUID_FORMAT_Z16, - .fcc = V4L2_PIX_FMT_Z16, - }, - { - .name = "Bayer 10-bit (SRGGB10P)", - .guid = UVC_GUID_FORMAT_RW10, - .fcc = V4L2_PIX_FMT_SRGGB10P, - }, - { - .name = "Bayer 16-bit (SBGGR16)", - .guid = UVC_GUID_FORMAT_BG16, - .fcc = V4L2_PIX_FMT_SBGGR16, - }, - { - .name = "Bayer 16-bit (SGBRG16)", - .guid = UVC_GUID_FORMAT_GB16, - .fcc = V4L2_PIX_FMT_SGBRG16, - }, - { - .name = "Bayer 16-bit (SRGGB16)", - .guid = UVC_GUID_FORMAT_RG16, - .fcc = V4L2_PIX_FMT_SRGGB16, - }, - { - .name = "Bayer 16-bit (SGRBG16)", - .guid = UVC_GUID_FORMAT_GR16, - .fcc = V4L2_PIX_FMT_SGRBG16, - }, - { - .name = "Depth data 16-bit (Z16)", - .guid = UVC_GUID_FORMAT_INVZ, - .fcc = V4L2_PIX_FMT_Z16, - }, - { - .name = "Greyscale 10-bit (Y10 )", - .guid = UVC_GUID_FORMAT_INVI, - .fcc = V4L2_PIX_FMT_Y10, - }, - { - .name = "IR:Depth 26-bit (INZI)", - .guid = UVC_GUID_FORMAT_INZI, - .fcc = V4L2_PIX_FMT_INZI, - }, - { - .name = "4-bit Depth Confidence (Packed)", - .guid = UVC_GUID_FORMAT_CNF4, - .fcc = V4L2_PIX_FMT_CNF4, - }, - { - .name = "HEVC", - .guid = UVC_GUID_FORMAT_HEVC, - .fcc = V4L2_PIX_FMT_HEVC, - }, -}; - -static inline struct uvc_format_desc *uvc_format_by_guid(const u8 guid[16]) -{ - unsigned int len = ARRAY_SIZE(uvc_fmts); - unsigned int i; - - for (i = 0; i < len; ++i) { - if (memcmp(guid, uvc_fmts[i].guid, 16) == 0) - return &uvc_fmts[i]; - } - - return NULL; -} +struct uvc_format_desc *uvc_format_by_guid(const u8 guid[16]); #endif /* __LINUX_V4L2_UVC_H */ -- cgit v1.2.3 From 8ecb17a86c0fbb86ea9fb4fa26e742600e945794 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Fri, 27 Jan 2023 00:14:55 +0100 Subject: usb: uvc: make uvc_format_desc table const Since the uvc_fmts array can not be modified we declare it const and change every user of the uvc_format_by_guid function aswell. 
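For illustration, the resulting lookup pattern at a call site is roughly the following (a sketch only; the surrounding variables and error handling are assumptions, not code from either driver):

    const struct uvc_format_desc *fmtdesc;

    fmtdesc = uvc_format_by_guid(guid);
    if (!fmtdesc)
            return -EINVAL;         /* unknown format GUID */
    fourcc = fmtdesc->fcc;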
Reviewed-by: Laurent Pinchart Reviewed-by: Daniel Scally Tested-by: Daniel Scally Signed-off-by: Michael Grzeschik Link: https://lore.kernel.org/r/20230126231456.3402323-5-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/uvc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index 8cebb46602a1..b0210c5c5406 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -154,6 +154,6 @@ struct uvc_format_desc { u32 fcc; }; -struct uvc_format_desc *uvc_format_by_guid(const u8 guid[16]); +const struct uvc_format_desc *uvc_format_by_guid(const u8 guid[16]); #endif /* __LINUX_V4L2_UVC_H */ -- cgit v1.2.3 From 2d83eb5d24e1c8dba386928fcbf76d3b581a632d Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Fri, 27 Jan 2023 00:14:56 +0100 Subject: usb: uvc: use v4l2_fill_fmtdesc instead of open coded format name Since v4l2_fill_fmtdesc will be called in the ioctl v4l_enum_fmt anyway. We can set the format description and compressed flag from v4l_fill_fmtdesc and can remove the extra name field in uvc_format_desc. Reviewed-by: Daniel Scally Tested-by: Daniel Scally Signed-off-by: Michael Grzeschik Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20230126231456.3402323-6-m.grzeschik@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/uvc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/uvc.h b/include/linux/usb/uvc.h index b0210c5c5406..88d96095bcb1 100644 --- a/include/linux/usb/uvc.h +++ b/include/linux/usb/uvc.h @@ -149,7 +149,6 @@ 0x80, 0x00, 0x00, 0xaa, 0x00, 0x38, 0x9b, 0x71} struct uvc_format_desc { - char *name; u8 guid[16]; u32 fcc; }; -- cgit v1.2.3 From 2bf40502badf9bc15a487244dd23ce0b08c306c0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 25 Jan 2023 16:34:25 +0200 Subject: usb: gadget: Use correct APIs and data types for UUID handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have two types for UUIDs depending on the byte ordering. Instead of explaining how bytes should go over the wire, use dedicated APIs and data types. This removes a confusion over the byte ordering. Signed-off-by: Andy Shevchenko Tested-By: Jó Ágila Bitsch Link: https://lore.kernel.org/r/20230125143425.85268-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/webusb.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/webusb.h b/include/linux/usb/webusb.h index b430d84357f3..fe43020b4a48 100644 --- a/include/linux/usb/webusb.h +++ b/include/linux/usb/webusb.h @@ -11,15 +11,12 @@ #include "uapi/linux/usb/ch9.h" /* - * little endian PlatformCapablityUUID for WebUSB + * Little Endian PlatformCapablityUUID for WebUSB * 3408b638-09a9-47a0-8bfd-a0768815b665 - * to identify Platform Device Capability descriptors as referring to WebUSB - * - * the UUID above MUST be sent over the wire as the byte sequence: - * {0x38, 0xB6, 0x08, 0x34, 0xA9, 0x09, 0xA0, 0x47, 0x8B, 0xFD, 0xA0, 0x76, 0x88, 0x15, 0xB6, 0x65}. + * to identify Platform Device Capability descriptors as referring to WebUSB. 
*/ #define WEBUSB_UUID \ - UUID_INIT(0x38b60834, 0xa909, 0xa047, 0x8b, 0xfd, 0xa0, 0x76, 0x88, 0x15, 0xb6, 0x65) + GUID_INIT(0x3408b638, 0x09a9, 0x47a0, 0x8b, 0xfd, 0xa0, 0x76, 0x88, 0x15, 0xb6, 0x65) /* * WebUSB Platform Capability data -- cgit v1.2.3 From 40eaa8c0cbba4a27668c7e01373ccd5b38381e2f Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 23 Jan 2023 15:53:54 +0900 Subject: soc: apple: rtkit: Add apple_rtkit_idle() function This is yet another low power mode, used by DCP. Reviewed-by: Eric Curtin Reviewed-by: Sven Peter Signed-off-by: Hector Martin --- include/linux/soc/apple/rtkit.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h index c9cabb679cd1..a2446b45ed7f 100644 --- a/include/linux/soc/apple/rtkit.h +++ b/include/linux/soc/apple/rtkit.h @@ -104,6 +104,11 @@ int apple_rtkit_wake(struct apple_rtkit *rtk); */ int apple_rtkit_shutdown(struct apple_rtkit *rtk); +/* + * Put the co-processor into idle mode + */ +int apple_rtkit_idle(struct apple_rtkit *rtk); + /* * Checks if RTKit is running and ready to handle messages. */ -- cgit v1.2.3 From 4435d63f172851b57a6a0943b73a6b8055a9356f Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Sat, 21 Jan 2023 16:42:53 +0900 Subject: soc: apple: rtkit: Add a private pointer to apple_rtkit_shmem This allows downstream consumers to keep track of private data for shmem mappings. In particular, the Rust abstraction will use this to safely drop data associated with a mapping when it is unmapped. Signed-off-by: Asahi Lina Reviewed-by: Sven Peter Reviewed-by: Eric Curtin Signed-off-by: Hector Martin --- include/linux/soc/apple/rtkit.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h index a2446b45ed7f..927b214ef1c6 100644 --- a/include/linux/soc/apple/rtkit.h +++ b/include/linux/soc/apple/rtkit.h @@ -22,6 +22,7 @@ * @size: Size of the shared memory buffer. * @iova: Device VA of shared memory buffer. * @is_mapped: Shared memory buffer is managed by the co-processor. + * @private: Private data pointer for the parent driver. */ struct apple_rtkit_shmem { @@ -30,6 +31,7 @@ struct apple_rtkit_shmem { size_t size; dma_addr_t iova; bool is_mapped; + void *private; }; /* -- cgit v1.2.3 From b3892860f50920ea46342d32bf51323877365082 Mon Sep 17 00:00:00 2001 From: Asahi Lina Date: Sat, 21 Jan 2023 16:41:35 +0900 Subject: soc: apple: rtkit: Export non-devm init/free functions While we normally encourage devm usage by drivers, some consumers (and in particular the upcoming Rust abstractions) might want to manually manage memory. Export the raw functions to make this possible. Signed-off-by: Asahi Lina Reviewed-by: Sven Peter Reviewed-by: Eric Curtin Signed-off-by: Hector Martin --- include/linux/soc/apple/rtkit.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h index 927b214ef1c6..fc456f75c131 100644 --- a/include/linux/soc/apple/rtkit.h +++ b/include/linux/soc/apple/rtkit.h @@ -79,6 +79,25 @@ struct apple_rtkit *devm_apple_rtkit_init(struct device *dev, void *cookie, const char *mbox_name, int mbox_idx, const struct apple_rtkit_ops *ops); +/* + * Non-devm version of devm_apple_rtkit_init. Must be freed with + * apple_rtkit_free. 
+ * + * @dev: Pointer to the device node this coprocessor is assocated with + * @cookie: opaque cookie passed to all functions defined in rtkit_ops + * @mbox_name: mailbox name used to communicate with the co-processor + * @mbox_idx: mailbox index to be used if mbox_name is NULL + * @ops: pointer to rtkit_ops to be used for this co-processor + */ +struct apple_rtkit *apple_rtkit_init(struct device *dev, void *cookie, + const char *mbox_name, int mbox_idx, + const struct apple_rtkit_ops *ops); + +/* + * Free an instance of apple_rtkit. + */ +void apple_rtkit_free(struct apple_rtkit *rtk); + /* * Reinitialize internal structures. Must only be called with the co-processor * is held in reset. -- cgit v1.2.3 From 2b8e35337605ca581f777cad73d7a6ac2ccbf6ec Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 30 Jan 2023 13:17:46 +0200 Subject: container_of: Update header inclusions The commit 848dba781f19 ("container_of: remove container_of_safe()") removed the code that uses err.h. Replace the inclusion by stddef.h which provides offsetof() definition which is still in use. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230130111746.59830-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/container_of.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/container_of.h b/include/linux/container_of.h index 1d898f9158b4..713890c867be 100644 --- a/include/linux/container_of.h +++ b/include/linux/container_of.h @@ -3,7 +3,7 @@ #define _LINUX_CONTAINER_OF_H #include -#include +#include #define typeof_member(T, m) typeof(((T*)0)->m) -- cgit v1.2.3 From 3bd3bc2ada84229e74f974d4dafcaca59ae8347f Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 23 Jan 2023 16:49:49 +0000 Subject: soundwire: bus: Remove unused reset_page_addr() callback A previous patch removed unnecessary zeroing of the page registers after a paged transaction, so now the reset_page_addr callback is unused and can be removed. 
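With the callback gone, a controller's ops table simply omits it; a minimal sketch (the my_*() handlers are placeholders, not functions from an existing driver):

    static const struct sdw_master_ops my_master_ops = {
            .read_prop      = my_read_prop,
            .xfer_msg       = my_xfer_msg,
            .xfer_msg_defer = my_xfer_msg_defer,
            .set_bus_conf   = my_set_bus_conf,
            /* no .reset_page_addr: page registers are left as last written */
    };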
Signed-off-by: Richard Fitzgerald Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20230123164949.245898-3-rf@opensource.cirrus.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 3cd2a761911f..a8d74635ea0d 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -838,7 +838,6 @@ struct sdw_defer { * @override_adr: Override value read from firmware (quirk for buggy firmware) * @xfer_msg: Transfer message callback * @xfer_msg_defer: Defer version of transfer message callback - * @reset_page_addr: Reset the SCP page address registers * @set_bus_conf: Set the bus configuration * @pre_bank_switch: Callback for pre bank switch * @post_bank_switch: Callback for post bank switch @@ -854,8 +853,6 @@ struct sdw_master_ops { enum sdw_command_response (*xfer_msg_defer) (struct sdw_bus *bus, struct sdw_msg *msg, struct sdw_defer *defer); - enum sdw_command_response (*reset_page_addr) - (struct sdw_bus *bus, unsigned int dev_num); int (*set_bus_conf)(struct sdw_bus *bus, struct sdw_bus_params *params); int (*pre_bank_switch)(struct sdw_bus *bus); -- cgit v1.2.3 From 4e197ee880c24ecb63f7fe17449b3653bc64b03c Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 31 Jan 2023 09:46:39 +0100 Subject: clk: imx6ul: add ethernet refclock mux support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ethernet refclock mux support and set it to internal clock by default. This configuration will not affect existing boards. clock tree before this patch: fec1 <- enet1_ref_125m (gate) <- enet1_ref (divider) <-, |- pll6_enet fec2 <- enet2_ref_125m (gate) <- enet2_ref (divider) <-´ after this patch: fec1 <- enet1_ref_sel(mux) <- enet1_ref_125m (gate) <- ... `--<> enet1_ref_pad |- pll6_enet fec2 <- enet2_ref_sel(mux) <- enet2_ref_125m (gate) <- ... `--<> enet2_ref_pad Signed-off-by: Oleksij Rempel Acked-by: Lee Jones Reviewed-by: Abel Vesa Signed-off-by: Abel Vesa Link: https://lore.kernel.org/r/20230131084642.709385-17-o.rempel@pengutronix.de --- include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h index d4b5e527a7a3..09c6b3184bb0 100644 --- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h @@ -451,8 +451,10 @@ #define IMX6SX_GPR12_PCIE_RX_EQ_2 (0x2 << 0) /* For imx6ul iomux gpr register field define */ -#define IMX6UL_GPR1_ENET1_CLK_DIR (0x1 << 17) -#define IMX6UL_GPR1_ENET2_CLK_DIR (0x1 << 18) +#define IMX6UL_GPR1_ENET2_TX_CLK_DIR BIT(18) +#define IMX6UL_GPR1_ENET1_TX_CLK_DIR BIT(17) +#define IMX6UL_GPR1_ENET2_CLK_SEL BIT(14) +#define IMX6UL_GPR1_ENET1_CLK_SEL BIT(13) #define IMX6UL_GPR1_ENET1_CLK_OUTPUT (0x1 << 17) #define IMX6UL_GPR1_ENET2_CLK_OUTPUT (0x1 << 18) #define IMX6UL_GPR1_ENET_CLK_DIR (0x3 << 17) -- cgit v1.2.3 From dd0b9619a21ef77127e6002f9cb2d254c03ceed1 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 19 Jan 2023 15:32:10 +0800 Subject: soundwire: cadence: use directly bus sdw_defer structure Copying the bus sdw_defer structure into the Cadence internals leads to using stale pointers and kernel oopses on errors. It's just simpler and safer to use the bus sdw_defer structure directly. 
Link: https://github.com/thesofproject/linux/issues/4056 Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20230119073211.85979-4-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index a8d74635ea0d..9e5fc0307284 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -851,8 +851,7 @@ struct sdw_master_ops { enum sdw_command_response (*xfer_msg) (struct sdw_bus *bus, struct sdw_msg *msg); enum sdw_command_response (*xfer_msg_defer) - (struct sdw_bus *bus, struct sdw_msg *msg, - struct sdw_defer *defer); + (struct sdw_bus *bus, struct sdw_msg *msg); int (*set_bus_conf)(struct sdw_bus *bus, struct sdw_bus_params *params); int (*pre_bank_switch)(struct sdw_bus *bus); -- cgit v1.2.3 From 66f95de7c13be5e442d8ed4cf00e13f8dbdc1315 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Thu, 19 Jan 2023 15:32:11 +0800 Subject: soundwire: cadence: further simplify low-level xfer_msg_defer() callback The message pointer is already stored in the bus->defer structure, not need to pass it as an argument. Suggested-by: Ranjani Sridharan Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20230119073211.85979-5-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 9e5fc0307284..d09a9857e1de 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -837,7 +837,8 @@ struct sdw_defer { * @read_prop: Read Master properties * @override_adr: Override value read from firmware (quirk for buggy firmware) * @xfer_msg: Transfer message callback - * @xfer_msg_defer: Defer version of transfer message callback + * @xfer_msg_defer: Defer version of transfer message callback. The message is handled with the + * bus struct @sdw_defer * @set_bus_conf: Set the bus configuration * @pre_bank_switch: Callback for pre bank switch * @post_bank_switch: Callback for post bank switch @@ -851,7 +852,7 @@ struct sdw_master_ops { enum sdw_command_response (*xfer_msg) (struct sdw_bus *bus, struct sdw_msg *msg); enum sdw_command_response (*xfer_msg_defer) - (struct sdw_bus *bus, struct sdw_msg *msg); + (struct sdw_bus *bus); int (*set_bus_conf)(struct sdw_bus *bus, struct sdw_bus_params *params); int (*pre_bank_switch)(struct sdw_bus *bus); -- cgit v1.2.3 From 5e6a51787fef20b849682d8c49ec9c2beed5c373 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 24 Jan 2023 15:38:38 +0200 Subject: uuid: Decouple guid_t and uuid_le types and respective macros The guid_t type and respective macros are being used internally only. The uuid_le has its user outside the kernel. Decouple these types and macros, and make guid_t completely internal type to the kernel. 
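As a reminder of the byte-order difference, a guid_t built with GUID_INIT() lays out its first three fields little-endian; a sketch reusing the WebUSB UUID from the earlier patch purely as an example value:

    /* 3408b638-09a9-47a0-8bfd-a0768815b665 */
    static const guid_t example_guid =
            GUID_INIT(0x3408b638, 0x09a9, 0x47a0,
                      0x8b, 0xfd, 0xa0, 0x76, 0x88, 0x15, 0xb6, 0x65);
    /* stored (and sent) as 38 b6 08 34 a9 09 a0 47 8b fd a0 76 88 15 b6 65;
     * UUID_INIT() with the same arguments would keep the first three fields
     * big-endian instead. */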
Signed-off-by: Andy Shevchenko Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230124133838.22645-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/uuid.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 8cdc0d3567cd..5be158a49e11 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -8,15 +8,25 @@ #ifndef _LINUX_UUID_H_ #define _LINUX_UUID_H_ -#include #include #define UUID_SIZE 16 +typedef struct { + __u8 b[UUID_SIZE]; +} guid_t; + typedef struct { __u8 b[UUID_SIZE]; } uuid_t; +#define GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ +((guid_t) \ +{{ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, \ + (c) & 0xff, ((c) >> 8) & 0xff, \ + (d0), (d1), (d2), (d3), (d4), (d5), (d6), (d7) }}) + #define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ ((uuid_t) \ {{ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, ((a) >> 8) & 0xff, (a) & 0xff, \ @@ -97,10 +107,12 @@ extern const u8 uuid_index[16]; int guid_parse(const char *uuid, guid_t *u); int uuid_parse(const char *uuid, uuid_t *u); -/* backwards compatibility, don't use in new code */ -static inline int uuid_le_cmp(const guid_t u1, const guid_t u2) +/* MEI UUID type, don't use anywhere else */ +#include + +static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2) { - return memcmp(&u1, &u2, sizeof(guid_t)); + return memcmp(&u1, &u2, sizeof(uuid_le)); } #endif -- cgit v1.2.3 From d931b83e62b1dd352fc326c0b1cf3be3ef19e113 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Tue, 24 Jan 2023 16:40:47 +0100 Subject: cacheinfo: Make default acpi_get_cache_info() return an error commit bd500361a937 ("ACPI: PPTT: Update acpi_find_last_cache_level() to acpi_get_cache_info()") updates the prototype of acpi_get_cache_info(). The cache 'levels' is update through a pointer and not the return value of the function. If CONFIG_ACPI_PPTT is not defined, acpi_get_cache_info() doesn't update its *levels and *split_levels parameters and returns 0. This can lead to a faulty behaviour. Make acpi_get_cache_info() return an error code if CONFIG_ACPI_PPTT is not defined. Also, In init_cache_level(), if no PPTT is present or CONFIG_ACPI_PPTT is not defined, instead of aborting if acpi_get_cache_info() returns an error code, just continue. This allows to try fetching the cache information from clidr_el1. 
Signed-off-by: Pierre Gondois Link: https://lore.kernel.org/r/20230124154053.355376-3-pierre.gondois@arm.com Signed-off-by: Greg Kroah-Hartman --- include/linux/cacheinfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index dfef57077cd0..908e19d17f49 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -100,7 +100,7 @@ static inline int acpi_get_cache_info(unsigned int cpu, unsigned int *levels, unsigned int *split_levels) { - return 0; + return -ENOENT; } #else int acpi_get_cache_info(unsigned int cpu, -- cgit v1.2.3 From 4971c268b85e1c7a734a61622fc0813c86e2362e Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Tue, 31 Jan 2023 18:42:43 +0100 Subject: ima: Align ima_file_mmap() parameters with mmap_file LSM hook Commit 98de59bfe4b2f ("take calculation of final prot in security_mmap_file() into a helper") moved the code to update prot, to be the actual protections applied to the kernel, to a new helper called mmap_prot(). However, while without the helper ima_file_mmap() was getting the updated prot, with the helper ima_file_mmap() gets the original prot, which contains the protections requested by the application. A possible consequence of this change is that, if an application calls mmap() with only PROT_READ, and the kernel applies PROT_EXEC in addition, that application would have access to executable memory without having this event recorded in the IMA measurement list. This situation would occur for example if the application, before mmap(), calls the personality() system call with READ_IMPLIES_EXEC as the first argument. Align ima_file_mmap() parameters with those of the mmap_file LSM hook, so that IMA can receive both the requested prot and the final prot. Since the requested protections are stored in a new variable, and the final protections are stored in the existing variable, this effectively restores the original behavior of the MMAP_CHECK hook. 
Cc: stable@vger.kernel.org Fixes: 98de59bfe4b2 ("take calculation of final prot in security_mmap_file() into a helper") Signed-off-by: Roberto Sassu Reviewed-by: Stefan Berger Signed-off-by: Mimi Zohar --- include/linux/ima.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ima.h b/include/linux/ima.h index 5a0b2a285a18..d79fee67235e 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -21,7 +21,8 @@ extern int ima_file_check(struct file *file, int mask); extern void ima_post_create_tmpfile(struct user_namespace *mnt_userns, struct inode *inode); extern void ima_file_free(struct file *file); -extern int ima_file_mmap(struct file *file, unsigned long prot); +extern int ima_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); extern int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot); extern int ima_load_data(enum kernel_load_data_id id, bool contents); extern int ima_post_load_data(char *buf, loff_t size, @@ -76,7 +77,8 @@ static inline void ima_file_free(struct file *file) return; } -static inline int ima_file_mmap(struct file *file, unsigned long prot) +static inline int ima_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) { return 0; } -- cgit v1.2.3 From c1085957dece02bda586cbffc1328f3bca09325f Mon Sep 17 00:00:00 2001 From: Yangtao Li Date: Thu, 12 Jan 2023 21:34:43 +0800 Subject: f2fs: clarify compress level bit offset commit 3fde13f817e2 ("f2fs: compress: support compress level") introduce compress level, which macro(COMPRESS_LEVEL_OFFSET) is 8, But use wrong comment about compress level. Let's fix it. Signed-off-by: Yangtao Li Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ee0d75d9a302..1701f25117ea 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -315,7 +315,7 @@ struct f2fs_inode { __u8 i_log_cluster_size; /* log of cluster size */ __le16 i_compress_flag; /* compress flag */ /* 0 bit: chksum flag - * [10,15] bits: compress level + * [8,15] bits: compress level */ __le32 i_extra_end[0]; /* for attribute size calculation */ } __packed; -- cgit v1.2.3 From 8b3517f88ff2983f52698893519227c10aac90b2 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 1 Feb 2023 12:30:18 +0800 Subject: PCI: loongson: Prevent LS7A MRRS increases Except for isochronous-configured devices, software may set Max_Read_Request_Size (MRRS) to any value up to 4096. If a device issues a read request with size greater than the completer's Max_Payload_Size (MPS), the completer is required to break the response into multiple completions. Instead of correctly responding with multiple completions to a large read request, some LS7A Root Ports respond with a Completer Abort. To prevent this, the MRRS must be limited to an implementation-specific value. The OS cannot detect that value, so rely on BIOS to configure MRRS before booting, and quirk the Root Ports so we never set an MRRS larger than that BIOS value for any downstream device. N.B. Hot-added devices are not configured by BIOS, and they power up with MRRS = 512 bytes, so these devices will be limited to 512 bytes. If the LS7A limit is smaller, those hot-added devices may not work correctly, but per [1], hotplug is not supported with this chipset revision. 
[1] https://lore.kernel.org/r/073638a7-ae68-2847-ac3d-29e5e760d6af@loongson.cn [bhelgaas: commit log] Link: https://bugzilla.kernel.org/show_bug.cgi?id=216884 Link: https://lore.kernel.org/r/20230201043018.778499-3-chenhuacai@loongson.cn Signed-off-by: Huacai Chen Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index adffd65e84b4..3df2049ec4a8 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -572,6 +572,7 @@ struct pci_host_bridge { void *release_data; unsigned int ignore_reset_delay:1; /* For entire hierarchy */ unsigned int no_ext_tags:1; /* No Extended Tags */ + unsigned int no_inc_mrrs:1; /* No Increase MRRS */ unsigned int native_aer:1; /* OS may use PCIe AER */ unsigned int native_pcie_hotplug:1; /* OS may use PCIe hotplug */ unsigned int native_shpc_hotplug:1; /* OS may use SHPC hotplug */ -- cgit v1.2.3 From 37e98d9bedb50644654fd196e38acad49903fadc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 1 Feb 2023 09:33:49 +0100 Subject: driver core: bus: move lock_class_key into dynamic structure Move the lock_class_key structure out of struct bus_type and into the dynamic structure we create already for all bus_types registered with the kernel. This saves on static space and removes one more writable field in struct bus_type. In the future, the same field can be moved out of the struct class logic because it shares this same private structure. Most everyone will never notice this change, as lockdep is not enabled in real systems so no memory or logic changes are happening for them. Cc: "Rafael J. Wysocki" Acked-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20230201083349.4038660-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 87e4d029c915..e3094db1e9fa 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -112,7 +112,6 @@ struct bus_type { const struct iommu_ops *iommu_ops; struct subsys_private *p; - struct lock_class_key lock_key; bool need_parent_lock; }; -- cgit v1.2.3 From de82f60f9c86b72635ce49f7ab822e6a00a90dca Mon Sep 17 00:00:00 2001 From: Michael Bottini Date: Thu, 19 Jan 2023 19:15:19 -0800 Subject: PCI/ASPM: Add pci_enable_link_state() Add pci_enable_link_state() to allow devices to change the default BIOS configured states. Clears the BIOS default settings then sets the new states and reconfigures the link under the semaphore. Also add PCIE_LINK_STATE_ALL macro for convenience for callers that want to enable all link states. Link: https://lore.kernel.org/r/20230120031522.2304439-2-david.e.box@linux.intel.com Signed-off-by: Michael Bottini Signed-off-by: David E. 
Box Signed-off-by: Lorenzo Pieralisi Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Bjorn Helgaas --- include/linux/pci.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index adffd65e84b4..ea601e6fbbda 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1685,10 +1685,15 @@ extern bool pcie_ports_native; #define PCIE_LINK_STATE_L1_2 BIT(4) #define PCIE_LINK_STATE_L1_1_PCIPM BIT(5) #define PCIE_LINK_STATE_L1_2_PCIPM BIT(6) +#define PCIE_LINK_STATE_ALL (PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 |\ + PCIE_LINK_STATE_CLKPM | PCIE_LINK_STATE_L1_1 |\ + PCIE_LINK_STATE_L1_2 | PCIE_LINK_STATE_L1_1_PCIPM |\ + PCIE_LINK_STATE_L1_2_PCIPM) #ifdef CONFIG_PCIEASPM int pci_disable_link_state(struct pci_dev *pdev, int state); int pci_disable_link_state_locked(struct pci_dev *pdev, int state); +int pci_enable_link_state(struct pci_dev *pdev, int state); void pcie_no_aspm(void); bool pcie_aspm_support_enabled(void); bool pcie_aspm_enabled(struct pci_dev *pdev); @@ -1697,6 +1702,8 @@ static inline int pci_disable_link_state(struct pci_dev *pdev, int state) { return 0; } static inline int pci_disable_link_state_locked(struct pci_dev *pdev, int state) { return 0; } +static inline int pci_enable_link_state(struct pci_dev *pdev, int state) +{ return 0; } static inline void pcie_no_aspm(void) { } static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } -- cgit v1.2.3 From 1bd9a7b4afd5e0b938868a90b16d514c19808e6c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Jan 2023 19:37:20 +0100 Subject: phy: Remove unused phy_optional_get() There were never any upstream users of this function since its introduction almost 10 years ago. Besides, the dummy for phy_optional_get() should have returned NULL instead of an error code. Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/df61992b1d66bccf4e6e1eafae94a7f7d7629f34.1674584626.git.geert+renesas@glider.be Signed-off-by: Vinod Koul --- include/linux/phy/phy.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index b1413757fcc3..1b4f9be21e01 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -250,7 +250,6 @@ static inline void phy_set_bus_width(struct phy *phy, int bus_width) phy->attrs.bus_width = bus_width; } struct phy *phy_get(struct device *dev, const char *string); -struct phy *phy_optional_get(struct device *dev, const char *string); struct phy *devm_phy_get(struct device *dev, const char *string); struct phy *devm_phy_optional_get(struct device *dev, const char *string); struct phy *devm_of_phy_get(struct device *dev, struct device_node *np, @@ -426,12 +425,6 @@ static inline struct phy *phy_get(struct device *dev, const char *string) return ERR_PTR(-ENOSYS); } -static inline struct phy *phy_optional_get(struct device *dev, - const char *string) -{ - return ERR_PTR(-ENOSYS); -} - static inline struct phy *devm_phy_get(struct device *dev, const char *string) { return ERR_PTR(-ENOSYS); -- cgit v1.2.3 From d02aa181ee595c81738b6bd7ebad6025fbee035a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Jan 2023 19:37:22 +0100 Subject: phy: Add devm_of_phy_optional_get() helper Add an optional variant of devm_of_phy_get() that also takes care of printing real errors, so drivers no longer have to open-code this operation. 
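Typical driver usage then reduces to the following sketch (names are illustrative):

    phy = devm_of_phy_optional_get(dev, np, NULL);
    if (IS_ERR(phy))
            return PTR_ERR(phy);    /* a real error, already reported by the helper */

    /* phy is NULL when the node describes no PHY; phy_init(NULL) and
     * friends are no-ops, so no further special-casing is needed. */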
Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/4cd0069bcff424ffc5c3a102397c02370b91985b.1674584626.git.geert+renesas@glider.be Signed-off-by: Vinod Koul --- include/linux/phy/phy.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 1b4f9be21e01..3a570bc59fc7 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -254,6 +254,8 @@ struct phy *devm_phy_get(struct device *dev, const char *string); struct phy *devm_phy_optional_get(struct device *dev, const char *string); struct phy *devm_of_phy_get(struct device *dev, struct device_node *np, const char *con_id); +struct phy *devm_of_phy_optional_get(struct device *dev, struct device_node *np, + const char *con_id); struct phy *devm_of_phy_get_by_index(struct device *dev, struct device_node *np, int index); void of_phy_put(struct phy *phy); @@ -443,6 +445,13 @@ static inline struct phy *devm_of_phy_get(struct device *dev, return ERR_PTR(-ENOSYS); } +static inline struct phy *devm_of_phy_optional_get(struct device *dev, + struct device_node *np, + const char *con_id) +{ + return NULL; +} + static inline struct phy *devm_of_phy_get_by_index(struct device *dev, struct device_node *np, int index) -- cgit v1.2.3 From f5b3c341a46ec55d93332ee5c254a278af902ffe Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 2 Feb 2023 16:54:12 +0200 Subject: mei: Move uuid_le_cmp() to its only user There is only a single user of uuid_le_cmp() API, let's make it private to that user. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230202145412.87569-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/uuid.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 5be158a49e11..6b1a3efa1e0b 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -110,9 +110,4 @@ int uuid_parse(const char *uuid, uuid_t *u); /* MEI UUID type, don't use anywhere else */ #include -static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2) -{ - return memcmp(&u1, &u2, sizeof(uuid_le)); -} - #endif -- cgit v1.2.3 From fb6f026b833a71f4701e12b43800e46d7351f7a2 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 10 Jan 2023 19:03:53 +0000 Subject: mm/damon/core: update kernel-doc comments for DAMOS action supports of each DAMON operations set Patch series "mm/damon: trivial fixups". This patchset contains patches for trivial fixups of DAMON's documentation, MAINTAINERS section, and selftests. This patch (of 8): Supports of each DAMOS action are up to DAMON operations set implementation in use, but not well mentioned on the kernel-doc comments. Add the comment. Link: https://lkml.kernel.org/r/20230110190400.119388-1-sj@kernel.org Link: https://lkml.kernel.org/r/20230110190400.119388-2-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- include/linux/damon.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 7907918ad2e0..3fa96d7c9fe4 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -91,6 +91,12 @@ struct damon_target { * @DAMOS_LRU_DEPRIO: Deprioritize the region on its LRU lists. * @DAMOS_STAT: Do nothing but count the stat. * @NR_DAMOS_ACTIONS: Total number of DAMOS actions + * + * The support of each action is up to running &struct damon_operations. 
+ * &enum DAMON_OPS_VADDR and &enum DAMON_OPS_FVADDR supports all actions except + * &enum DAMOS_LRU_PRIO and &enum DAMOS_LRU_DEPRIO. &enum DAMON_OPS_PADDR + * supports only &enum DAMOS_PAGEOUT, &enum DAMOS_LRU_PRIO, &enum + * DAMOS_LRU_DEPRIO, and &DAMOS_STAT. */ enum damos_action { DAMOS_WILLNEED, -- cgit v1.2.3 From 55901e89d2864b5ef9961892470eedf29279d412 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 10 Jan 2023 19:03:54 +0000 Subject: mm/damon/core: update kernel-doc comments for DAMOS filters supports of each DAMON operations set Supports of each DAMOS filter type are up to DAMON operations set implementation in use, but not well mentioned on the kernel-doc comments. Add the comment. Link: https://lkml.kernel.org/r/20230110190400.119388-3-sj@kernel.org Signed-off-by: SeongJae Park Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- include/linux/damon.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index 3fa96d7c9fe4..dfb245bb3053 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -227,6 +227,11 @@ struct damos_stat { * @DAMOS_FILTER_TYPE_ANON: Anonymous pages. * @DAMOS_FILTER_TYPE_MEMCG: Specific memcg's pages. * @NR_DAMOS_FILTER_TYPES: Number of filter types. + * + * The support of each filter type is up to running &struct damon_operations. + * &enum DAMON_OPS_PADDR is supporting all filter types, while + * &enum DAMON_OPS_VADDR and &enum DAMON_OPS_FVADDR are not supporting any + * filter types. */ enum damos_filter_type { DAMOS_FILTER_TYPE_ANON, -- cgit v1.2.3 From c5d5546ea06512accc894cd19265c7041a6ac81a Mon Sep 17 00:00:00 2001 From: Vernon Yang Date: Tue, 10 Jan 2023 23:42:11 +0800 Subject: maple_tree: remove the parameter entry of mas_preallocate The parameter entry of mas_preallocate is not used, so drop it. Link: https://lkml.kernel.org/r/20230110154211.1758562-1-vernon2gm@gmail.com Signed-off-by: Vernon Yang Cc: Liam Howlett Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index 815a27661517..a7bf58fd7cc6 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -455,7 +455,7 @@ int mas_store_gfp(struct ma_state *mas, void *entry, gfp_t gfp); void mas_store_prealloc(struct ma_state *mas, void *entry); void *mas_find(struct ma_state *mas, unsigned long max); void *mas_find_rev(struct ma_state *mas, unsigned long min); -int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp); +int mas_preallocate(struct ma_state *mas, gfp_t gfp); bool mas_is_err(struct ma_state *mas); bool mas_nomem(struct ma_state *mas, gfp_t gfp); -- cgit v1.2.3 From 7d4a8be0c4b2b7ffb367929d2b352651f083806b Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Tue, 10 Jan 2023 13:57:22 +1100 Subject: mm/mmu_notifier: remove unused mmu_notifier_range_update_to_read_only export mmu_notifier_range_update_to_read_only() was originally introduced in commit c6d23413f81b ("mm/mmu_notifier: mmu_notifier_range_update_to_read_only() helper") as an optimisation for device drivers that know a range has only been mapped read-only. However there are no users of this feature so remove it. As it is the only user of the struct mmu_notifier_range.vma field remove that also. 
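Callers now initialize a range from the mm alone, e.g. (sketch):

    struct mmu_notifier_range range;

    mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm,
                            address, address + PAGE_SIZE);
    mmu_notifier_invalidate_range_start(&range);
    /* ... update page tables ... */
    mmu_notifier_invalidate_range_end(&range);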
Link: https://lkml.kernel.org/r/20230110025722.600912-1-apopple@nvidia.com Signed-off-by: Alistair Popple Acked-by: Mike Rapoport (IBM) Reviewed-by: Jason Gunthorpe Reviewed-by: Christoph Hellwig Reviewed-by: Mike Kravetz Cc: Ira Weiny Cc: Jerome Glisse Cc: John Hubbard Cc: Ralph Campbell Signed-off-by: Andrew Morton --- include/linux/mmu_notifier.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index d6c06e140277..64a3e051c3c4 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -269,7 +269,6 @@ extern struct lockdep_map __mmu_notifier_invalidate_range_start_map; #endif struct mmu_notifier_range { - struct vm_area_struct *vma; struct mm_struct *mm; unsigned long start; unsigned long end; @@ -514,12 +513,10 @@ static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) static inline void mmu_notifier_range_init(struct mmu_notifier_range *range, enum mmu_notifier_event event, unsigned flags, - struct vm_area_struct *vma, struct mm_struct *mm, unsigned long start, unsigned long end) { - range->vma = vma; range->event = event; range->mm = mm; range->start = start; @@ -530,10 +527,10 @@ static inline void mmu_notifier_range_init(struct mmu_notifier_range *range, static inline void mmu_notifier_range_init_owner( struct mmu_notifier_range *range, enum mmu_notifier_event event, unsigned int flags, - struct vm_area_struct *vma, struct mm_struct *mm, - unsigned long start, unsigned long end, void *owner) + struct mm_struct *mm, unsigned long start, + unsigned long end, void *owner) { - mmu_notifier_range_init(range, event, flags, vma, mm, start, end); + mmu_notifier_range_init(range, event, flags, mm, start, end); range->owner = owner; } @@ -659,9 +656,9 @@ static inline void _mmu_notifier_range_init(struct mmu_notifier_range *range, range->end = end; } -#define mmu_notifier_range_init(range,event,flags,vma,mm,start,end) \ +#define mmu_notifier_range_init(range,event,flags,mm,start,end) \ _mmu_notifier_range_init(range, start, end) -#define mmu_notifier_range_init_owner(range, event, flags, vma, mm, start, \ +#define mmu_notifier_range_init_owner(range, event, flags, mm, start, \ end, owner) \ _mmu_notifier_range_init(range, start, end) -- cgit v1.2.3 From 94688e8eb453e616098cb930e5f6fed4a6ea2dfa Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:28:47 +0000 Subject: mm: remove folio_pincount_ptr() and head_compound_pincount() We can use folio->_pincount directly, since all users are guarded by tests of compound/large. 
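On the pinning side (mm/gup.c) the counter is updated directly; very roughly (sketch, ordinary refcount handling elided):

    if (folio_test_large(folio))
            atomic_add(refs, &folio->_pincount);    /* exact pin count for large folios */
    else
            folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS);     /* bias scheme */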
Link: https://lkml.kernel.org/r/20230111142915.1001531-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: John Hubbard Signed-off-by: Andrew Morton --- include/linux/mm.h | 14 ++------------ include/linux/mm_types.h | 5 ----- 2 files changed, 2 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 76c97cb8ee9a..6d3945207067 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,11 +1011,6 @@ static inline void folio_set_compound_dtor(struct folio *folio, void destroy_large_folio(struct folio *folio); -static inline int head_compound_pincount(struct page *head) -{ - return atomic_read(compound_pincount_ptr(head)); -} - static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; @@ -1641,11 +1636,6 @@ static inline struct folio *pfn_folio(unsigned long pfn) return page_folio(pfn_to_page(pfn)); } -static inline atomic_t *folio_pincount_ptr(struct folio *folio) -{ - return &folio_page(folio, 1)->compound_pincount; -} - /** * folio_maybe_dma_pinned - Report if a folio may be pinned for DMA. * @folio: The folio. @@ -1663,7 +1653,7 @@ static inline atomic_t *folio_pincount_ptr(struct folio *folio) * expected to be able to deal gracefully with a false positive. * * For large folios, the result will be exactly correct. That's because - * we have more tracking data available: the compound_pincount is used + * we have more tracking data available: the _pincount field is used * instead of the GUP_PIN_COUNTING_BIAS scheme. * * For more information, please see Documentation/core-api/pin_user_pages.rst. @@ -1674,7 +1664,7 @@ static inline atomic_t *folio_pincount_ptr(struct folio *folio) static inline bool folio_maybe_dma_pinned(struct folio *folio) { if (folio_test_large(folio)) - return atomic_read(folio_pincount_ptr(folio)) > 0; + return atomic_read(&folio->_pincount) > 0; /* * folio_ref_count() is signed. If that refcount overflows, then diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 10b6eb311ede..6ff1d7db00a7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -443,11 +443,6 @@ static inline atomic_t *subpages_mapcount_ptr(struct page *page) return &page[1].subpages_mapcount; } -static inline atomic_t *compound_pincount_ptr(struct page *page) -{ - return &page[1].compound_pincount; -} - /* * Used for sizing the vmemmap region on some architectures */ -- cgit v1.2.3 From eec20426d48bd7b63c69969a793943ed1a99b731 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:28:48 +0000 Subject: mm: convert head_subpages_mapcount() into folio_nr_pages_mapped() Calling this 'mapcount' is confusing since mapcount is usually the number of times something is mapped; instead this is the number of mapped pages. It's also better to enforce that this is a folio rather than a head page. Move folio_nr_pages_mapped() into mm/internal.h since this is not something we want device drivers or filesystems poking at. Get rid of folio_subpages_mapcount_ptr() and use folio->_nr_pages_mapped directly. 
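After the move, the helper in mm/internal.h reads the renamed field directly; roughly (sketch, the mask macro name follows the rename done in this series):

    static inline int folio_nr_pages_mapped(struct folio *folio)
    {
            return atomic_read(&folio->_nr_pages_mapped) & FOLIO_PAGES_MAPPED;
    }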
Link: https://lkml.kernel.org/r/20230111142915.1001531-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 22 ++-------------------- include/linux/mm_types.h | 12 +++--------- 2 files changed, 5 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6d3945207067..2bdd08a5b8b4 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -843,24 +843,6 @@ static inline int head_compound_mapcount(struct page *head) return atomic_read(compound_mapcount_ptr(head)) + 1; } -/* - * If a 16GB hugetlb page were mapped by PTEs of all of its 4kB sub-pages, - * its subpages_mapcount would be 0x400000: choose the COMPOUND_MAPPED bit - * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE). Hugetlb currently - * leaves subpages_mapcount at 0, but avoid surprise if it participates later. - */ -#define COMPOUND_MAPPED 0x800000 -#define SUBPAGES_MAPPED (COMPOUND_MAPPED - 1) - -/* - * Number of sub-pages mapped by PTE, does not include compound mapcount. - * Must be called only on head of compound page. - */ -static inline int head_subpages_mapcount(struct page *head) -{ - return atomic_read(subpages_mapcount_ptr(head)) & SUBPAGES_MAPPED; -} - /* * The atomic page->_mapcount, starts from -1: so that transitions * both from it and to it can be tracked, using atomic_inc_and_test @@ -920,9 +902,9 @@ static inline bool folio_large_is_mapped(struct folio *folio) { /* * Reading folio_mapcount_ptr() below could be omitted if hugetlb - * participated in incrementing subpages_mapcount when compound mapped. + * participated in incrementing nr_pages_mapped when compound mapped. */ - return atomic_read(folio_subpages_mapcount_ptr(folio)) > 0 || + return atomic_read(&folio->_nr_pages_mapped) > 0 || atomic_read(folio_mapcount_ptr(folio)) >= 0; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6ff1d7db00a7..4751c67b98a6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -307,7 +307,7 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page) * @_folio_dtor: Which destructor to use for this folio. * @_folio_order: Do not use directly, call folio_order(). * @_compound_mapcount: Do not use directly, call folio_entire_mapcount(). - * @_subpages_mapcount: Do not use directly, call folio_mapcount(). + * @_nr_pages_mapped: Do not use directly, call folio_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). * @_flags_2: For alignment. Do not use. 
@@ -361,7 +361,7 @@ struct folio { unsigned char _folio_dtor; unsigned char _folio_order; atomic_t _compound_mapcount; - atomic_t _subpages_mapcount; + atomic_t _nr_pages_mapped; atomic_t _pincount; #ifdef CONFIG_64BIT unsigned int _folio_nr_pages; @@ -404,7 +404,7 @@ FOLIO_MATCH(compound_head, _head_1); FOLIO_MATCH(compound_dtor, _folio_dtor); FOLIO_MATCH(compound_order, _folio_order); FOLIO_MATCH(compound_mapcount, _compound_mapcount); -FOLIO_MATCH(subpages_mapcount, _subpages_mapcount); +FOLIO_MATCH(subpages_mapcount, _nr_pages_mapped); FOLIO_MATCH(compound_pincount, _pincount); #ifdef CONFIG_64BIT FOLIO_MATCH(compound_nr, _folio_nr_pages); @@ -427,12 +427,6 @@ static inline atomic_t *folio_mapcount_ptr(struct folio *folio) return &tail->compound_mapcount; } -static inline atomic_t *folio_subpages_mapcount_ptr(struct folio *folio) -{ - struct page *tail = &folio->page + 1; - return &tail->subpages_mapcount; -} - static inline atomic_t *compound_mapcount_ptr(struct page *page) { return &page[1].compound_mapcount; -- cgit v1.2.3 From b14224fbea62e5bffd680613376fe1268f4103ba Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:28:50 +0000 Subject: mm: convert total_compound_mapcount() to folio_total_mapcount() Instead of enforcing that the argument must be a head page by naming, enforce it with the compiler by making it a folio. Also rename the counter in struct folio from _compound_mapcount to _entire_mapcount. Link: https://lkml.kernel.org/r/20230111142915.1001531-5-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- include/linux/mm_types.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2bdd08a5b8b4..bdf83e75bcd6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -871,7 +871,7 @@ static inline int page_mapcount(struct page *page) return head_compound_mapcount(page) + mapcount; } -int total_compound_mapcount(struct page *head); +int folio_total_mapcount(struct folio *folio); /** * folio_mapcount() - Calculate the number of mappings of this folio. @@ -888,14 +888,14 @@ static inline int folio_mapcount(struct folio *folio) { if (likely(!folio_test_large(folio))) return atomic_read(&folio->_mapcount) + 1; - return total_compound_mapcount(&folio->page); + return folio_total_mapcount(folio); } static inline int total_mapcount(struct page *page) { if (likely(!PageCompound(page))) return atomic_read(&page->_mapcount) + 1; - return total_compound_mapcount(compound_head(page)); + return folio_total_mapcount(page_folio(page)); } static inline bool folio_large_is_mapped(struct folio *folio) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4751c67b98a6..70cbda768308 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -306,7 +306,7 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page) * @_head_1: Points to the folio. Do not use. * @_folio_dtor: Which destructor to use for this folio. * @_folio_order: Do not use directly, call folio_order(). - * @_compound_mapcount: Do not use directly, call folio_entire_mapcount(). + * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). * @_nr_pages_mapped: Do not use directly, call folio_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). 
@@ -360,7 +360,7 @@ struct folio { unsigned long _head_1; unsigned char _folio_dtor; unsigned char _folio_order; - atomic_t _compound_mapcount; + atomic_t _entire_mapcount; atomic_t _nr_pages_mapped; atomic_t _pincount; #ifdef CONFIG_64BIT @@ -403,7 +403,7 @@ FOLIO_MATCH(flags, _flags_1); FOLIO_MATCH(compound_head, _head_1); FOLIO_MATCH(compound_dtor, _folio_dtor); FOLIO_MATCH(compound_order, _folio_order); -FOLIO_MATCH(compound_mapcount, _compound_mapcount); +FOLIO_MATCH(compound_mapcount, _entire_mapcount); FOLIO_MATCH(subpages_mapcount, _nr_pages_mapped); FOLIO_MATCH(compound_pincount, _pincount); #ifdef CONFIG_64BIT -- cgit v1.2.3 From 4d510f3da4c216d4c2695395f67aec38e2aa6cc7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:28:54 +0000 Subject: mm: add folio_add_new_anon_rmap() In contrast to other rmap functions, page_add_new_anon_rmap() is always called with a freshly allocated page. That means it can't be called with a tail page. Turn page_add_new_anon_rmap() into folio_add_new_anon_rmap() and add a page_add_new_anon_rmap() wrapper. Callers can be converted individually. [akpm@linux-foundation.org: fix NOMMU build. page_add_new_anon_rmap() requires CONFIG_MMU] [willy@infradead.org: folio-compat.c needs rmap.h] Link: https://lkml.kernel.org/r/20230111142915.1001531-9-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/rmap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bd3504d11b15..aa682a2a93ce 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -194,6 +194,8 @@ void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long address, rmap_t flags); void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long address); +void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *, + unsigned long address); void page_add_file_rmap(struct page *, struct vm_area_struct *, bool compound); void page_remove_rmap(struct page *, struct vm_area_struct *, -- cgit v1.2.3 From c7f84b5723f1a60becd79d895ab214a7d5ee93c1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:28:57 +0000 Subject: mm: use entire_mapcount in __page_dup_rmap() Remove the use of the compound_mapcount_ptr() wrapper, and add an assertion that we're not passing a tail page if we're duplicating a PMD. Link: https://lkml.kernel.org/r/20230111142915.1001531-12-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/rmap.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index aa682a2a93ce..a6bd1f0a183d 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -208,7 +208,14 @@ void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *, static inline void __page_dup_rmap(struct page *page, bool compound) { - atomic_inc(compound ? 
compound_mapcount_ptr(page) : &page->_mapcount); + if (compound) { + struct folio *folio = (struct folio *)page; + + VM_BUG_ON_PAGE(compound && !PageHead(page), page); + atomic_inc(&folio->_entire_mapcount); + } else { + atomic_inc(&page->_mapcount); + } } static inline void page_dup_file_rmap(struct page *page, bool compound) -- cgit v1.2.3 From c97eeb8f260dba098ba775e37d216f81f28559a9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:00 +0000 Subject: mm: convert page_mapcount() to use folio_entire_mapcount() Remove a use of head_compound_mapcount(). Link: https://lkml.kernel.org/r/20230111142915.1001531-15-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index bdf83e75bcd6..a6afa6c51a4d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -853,22 +853,26 @@ static inline void page_mapcount_reset(struct page *page) atomic_set(&(page)->_mapcount, -1); } -/* - * Mapcount of 0-order page; when compound sub-page, includes - * compound_mapcount of compound_head of page. +/** + * page_mapcount() - Number of times this precise page is mapped. + * @page: The page. + * + * The number of times this page is mapped. If this page is part of + * a large folio, it includes the number of times this page is mapped + * as part of that folio. * - * Result is undefined for pages which cannot be mapped into userspace. + * The result is undefined for pages which cannot be mapped into userspace. * For example SLAB or special types of pages. See function page_has_type(). - * They use this place in struct page differently. + * They use this field in struct page differently. */ static inline int page_mapcount(struct page *page) { int mapcount = atomic_read(&page->_mapcount) + 1; - if (likely(!PageCompound(page))) - return mapcount; - page = compound_head(page); - return head_compound_mapcount(page) + mapcount; + if (unlikely(PageCompound(page))) + mapcount += folio_entire_mapcount(page_folio(page)); + + return mapcount; } int folio_total_mapcount(struct folio *folio); -- cgit v1.2.3 From 1aa4d03b60c0f61a8d96d5d633bf7968dbf6841f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:01 +0000 Subject: mm: remove head_compound_mapcount() and _ptr functions folio_mapcount_ptr(), compound_mapcount_ptr() and subpages_mapcount_ptr() are all now unused. Link: https://lkml.kernel.org/r/20230111142915.1001531-16-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 15 +++------------ include/linux/mm_types.h | 16 ---------------- 2 files changed, 3 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a6afa6c51a4d..7ff6e2410aa3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -831,16 +831,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) static inline int folio_entire_mapcount(struct folio *folio) { VM_BUG_ON_FOLIO(!folio_test_large(folio), folio); - return atomic_read(folio_mapcount_ptr(folio)) + 1; -} - -/* - * Mapcount of compound page as a whole, does not include mapped sub-pages. - * Must be called only on head of compound page. 
- */ -static inline int head_compound_mapcount(struct page *head) -{ - return atomic_read(compound_mapcount_ptr(head)) + 1; + return atomic_read(&folio->_entire_mapcount) + 1; } /* @@ -905,11 +896,11 @@ static inline int total_mapcount(struct page *page) static inline bool folio_large_is_mapped(struct folio *folio) { /* - * Reading folio_mapcount_ptr() below could be omitted if hugetlb + * Reading _entire_mapcount below could be omitted if hugetlb * participated in incrementing nr_pages_mapped when compound mapped. */ return atomic_read(&folio->_nr_pages_mapped) > 0 || - atomic_read(folio_mapcount_ptr(folio)) >= 0; + atomic_read(&folio->_entire_mapcount) >= 0; } /** diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 70cbda768308..ffcf21fbaaf0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -421,22 +421,6 @@ FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd); FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison); #undef FOLIO_MATCH -static inline atomic_t *folio_mapcount_ptr(struct folio *folio) -{ - struct page *tail = &folio->page + 1; - return &tail->compound_mapcount; -} - -static inline atomic_t *compound_mapcount_ptr(struct page *page) -{ - return &page[1].compound_mapcount; -} - -static inline atomic_t *subpages_mapcount_ptr(struct page *page) -{ - return &page[1].subpages_mapcount; -} - /* * Used for sizing the vmemmap region on some architectures */ -- cgit v1.2.3 From 5eb5cea11dcbafaa37685bc4e89e1d4ae9c434ea Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:02 +0000 Subject: mm: reimplement compound_order() Make compound_order() use struct folio. It can't be turned into a wrapper around folio_order() as a page can be turned into a tail page between a check in compound_order() and the assertion in folio_test_large(). Link: https://lkml.kernel.org/r/20230111142915.1001531-17-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7ff6e2410aa3..3adc37cebe6f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -719,11 +719,20 @@ int vma_is_stack_for_current(struct vm_area_struct *vma); struct mmu_gather; struct inode; +/* + * compound_order() can be called without holding a reference, which means + * that niceties like page_folio() don't work. These callers should be + * prepared to handle wild return values. For example, PG_head may be + * set before _folio_order is initialised, or this may be a tail page. + * See compaction.c for some good examples. + */ static inline unsigned int compound_order(struct page *page) { - if (!PageHead(page)) + struct folio *folio = (struct folio *)page; + + if (!test_bit(PG_head, &folio->flags)) return 0; - return page[1].compound_order; + return folio->_folio_order; } /** -- cgit v1.2.3 From 21a000fe97a018c6d25be63892afb4fd8210ab57 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:03 +0000 Subject: mm: reimplement compound_nr() Turn compound_nr() into a wrapper around folio_nr_pages(). Similarly to compound_order(), casting the struct page directly to struct folio preserves the existing behaviour, while calling page_folio() would change the behaviour. Move thp_nr_pages() down in the file so that compound_nr() can be after folio_nr_pages(). 
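To illustrate the lockless-caller caveat spelled out in the compound_order() comment above, here is a hedged sketch of how such a caller copes with transient garbage values. It is modelled loosely on the compaction code that comment points at; the wrapper function and the exact clamping are assumptions, not part of this patch:

	/* Sketch: no reference is held, so the order may be stale or garbage. */
	static unsigned long skip_compound_sketch(struct page *page, unsigned long pfn)
	{
		const unsigned int order = compound_order(page);

		if (likely(order < MAX_ORDER))	/* tolerate a racing split or free */
			pfn += (1UL << order) - 1;
		return pfn;
	}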
[willy@infradead.org: fix assertion triggering] Link: https://lkml.kernel.org/r/Y8AFgZEEjnUIaCbf@casper.infradead.org Link: https://lkml.kernel.org/r/20230111142915.1001531-18-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/mm.h | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3adc37cebe6f..3acf09d5b0bd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1005,18 +1005,6 @@ static inline void set_compound_order(struct page *page, unsigned int order) #endif } -/* Returns the number of pages in this potentially compound page. */ -static inline unsigned long compound_nr(struct page *page) -{ - if (!PageHead(page)) - return 1; -#ifdef CONFIG_64BIT - return page[1].compound_nr; -#else - return 1UL << compound_order(page); -#endif -} - /* Returns the number of bytes in this potentially compound page. */ static inline unsigned long page_size(struct page *page) { @@ -1039,16 +1027,6 @@ static inline unsigned int thp_order(struct page *page) return compound_order(page); } -/** - * thp_nr_pages - The number of regular pages in this huge page. - * @page: The head page of a huge page. - */ -static inline int thp_nr_pages(struct page *page) -{ - VM_BUG_ON_PGFLAGS(PageTail(page), page); - return compound_nr(page); -} - /** * thp_size - Size of a transparent huge page. * @page: Head page of a transparent huge page. @@ -1758,6 +1736,33 @@ static inline long folio_nr_pages(struct folio *folio) #endif } +/* + * compound_nr() returns the number of pages in this potentially compound + * page. compound_nr() can be called on a tail page, and is defined to + * return 1 in that case. + */ +static inline unsigned long compound_nr(struct page *page) +{ + struct folio *folio = (struct folio *)page; + + if (!test_bit(PG_head, &folio->flags)) + return 1; +#ifdef CONFIG_64BIT + return folio->_folio_nr_pages; +#else + return 1L << folio->_folio_order; +#endif +} + +/** + * thp_nr_pages - The number of regular pages in this huge page. + * @page: The head page of a huge page. + */ +static inline int thp_nr_pages(struct page *page) +{ + return folio_nr_pages((struct folio *)page); +} + /** * folio_next - Move to the next physical folio. * @folio: The folio we're currently operating on. -- cgit v1.2.3 From bad6da64565846ef5ba85b0b685cfde9db0085dc Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:04 +0000 Subject: mm: convert set_compound_page_dtor() and set_compound_order() to folios Replace uses of compound_dtor, compound_order and compound_nr by their folio equivalents. 
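For illustration only, the usual pairing of these two helpers when a compound page is prepared, as a sketch modelled on mm/page_alloc.c (the surrounding function is hypothetical and simplified, not part of this diff):

	/* Sketch: initialise the first tail page of a freshly allocated compound page. */
	static void prep_compound_head_sketch(struct page *page, unsigned int order)
	{
		set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
		set_compound_order(page, order);
	}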
Link: https://lkml.kernel.org/r/20230111142915.1001531-19-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3acf09d5b0bd..836b96e08a14 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -984,8 +984,11 @@ extern compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS]; static inline void set_compound_page_dtor(struct page *page, enum compound_dtor_id compound_dtor) { + struct folio *folio = (struct folio *)page; + VM_BUG_ON_PAGE(compound_dtor >= NR_COMPOUND_DTORS, page); - page[1].compound_dtor = compound_dtor; + VM_BUG_ON_PAGE(!PageHead(page), page); + folio->_folio_dtor = compound_dtor; } static inline void folio_set_compound_dtor(struct folio *folio, @@ -999,9 +1002,11 @@ void destroy_large_folio(struct folio *folio); static inline void set_compound_order(struct page *page, unsigned int order) { - page[1].compound_order = order; + struct folio *folio = (struct folio *)page; + + folio->_folio_order = order; #ifdef CONFIG_64BIT - page[1].compound_nr = 1U << order; + folio->_folio_nr_pages = 1U << order; #endif } -- cgit v1.2.3 From 1c5509be58f636afabbdaf66e7436da8ec0a1828 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:08 +0000 Subject: mm: remove 'First tail page' members from struct page All former users now use the folio equivalents, so remove them from the definition of struct page. Link: https://lkml.kernel.org/r/20230111142915.1001531-23-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index ffcf21fbaaf0..94b1707f5d33 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -140,16 +140,6 @@ struct page { }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ - - /* First tail page only */ - unsigned char compound_dtor; - unsigned char compound_order; - atomic_t compound_mapcount; - atomic_t subpages_mapcount; - atomic_t compound_pincount; -#ifdef CONFIG_64BIT - unsigned int compound_nr; /* 1 << compound_order */ -#endif }; struct { /* Second tail page of transparent huge page */ unsigned long _compound_pad_1; /* compound_head */ @@ -401,14 +391,6 @@ FOLIO_MATCH(memcg_data, memcg_data); offsetof(struct page, pg) + sizeof(struct page)) FOLIO_MATCH(flags, _flags_1); FOLIO_MATCH(compound_head, _head_1); -FOLIO_MATCH(compound_dtor, _folio_dtor); -FOLIO_MATCH(compound_order, _folio_order); -FOLIO_MATCH(compound_mapcount, _entire_mapcount); -FOLIO_MATCH(subpages_mapcount, _nr_pages_mapped); -FOLIO_MATCH(compound_pincount, _pincount); -#ifdef CONFIG_64BIT -FOLIO_MATCH(compound_nr, _folio_nr_pages); -#endif #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ -- cgit v1.2.3 From a8d55327ccc1f999a5fba4eee67ed08bd36493ad Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:09 +0000 Subject: doc: correct struct folio kernel-doc Insert appropriate public: and private: markers to make the generated kernel-doc look right. 
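As a short, generic illustration of the kernel-doc convention being applied here (the example struct is made up, not taken from this patch):

	/**
	 * struct foo - example of kernel-doc visibility markers
	 * @bar: A documented, public member.
	 */
	struct foo {
		int bar;
		/* private: omitted from the generated documentation */
		spinlock_t lock;
		/* public: */
		/* ...further documented members would go here... */
	};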
Link: https://lkml.kernel.org/r/20230111142915.1001531-24-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 94b1707f5d33..d458e9b8496c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -292,16 +292,12 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page) * @_refcount: Do not access this member directly. Use folio_ref_count() * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. - * @_flags_1: For large folios, additional page flags. - * @_head_1: Points to the folio. Do not use. * @_folio_dtor: Which destructor to use for this folio. * @_folio_order: Do not use directly, call folio_order(). * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). * @_nr_pages_mapped: Do not use directly, call folio_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). - * @_flags_2: For alignment. Do not use. - * @_head_2: Points to the folio. Do not use. * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h. * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h. @@ -348,6 +344,7 @@ struct folio { struct { unsigned long _flags_1; unsigned long _head_1; + /* public: */ unsigned char _folio_dtor; unsigned char _folio_order; atomic_t _entire_mapcount; @@ -356,6 +353,7 @@ struct folio { #ifdef CONFIG_64BIT unsigned int _folio_nr_pages; #endif + /* private: the union with struct page is transitional */ }; struct page __page_1; }; @@ -363,10 +361,12 @@ struct folio { struct { unsigned long _flags_2; unsigned long _head_2; + /* public: */ void *_hugetlb_subpool; void *_hugetlb_cgroup; void *_hugetlb_cgroup_rsvd; void *_hugetlb_hwpoison; + /* private: the union with struct page is transitional */ }; struct page __page_2; }; -- cgit v1.2.3 From 4375a553f46c6cb66d1711d8f514dfdf34ce74b0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:10 +0000 Subject: mm: move page->deferred_list to folio->_deferred_list Remove the entire block of definitions for the second tail page, and add the deferred list to the struct folio. This actually moves _deferred_list to a different offset in struct folio because I don't see a need to include the padding. This lets us use list_for_each_entry_safe() in deferred_split_scan() and avoid a number of calls to compound_head(). Link: https://lkml.kernel.org/r/20230111142915.1001531-25-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 9 ++++----- include/linux/mm_types.h | 14 ++++++++------ 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index a1341fdcf666..aacfcb02606f 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -295,11 +295,10 @@ static inline bool thp_migration_supported(void) static inline struct list_head *page_deferred_list(struct page *page) { - /* - * See organization of tail pages of compound page in - * "struct page" definition. 
- */ - return &page[2].deferred_list; + struct folio *folio = (struct folio *)page; + + VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio); + return &folio->_deferred_list; } #else /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d458e9b8496c..7eb4d0815a78 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -141,12 +141,6 @@ struct page { struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ }; - struct { /* Second tail page of transparent huge page */ - unsigned long _compound_pad_1; /* compound_head */ - unsigned long _compound_pad_2; - /* For both global and memcg */ - struct list_head deferred_list; - }; struct { /* Second tail page of hugetlb page */ unsigned long _hugetlb_pad_1; /* compound_head */ void *hugetlb_subpool; @@ -302,6 +296,7 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page) * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head(). + * @_deferred_list: Folios to be split under memory pressure. * * A folio is a physically, virtually and logically contiguous set * of bytes. It is a power-of-two in size, and it is aligned to that @@ -366,6 +361,13 @@ struct folio { void *_hugetlb_cgroup; void *_hugetlb_cgroup_rsvd; void *_hugetlb_hwpoison; + /* private: the union with struct page is transitional */ + }; + struct { + unsigned long _flags_2a; + unsigned long _head_2a; + /* public: */ + struct list_head _deferred_list; /* private: the union with struct page is transitional */ }; struct page __page_2; -- cgit v1.2.3 From 8991de90e99755b13026b1db32d1fa52e94c6a96 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:11 +0000 Subject: mm/huge_memory: remove page_deferred_list() Use folio->_deferred_list directly. Link: https://lkml.kernel.org/r/20230111142915.1001531-26-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index aacfcb02606f..b9978978a160 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -293,14 +293,6 @@ static inline bool thp_migration_supported(void) return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); } -static inline struct list_head *page_deferred_list(struct page *page) -{ - struct folio *folio = (struct folio *)page; - - VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio); - return &folio->_deferred_list; -} - #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) -- cgit v1.2.3 From f158ed6195ef949060811fd85086928470651944 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 11 Jan 2023 14:29:13 +0000 Subject: mm: convert deferred_split_huge_page() to deferred_split_folio() Now that both callers use a folio, pass the folio in and save a call to compound_head(). 
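A hedged sketch of the corresponding caller-side change (the call sites live in mm/ and are not part of this include/linux-only listing; the hunk below is illustrative only, for a caller that already has the folio in hand):

	-	deferred_split_huge_page(&folio->page);
	+	deferred_split_folio(folio);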
Link: https://lkml.kernel.org/r/20230111142915.1001531-28-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index b9978978a160..70bd867eba94 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -187,7 +187,7 @@ static inline int split_huge_page(struct page *page) { return split_huge_page_to_list(page, NULL); } -void deferred_split_huge_page(struct page *page); +void deferred_split_folio(struct folio *folio); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze, struct folio *folio); @@ -340,7 +340,7 @@ static inline int split_huge_page(struct page *page) { return 0; } -static inline void deferred_split_huge_page(struct page *page) {} +static inline void deferred_split_folio(struct folio *folio) {} #define split_huge_pmd(__vma, __pmd, __address) \ do { } while (0) -- cgit v1.2.3 From 6a171c16e62f854e6a7e0f837dbe8f3ace0f00ce Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 11 Jan 2023 14:29:14 +0000 Subject: mm: remove the hugetlb field from struct page Patch series "Get rid of tail page fields". Continue the shrinkage of the struct page definition by getting rid of the 'first tail page' and 'second tail page' fields. I originally did this patch set before Hugh's rewrite of the subpages_mapcount, so it needed substantial updates; hope I didn't miss anything. This patch (of 28): commit dad6a5eb5556 ("mm,hugetlb: use folio fields in second tail page") added a transitional hugetlb field to struct page and struct folio to make room for another int in the first tail of a compound page. Hugetlb folio conversions have changed all page users of this field to use the fields within the folio, so struct page no longer needs this hugetlb-specific field.
Link: https://lkml.kernel.org/r/20230111142915.1001531-1-willy@infradead.org Link: https://lkml.kernel.org/r/20230111142915.1001531-29-willy@infradead.org Signed-off-by: Sidhartha Kumar Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7eb4d0815a78..452920467223 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -141,14 +141,6 @@ struct page { struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ }; - struct { /* Second tail page of hugetlb page */ - unsigned long _hugetlb_pad_1; /* compound_head */ - void *hugetlb_subpool; - void *hugetlb_cgroup; - void *hugetlb_cgroup_rsvd; - void *hugetlb_hwpoison; - /* No more space on 32-bit: use third tail if more */ - }; struct { /* Page table pages */ unsigned long _pt_pad_1; /* compound_head */ pgtable_t pmd_huge_pte; /* protected by page->ptl */ @@ -399,10 +391,6 @@ FOLIO_MATCH(compound_head, _head_1); offsetof(struct page, pg) + 2 * sizeof(struct page)) FOLIO_MATCH(flags, _flags_2); FOLIO_MATCH(compound_head, _head_2); -FOLIO_MATCH(hugetlb_subpool, _hugetlb_subpool); -FOLIO_MATCH(hugetlb_cgroup, _hugetlb_cgroup); -FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd); -FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison); #undef FOLIO_MATCH /* -- cgit v1.2.3 From 2ff6cecee669bf0fc63eadebac8cfc81f74b9a4c Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 12 Jan 2023 14:46:03 -0600 Subject: mm/memory-failure: convert hugetlb_clear_page_hwpoison to folios Change hugetlb_clear_page_hwpoison() to folio_clear_hugetlb_hwpoison() by changing the function to take in a folio. This converts one use of ClearPageHWPoison and HPageRawHwpUnreliable to their folio equivalents. Link: https://lkml.kernel.org/r/20230112204608.80136-4-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Acked-by: Naoya Horiguchi Cc: Matthew Wilcox Cc: Miaohe Lin Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 7d6413c3b8f5..cf60fe741c1d 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -878,9 +878,9 @@ extern int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn); #ifdef CONFIG_MEMORY_FAILURE -extern void hugetlb_clear_page_hwpoison(struct page *hpage); +extern void folio_clear_hugetlb_hwpoison(struct folio *folio); #else -static inline void hugetlb_clear_page_hwpoison(struct page *hpage) +static inline void folio_clear_hugetlb_hwpoison(struct folio *folio) { } #endif -- cgit v1.2.3 From 8115612883978069fee8793f873a627ff5868718 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 12 Jan 2023 12:39:28 +0000 Subject: mm: pagevec: add folio_batch_reinit() Patch series "update mlock to use folios", v4. This series updates mlock to use folios, converting the internal interface to using folios exclusively and exposing the folio interface externally. As a product of this we move to using a folio batch rather than a pagevec for mlock folios, which brings it in line with the core folio batches contained in mm/swap.c. This patch (of 5): This performs the same task as pagevec_reinit(), only modifying a folio batch rather than a pagevec. 
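An illustrative usage sketch of the new helper next to the full initialiser (an assumed pattern, mirroring how mlock's folio batches are recycled; not part of this diff):

	struct folio_batch fbatch;

	folio_batch_init(&fbatch);	/* resets the count and the drain flag */
	/* ...fill fbatch and process its folios... */
	folio_batch_reinit(&fbatch);	/* reuse: only the count is reset */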
Link: https://lkml.kernel.org/r/cover.1673526881.git.lstoakes@gmail.com Link: https://lkml.kernel.org/r/9018cecacb39e34c883540f997f9be8281153613.1673526881.git.lstoakes@gmail.com Signed-off-by: Lorenzo Stoakes Acked-by: Vlastimil Babka Cc: Christian Brauner Cc: Geert Uytterhoeven Cc: Hugh Dickins Cc: Joel Fernandes (Google) Cc: Jonathan Corbet Cc: Liam R. Howlett Cc: Matthew Wilcox Cc: Mike Rapoport (IBM) Cc: William Kucharski Signed-off-by: Andrew Morton --- include/linux/pagevec.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 215eb6c3bdc9..2a6f61a0c10a 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -103,6 +103,11 @@ static inline void folio_batch_init(struct folio_batch *fbatch) fbatch->percpu_pvec_drained = false; } +static inline void folio_batch_reinit(struct folio_batch *fbatch) +{ + fbatch->nr = 0; +} + static inline unsigned int folio_batch_count(struct folio_batch *fbatch) { return fbatch->nr; -- cgit v1.2.3 From 950fe885a89770619e315f9b46301eebf0aab7b3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 13 Jan 2023 18:10:26 +0100 Subject: mm: remove __HAVE_ARCH_PTE_SWP_EXCLUSIVE __HAVE_ARCH_PTE_SWP_EXCLUSIVE is now supported by all architectures that support swp PTEs, so let's drop it. Link: https://lkml.kernel.org/r/20230113171026.582290-27-david@redhat.com Signed-off-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 1159b25b0542..5fd45454c073 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1064,35 +1064,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define arch_start_context_switch(prev) do {} while (0) #endif -/* - * When replacing an anonymous page by a real (!non) swap entry, we clear - * PG_anon_exclusive from the page and instead remember whether the flag was - * set in the swp pte. During fork(), we have to mark the entry as !exclusive - * (possibly shared). On swapin, we use that information to restore - * PG_anon_exclusive, which is very helpful in cases where we might have - * additional (e.g., FOLL_GET) references on a page and wouldn't be able to - * detect exclusivity. - * - * These functions don't apply to non-swap entries (e.g., migration, hwpoison, - * ...). - */ -#ifndef __HAVE_ARCH_PTE_SWP_EXCLUSIVE -static inline pte_t pte_swp_mkexclusive(pte_t pte) -{ - return pte; -} - -static inline int pte_swp_exclusive(pte_t pte) -{ - return false; -} - -static inline pte_t pte_swp_clear_exclusive(pte_t pte) -{ - return pte; -} -#endif - #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) -- cgit v1.2.3 From 6189eb82f0aec8a877190bf52e629c687ed02773 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Fri, 13 Jan 2023 15:42:53 +0000 Subject: mm/page_ext: do not allocate space for page_ext->flags if not needed There is 8 byte page_ext->flags field allocated per page whenever CONFIG_PAGE_EXTENSION is enabled. However, not every user of page_ext uses flags. Therefore, check whether flags is needed at least by one user and if so allocate space for it. 
For example when page_table_check is enabled, on a machine with 128G of memory before the fix: [ 2.244288] allocated 536870912 bytes of page_ext after the fix: [ 2.160154] allocated 268435456 bytes of page_ext Also, add a kernel-doc comment before page_ext_operations that describes the fields, and remove check if need() is set, as that is now a required field. [pasha.tatashin@soleen.com: address comments from Mike Rapoport] Link: https://lkml.kernel.org/r/20230117202103.1412449-1-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20230113154253.92480-1-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Acked-by: David Hildenbrand Acked-by: Vlastimil Babka Acked-by: David Rientjes Reviewed-by: Mike Rapoport (IBM) Cc: Charan Teja Kalla Cc: Li Zhe Cc: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/page_ext.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 22be4582faae..67314f648aeb 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -7,15 +7,33 @@ #include struct pglist_data; + +/** + * struct page_ext_operations - per page_ext client operations + * @offset: Offset to the client's data within page_ext. Offset is returned to + * the client by page_ext_init. + * @size: The size of the client data within page_ext. + * @need: Function that returns true if client requires page_ext. + * @init: (optional) Called to initialize client once page_exts are allocated. + * @need_shared_flags: True when client is using shared page_ext->flags + * field. + * + * Each Page Extension client must define page_ext_operations in + * page_ext_ops array. + */ struct page_ext_operations { size_t offset; size_t size; bool (*need)(void); void (*init)(void); + bool need_shared_flags; }; #ifdef CONFIG_PAGE_EXTENSION +/* + * The page_ext_flags users must set need_shared_flags to true. + */ enum page_ext_flags { PAGE_EXT_OWNER, PAGE_EXT_OWNER_ALLOCATED, -- cgit v1.2.3 From 2973d8229b78d3f148e0c45916a1e8b237dc6167 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 13 Jan 2023 11:12:17 +0000 Subject: mm: discard __GFP_ATOMIC __GFP_ATOMIC serves little purpose. Its main effect is to set ALLOC_HARDER which adds a few little boosts to increase the chance of an allocation succeeding, one of which is to lower the water-mark at which it will succeed. It is *always* paired with __GFP_HIGH which sets ALLOC_HIGH which also adjusts this watermark. It is probable that other users of __GFP_HIGH should benefit from the other little bonuses that __GFP_ATOMIC gets. __GFP_ATOMIC also gives a warning if used with __GFP_DIRECT_RECLAIM. There is little point to this. We already get a might_sleep() warning if __GFP_DIRECT_RECLAIM is set. __GFP_ATOMIC allows the "watermark_boost" to be side-stepped. It is probable that testing ALLOC_HARDER is a better fit here. __GFP_ATOMIC is used by tegra-smmu.c to check if the allocation might sleep. This should test __GFP_DIRECT_RECLAIM instead. This patch: - removes __GFP_ATOMIC - allows __GFP_HIGH allocations to ignore watermark boosting as well as GFP_ATOMIC requests. - makes other adjustments as suggested by the above. The net result is not change to GFP_ATOMIC allocations. Other allocations that use __GFP_HIGH will benefit from a few different extra privileges. 
This affects: xen, dm, md, ntfs3 the vermillion frame buffer hibernation ksm swap all of which likely produce more benefit than cost if these selected allocation are more likely to succeed quickly. [mgorman: Minor adjustments to rework on top of a series] Link: https://lkml.kernel.org/r/163712397076.13692.4727608274002939094@noble.neil.brown.name Link: https://lkml.kernel.org/r/20230113111217.14134-7-mgorman@techsingularity.net Signed-off-by: NeilBrown Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Matthew Wilcox Cc: Thierry Reding Signed-off-by: Andrew Morton --- include/linux/gfp_types.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp_types.h b/include/linux/gfp_types.h index d88c46ca82e1..5088637fe5c2 100644 --- a/include/linux/gfp_types.h +++ b/include/linux/gfp_types.h @@ -31,7 +31,7 @@ typedef unsigned int __bitwise gfp_t; #define ___GFP_IO 0x40u #define ___GFP_FS 0x80u #define ___GFP_ZERO 0x100u -#define ___GFP_ATOMIC 0x200u +/* 0x200u unused */ #define ___GFP_DIRECT_RECLAIM 0x400u #define ___GFP_KSWAPD_RECLAIM 0x800u #define ___GFP_WRITE 0x1000u @@ -116,11 +116,8 @@ typedef unsigned int __bitwise gfp_t; * * %__GFP_HIGH indicates that the caller is high-priority and that granting * the request is necessary before the system can make forward progress. - * For example, creating an IO context to clean pages. - * - * %__GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is - * high priority. Users are typically interrupt handlers. This may be - * used in conjunction with %__GFP_HIGH + * For example creating an IO context to clean pages and requests + * from atomic context. * * %__GFP_MEMALLOC allows access to all memory. This should only be used when * the caller guarantees the allocation will allow more memory to be freed @@ -135,7 +132,6 @@ typedef unsigned int __bitwise gfp_t; * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. * This takes precedence over the %__GFP_MEMALLOC flag if both are set. */ -#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) #define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) #define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) @@ -329,7 +325,7 @@ typedef unsigned int __bitwise gfp_t; * version does not attempt reclaim/compaction at all and is by default used * in page fault path, while the non-light is used by khugepaged. */ -#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) +#define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM) #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) -- cgit v1.2.3 From 2cf1338454a8a9a0b3c1271ccb521afa2d6ae241 Mon Sep 17 00:00:00 2001 From: David Stevens Date: Fri, 13 Jan 2023 11:30:11 +0900 Subject: mm: fix khugepaged with shmem_enabled=advise Pass vm_flags as a parameter to shmem_is_huge, rather than reading the flags from the vm_area_struct in question. This allows the updated flags from hugepage_madvise to be passed to the check, which is necessary because madvise does not update the vm_area_struct's flags until after hugepage_madvise returns. This fixes an issue when shmem_enabled=madvise, where MADV_HUGEPAGE on shmem was not able to register the mm_struct with khugepaged. 
Prior to cd89fb065099, the mm_struct was registered by MADV_HUGEPAGE regardless of the value of shmem_enabled (which was only checked when scanning vmas). Link: https://lkml.kernel.org/r/20230113023011.1784015-1-stevensd@google.com Fixes: cd89fb065099 ("mm,thp,shmem: make khugepaged obey tmpfs mount flags") Signed-off-by: David Stevens Cc: David Stevens Cc: Hugh Dickins Cc: Rik van Riel Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index d500ea967dc7..d09d54be4ffd 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -92,14 +92,8 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); -extern bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, - pgoff_t index, bool shmem_huge_force); -static inline bool shmem_huge_enabled(struct vm_area_struct *vma, - bool shmem_huge_force) -{ - return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff, - shmem_huge_force); -} +extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, + struct mm_struct *mm, unsigned long vm_flags); extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, pgoff_t start, pgoff_t end); -- cgit v1.2.3 From ee7a5906ff08e435ed95ec9fe7c7eed2c11015d2 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Wed, 4 Jan 2023 13:14:26 -0800 Subject: pagemap: add filemap_grab_folio() Patch series "Convert to filemap_get_folios_tag()", v5. This patch series replaces find_get_pages_range_tag() with filemap_get_folios_tag(). This also allows the removal of multiple calls to compound_head() throughout. It also makes a good chunk of the straightforward conversions to folios, and takes the opportunity to introduce a function that grabs a folio from the pagecache. This patch (of 23): Add function filemap_grab_folio() to grab a folio from the page cache. This function is meant to serve as a folio replacement for grab_cache_page, and is used to facilitate the removal of find_get_pages_range_tag(). Link: https://lkml.kernel.org/r/20230104211448.4804-1-vishal.moola@gmail.com Link: https://lkml.kernel.org/r/20230104211448.4804-2-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 29e1f9e76eb6..468183be67be 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -546,6 +546,26 @@ static inline struct folio *filemap_lock_folio(struct address_space *mapping, return __filemap_get_folio(mapping, index, FGP_LOCK, 0); } +/** + * filemap_grab_folio - grab a folio from the page cache + * @mapping: The address space to search + * @index: The page index + * + * Looks up the page cache entry at @mapping & @index. If no folio is found, + * a new folio is created. The folio is locked, marked as accessed, and + * returned. + * + * Return: A found or created folio. NULL if no folio is found and failed to + * create a folio. 
+ */ +static inline struct folio *filemap_grab_folio(struct address_space *mapping, + pgoff_t index) +{ + return __filemap_get_folio(mapping, index, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + mapping_gfp_mask(mapping)); +} + /** * find_get_page - find and get a page reference * @mapping: the address_space to search -- cgit v1.2.3 From 247f9e1feef4e57911510c8f82348efb4491ea0e Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Wed, 4 Jan 2023 13:14:27 -0800 Subject: filemap: add filemap_get_folios_tag() This is the equivalent of find_get_pages_range_tag(), except for folios instead of pages. One notable difference is that filemap_get_folios_tag() does not take in a maximum pages argument. It instead tries to fill a folio batch and stops either once full (15 folios) or upon reaching the end of the search range. The new function supports large folios; the initial function did not, since none of its callers use large folios. Link: https://lkml.kernel.org/r/20230104211448.4804-3-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 468183be67be..bb3c1d51b1cb 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -739,6 +739,8 @@ unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); unsigned filemap_get_folios_contig(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); +unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start, + pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch); unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, pgoff_t end, xa_mark_t tag, unsigned int nr_pages, struct page **pages); -- cgit v1.2.3 From c5792d9384113de4085dfbce6940e2a853debb67 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Wed, 4 Jan 2023 13:14:48 -0800 Subject: filemap: remove find_get_pages_range_tag() All callers to find_get_pages_range_tag(), find_get_pages_tag(), pagevec_lookup_range_tag(), and pagevec_lookup_tag() have been removed.
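For illustration, a typical dirty-writeback loop over the replacement interface, filemap_get_folios_tag(), might look like the following sketch (mapping is assumed to be the struct address_space being written back; locking and error handling are omitted):

	struct folio_batch fbatch;
	pgoff_t index = 0;
	unsigned int nr, i;

	folio_batch_init(&fbatch);
	while ((nr = filemap_get_folios_tag(mapping, &index, (pgoff_t)-1,
					    PAGECACHE_TAG_DIRTY, &fbatch))) {
		for (i = 0; i < nr; i++) {
			struct folio *folio = fbatch.folios[i];

			/* lock, write back and unlock each dirty folio here */
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}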
Link: https://lkml.kernel.org/r/20230104211448.4804-24-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 10 ---------- include/linux/pagevec.h | 8 -------- 2 files changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index bb3c1d51b1cb..9f1081683771 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -741,16 +741,6 @@ unsigned filemap_get_folios_contig(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start, pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch); -unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, - pgoff_t end, xa_mark_t tag, unsigned int nr_pages, - struct page **pages); -static inline unsigned find_get_pages_tag(struct address_space *mapping, - pgoff_t *index, xa_mark_t tag, unsigned int nr_pages, - struct page **pages) -{ - return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag, - nr_pages, pages); -} struct page *grab_cache_page_write_begin(struct address_space *mapping, pgoff_t index); diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 2a6f61a0c10a..f582f7213ea5 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -26,14 +26,6 @@ struct pagevec { }; void __pagevec_release(struct pagevec *pvec); -unsigned pagevec_lookup_range_tag(struct pagevec *pvec, - struct address_space *mapping, pgoff_t *index, pgoff_t end, - xa_mark_t tag); -static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, - struct address_space *mapping, pgoff_t *index, xa_mark_t tag) -{ - return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag); -} static inline void pagevec_init(struct pagevec *pvec) { -- cgit v1.2.3 From 6bc56a4d855303705802c5ede4625973637484c7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 16 Jan 2023 19:18:09 +0000 Subject: mm: add vma_alloc_zeroed_movable_folio() Replace alloc_zeroed_user_highpage_movable(). The main difference is returning a folio containing a single page instead of returning the page, but take the opportunity to rename the function to match other allocation functions a little better and rewrite the documentation to place more emphasis on the zeroing rather than the highmem aspect. Link: https://lkml.kernel.org/r/20230116191813.2145215-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Zi Yan Signed-off-by: Andrew Morton --- include/linux/highmem.h | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index d7097b8158f2..e22509420ac6 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -207,31 +207,30 @@ static inline void clear_user_highpage(struct page *page, unsigned long vaddr) } #endif -#ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE +#ifndef vma_alloc_zeroed_movable_folio /** - * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move - * @vma: The VMA the page is to be allocated for - * @vaddr: The virtual address the page will be inserted into + * vma_alloc_zeroed_movable_folio - Allocate a zeroed page for a VMA. + * @vma: The VMA the page is to be allocated for. + * @vaddr: The virtual address the page will be inserted into. 
* - * Returns: The allocated and zeroed HIGHMEM page + * This function will allocate a page suitable for inserting into this + * VMA at this virtual address. It may be allocated from highmem or + * the movable zone. An architecture may provide its own implementation. * - * This function will allocate a page for a VMA that the caller knows will - * be able to migrate in the future using move_pages() or reclaimed - * - * An architecture may override this function by defining - * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE and providing their own - * implementation. + * Return: A folio containing one allocated and zeroed page or NULL if + * we are out of memory. */ -static inline struct page * -alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, +static inline +struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, unsigned long vaddr) { - struct page *page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); + struct folio *folio; - if (page) - clear_user_highpage(page, vaddr); + folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vaddr, false); + if (folio) + clear_user_highpage(&folio->page, vaddr); - return page; + return folio; } #endif -- cgit v1.2.3 From 9cfb816b1c6c99f4b3c1d4a0fb096162cd17ec71 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 16 Jan 2023 19:25:06 +0000 Subject: mm/fs: convert inode_attach_wb() to take a folio Patch series "Writeback folio conversions". Remove more calls to compound_head() by passing folios around instead of pages. This patch (of 2): The only caller of inode_attach_wb() which doesn't pass NULL already has a folio, so convert the whole call-chain to take folios. Link: https://lkml.kernel.org/r/20230116192507.2146150-1-willy@infradead.org Link: https://lkml.kernel.org/r/20230116192507.2146150-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/writeback.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 2554b71765e9..3f1491b07474 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -207,7 +207,7 @@ static inline void wait_on_inode(struct inode *inode) #include #include -void __inode_attach_wb(struct inode *inode, struct page *page); +void __inode_attach_wb(struct inode *inode, struct folio *folio); void wbc_attach_and_unlock_inode(struct writeback_control *wbc, struct inode *inode) __releases(&inode->i_lock); @@ -222,16 +222,16 @@ bool cleanup_offline_cgwb(struct bdi_writeback *wb); /** * inode_attach_wb - associate an inode with its wb * @inode: inode of interest - * @page: page being dirtied (may be NULL) + * @folio: folio being dirtied (may be NULL) * * If @inode doesn't have its wb, associate it with the wb matching the - * memcg of @page or, if @page is NULL, %current. May be called w/ or w/o + * memcg of @folio or, if @folio is NULL, %current. May be called w/ or w/o * @inode->i_lock. 
*/ -static inline void inode_attach_wb(struct inode *inode, struct page *page) +static inline void inode_attach_wb(struct inode *inode, struct folio *folio) { if (!inode->i_wb) - __inode_attach_wb(inode, page); + __inode_attach_wb(inode, folio); } /** @@ -290,7 +290,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) #else /* CONFIG_CGROUP_WRITEBACK */ -static inline void inode_attach_wb(struct inode *inode, struct page *page) +static inline void inode_attach_wb(struct inode *inode, struct folio *folio) { } -- cgit v1.2.3 From 75376c6fb93b99e94192cfff48222d11819ee917 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 16 Jan 2023 19:25:07 +0000 Subject: mm: convert mem_cgroup_css_from_page() to mem_cgroup_css_from_folio() Only one caller doesn't have a folio, so move the page_folio() call to that one caller from mem_cgroup_css_from_folio(). Link: https://lkml.kernel.org/r/20230116192507.2146150-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e605fc885f08..35478695cabf 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -890,7 +890,7 @@ static inline bool mm_match_cgroup(struct mm_struct *mm, return match; } -struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page); +struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio); ino_t page_cgroup_ino(struct page *page); static inline bool mem_cgroup_online(struct mem_cgroup *memcg) -- cgit v1.2.3 From 7ec7096b8577d3b899c1dae456a414f2d08c7ddb Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Tue, 17 Jan 2023 20:46:17 +0000 Subject: mm/page_ext: init page_ext early if there are no deferred struct pages page_ext must be initialized after all struct pages are initialized. Therefore, page_ext is initialized after page_alloc_init_late(), and can optionally be initialized earlier via early_page_ext kernel parameter which as a side effect also disables deferred struct pages. Allow to automatically init page_ext early when there are no deferred struct pages in order to be able to use page_ext during kernel boot and track for example page allocations early. [pasha.tatashin@soleen.com: fix build with CONFIG_PAGE_EXTENSION=n] Link: https://lkml.kernel.org/r/20230118155251.2522985-1-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20230117204617.1553748-1-pasha.tatashin@soleen.com Signed-off-by: Pasha Tatashin Acked-by: Mike Rapoport (IBM) Acked-by: Vlastimil Babka Cc: Charan Teja Kalla Cc: David Hildenbrand Cc: Li Zhe Cc: Michal Hocko Signed-off-by: Andrew Morton --- include/linux/page_ext.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index 67314f648aeb..bc2e39090a1f 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -29,6 +29,8 @@ struct page_ext_operations { bool need_shared_flags; }; +extern bool deferred_struct_pages; + #ifdef CONFIG_PAGE_EXTENSION /* -- cgit v1.2.3 From 04bac040bc71b4b37550eed5854f34ca161756f9 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 18 Jan 2023 09:40:39 -0800 Subject: mm/hugetlb: convert get_hwpoison_huge_page() to folios Straightforward conversion of get_hwpoison_huge_page() to get_hwpoison_hugetlb_folio(). 
Reduces two references to a head page in memory-failure.c [arnd@arndb.de: fix get_hwpoison_hugetlb_folio() stub] Link: https://lkml.kernel.org/r/20230119111920.635260-1-arnd@kernel.org Link: https://lkml.kernel.org/r/20230118174039.14247-1-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Signed-off-by: Arnd Bergmann Acked-by: Naoya Horiguchi Reviewed-by: Matthew Wilcox (Oracle) Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index cf60fe741c1d..a51e6daacac6 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -172,7 +172,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); int isolate_hugetlb(struct page *page, struct list_head *list); -int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison); +int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); void putback_active_hugepage(struct page *page); @@ -418,7 +418,7 @@ static inline int isolate_hugetlb(struct page *page, struct list_head *list) return -EBUSY; } -static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison) +static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison) { return 0; } -- cgit v1.2.3 From 5649d113ffce9f532a9ecc5ab96a93e02efbf283 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Wed, 18 Jan 2023 20:13:03 +0800 Subject: swap_state: update shadow_nodes for anonymous page Shadow_nodes is for reclaiming shadow nodes in workingset handling. It used to be updated only on page cache add or delete, since for a long time workingset only supported the page cache. But when workingset gained anonymous page detection, we missed updating shadow nodes for it. As a result, shadow nodes of anonymous pages are never reclaimed by scan_shadow_nodes(), even when they use a lot of memory and system memory is tight. So update shadow_nodes for anonymous pages when swap cache entries are added or deleted, by calling xas_set_update(..workingset_update_node). Link: https://lkml.kernel.org/r/202301182013032211005@zte.com.cn Fixes: aae466b0052e ("mm/swap: implement workingset detection for anonymous LRU") Signed-off-by: Yang Yang Reviewed-by: Ran Xiaokai Cc: Bagas Sanjaya Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/xarray.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 44dd6d6e01bc..741703b45f61 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1643,7 +1643,8 @@ static inline void xas_set_order(struct xa_state *xas, unsigned long index, * @update: Function to call when updating a node. * * The XArray can notify a caller after it has updated an xa_node. - * This is advanced functionality and is only needed by the page cache. + * This is advanced functionality and is only needed by the page + * cache and swap cache.
*/ static inline void xas_set_update(struct xa_state *xas, xa_update_node_t update) { -- cgit v1.2.3 From b507808ebce23561d4ff8c2aa1fb949fe402bc61 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Thu, 19 Jan 2023 16:03:43 +0000 Subject: mm: implement memory-deny-write-execute as a prctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm: In-kernel support for memory-deny-write-execute (MDWE)", v2. The background to this is that systemd has a configuration option called MemoryDenyWriteExecute [2], implemented as a SECCOMP BPF filter. Its aim is to prevent a user task from inadvertently creating an executable mapping that is (or was) writeable. Since such BPF filter is stateless, it cannot detect mappings that were previously writeable but subsequently changed to read-only. Therefore the filter simply rejects any mprotect(PROT_EXEC). The side-effect is that on arm64 with BTI support (Branch Target Identification), the dynamic loader cannot change an ELF section from PROT_EXEC to PROT_EXEC|PROT_BTI using mprotect(). For libraries, it can resort to unmapping and re-mapping but for the main executable it does not have a file descriptor. The original bug report in the Red Hat bugzilla - [3] - and subsequent glibc workaround for libraries - [4]. This series adds in-kernel support for this feature as a prctl PR_SET_MDWE, that is inherited on fork(). The prctl denies PROT_WRITE | PROT_EXEC mappings. Like the systemd BPF filter it also denies adding PROT_EXEC to mappings. However unlike the BPF filter it only denies it if the mapping didn't previous have PROT_EXEC. This allows to PROT_EXEC -> PROT_EXEC | PROT_BTI with mprotect(), which is a problem with the BPF filter. This patch (of 2): The aim of such policy is to prevent a user task from creating an executable mapping that is also writeable. An example of mmap() returning -EACCESS if the policy is enabled: mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, 0, 0); Similarly, mprotect() would return -EACCESS below: addr = mmap(0, size, PROT_READ | PROT_EXEC, flags, 0, 0); mprotect(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC); The BPF filter that systemd MDWE uses is stateless, and disallows mprotect() with PROT_EXEC completely. This new prctl allows PROT_EXEC to be enabled if it was already PROT_EXEC, which allows the following case: addr = mmap(0, size, PROT_READ | PROT_EXEC, flags, 0, 0); mprotect(addr, size, PROT_READ | PROT_EXEC | PROT_BTI); where PROT_BTI enables branch tracking identification on arm64. Link: https://lkml.kernel.org/r/20230119160344.54358-1-joey.gouly@arm.com Link: https://lkml.kernel.org/r/20230119160344.54358-2-joey.gouly@arm.com Signed-off-by: Joey Gouly Co-developed-by: Catalin Marinas Signed-off-by: Catalin Marinas Cc: Alexander Viro Cc: Jeremy Linton Cc: Kees Cook Cc: Lennart Poettering Cc: Mark Brown Cc: nd Cc: Shuah Khan Cc: Szabolcs Nagy Cc: Topi Miettinen Cc: Zbigniew Jędrzejewski-Szmek Cc: David Hildenbrand Signed-off-by: Andrew Morton --- include/linux/mman.h | 34 ++++++++++++++++++++++++++++++++++ include/linux/sched/coredump.h | 6 +++++- 2 files changed, 39 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mman.h b/include/linux/mman.h index 58b3abd457a3..cee1e4b566d8 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -156,4 +156,38 @@ calc_vm_flag_bits(unsigned long flags) } unsigned long vm_commit_limit(void); + +/* + * Denies creating a writable executable mapping or gaining executable permissions. 
+ * + * This denies the following: + * + * a) mmap(PROT_WRITE | PROT_EXEC) + * + * b) mmap(PROT_WRITE) + * mprotect(PROT_EXEC) + * + * c) mmap(PROT_WRITE) + * mprotect(PROT_READ) + * mprotect(PROT_EXEC) + * + * But allows the following: + * + * d) mmap(PROT_READ | PROT_EXEC) + * mmap(PROT_READ | PROT_EXEC | PROT_BTI) + */ +static inline bool map_deny_write_exec(struct vm_area_struct *vma, unsigned long vm_flags) +{ + if (!test_bit(MMF_HAS_MDWE, ¤t->mm->flags)) + return false; + + if ((vm_flags & VM_EXEC) && (vm_flags & VM_WRITE)) + return true; + + if (!(vma->vm_flags & VM_EXEC) && (vm_flags & VM_EXEC)) + return true; + + return false; +} + #endif /* _LINUX_MMAN_H */ diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 8270ad7ae14c..0e17ae7fbfd3 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -81,9 +81,13 @@ static inline int get_dumpable(struct mm_struct *mm) * lifecycle of this mm, just for simplicity. */ #define MMF_HAS_PINNED 27 /* FOLL_PIN has run, never cleared */ + +#define MMF_HAS_MDWE 28 +#define MMF_HAS_MDWE_MASK (1 << MMF_HAS_MDWE) + #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ - MMF_DISABLE_THP_MASK) + MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK) #endif /* _LINUX_SCHED_COREDUMP_H */ -- cgit v1.2.3 From 6b3f013bb90e737b06c7955571407190b4c760ce Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Thu, 19 Jan 2023 01:38:29 +0000 Subject: mm/damon: update comments in damon.h for damon_attrs Patch series "mm/damon: misc fixes". This patchset contains three miscellaneous simple fixes for DAMON online tuning. This patch (of 3): Commit cbeaa77b0449 ("mm/damon/core: use a dedicated struct for monitoring attributes") moved monitoring intervals from damon_ctx to a new struct, damon_attrs, but a comment in the header file has not updated for the change. Update it. Link: https://lkml.kernel.org/r/20230119013831.1911-1-sj@kernel.org Link: https://lkml.kernel.org/r/20230119013831.1911-2-sj@kernel.org Fixes: cbeaa77b0449 ("mm/damon/core: use a dedicated struct for monitoring attributes") Signed-off-by: SeongJae Park Cc: Brendan Higgins Signed-off-by: Andrew Morton --- include/linux/damon.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/damon.h b/include/linux/damon.h index dfb245bb3053..d5d4d19928e0 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -354,10 +354,10 @@ struct damon_ctx; * users should register the low level operations for their target address * space and usecase via the &damon_ctx.ops. Then, the monitoring thread * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting - * the monitoring, @update after each &damon_ctx.ops_update_interval, and + * the monitoring, @update after each &damon_attrs.ops_update_interval, and * @check_accesses, @target_valid and @prepare_access_checks after each - * &damon_ctx.sample_interval. Finally, @reset_aggregated is called after each - * &damon_ctx.aggr_interval. + * &damon_attrs.sample_interval. Finally, @reset_aggregated is called after + * each &damon_attrs.aggr_interval. * * Each &struct damon_operations instance having valid @id can be registered * via damon_register_ops() and selected by damon_select_ops() later. -- cgit v1.2.3 From 36c7b4db7c942ae9e1b111f0c6b468c8b2e33842 Mon Sep 17 00:00:00 2001 From: "T.J. 
Alumbaugh" Date: Wed, 18 Jan 2023 00:18:24 +0000 Subject: mm: multi-gen LRU: section for memcg LRU Move memcg LRU code into a dedicated section. Improve the design doc to outline its architecture. Link: https://lkml.kernel.org/r/20230118001827.1040870-5-talumbau@google.com Signed-off-by: T.J. Alumbaugh Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/mm_inline.h | 17 ----------------- include/linux/mmzone.h | 13 ++----------- 2 files changed, 2 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 26dcbda07e92..de1e622dd366 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -122,18 +122,6 @@ static inline bool lru_gen_in_fault(void) return current->in_lru_fault; } -#ifdef CONFIG_MEMCG -static inline int lru_gen_memcg_seg(struct lruvec *lruvec) -{ - return READ_ONCE(lruvec->lrugen.seg); -} -#else -static inline int lru_gen_memcg_seg(struct lruvec *lruvec) -{ - return 0; -} -#endif - static inline int lru_gen_from_seq(unsigned long seq) { return seq % MAX_NR_GENS; @@ -309,11 +297,6 @@ static inline bool lru_gen_in_fault(void) return false; } -static inline int lru_gen_memcg_seg(struct lruvec *lruvec) -{ - return 0; -} - static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) { return false; diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 815c7c2edf45..977be526c939 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -368,15 +368,6 @@ struct page_vma_mapped_walk; #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) #define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) -/* see the comment on MEMCG_NR_GENS */ -enum { - MEMCG_LRU_NOP, - MEMCG_LRU_HEAD, - MEMCG_LRU_TAIL, - MEMCG_LRU_OLD, - MEMCG_LRU_YOUNG, -}; - #ifdef CONFIG_LRU_GEN enum { @@ -557,7 +548,7 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg); void lru_gen_online_memcg(struct mem_cgroup *memcg); void lru_gen_offline_memcg(struct mem_cgroup *memcg); void lru_gen_release_memcg(struct mem_cgroup *memcg); -void lru_gen_rotate_memcg(struct lruvec *lruvec, int op); +void lru_gen_soft_reclaim(struct lruvec *lruvec); #else /* !CONFIG_MEMCG */ @@ -608,7 +599,7 @@ static inline void lru_gen_release_memcg(struct mem_cgroup *memcg) { } -static inline void lru_gen_rotate_memcg(struct lruvec *lruvec, int op) +static inline void lru_gen_soft_reclaim(struct lruvec *lruvec) { } -- cgit v1.2.3 From 44b8f8bf2438bfee3aceae4d647a7460213ff340 Mon Sep 17 00:00:00 2001 From: Jiaqi Yan Date: Fri, 20 Jan 2023 03:46:20 +0000 Subject: mm: memory-failure: add memory failure stats to sysfs Patch series "Introduce per NUMA node memory error statistics", v2. Background ========== In the RFC for Kernel Support of Memory Error Detection [1], one advantage of software-based scanning over hardware patrol scrubber is the ability to make statistics visible to system administrators. The statistics include 2 categories: * Memory error statistics, for example, how many memory error are encountered, how many of them are recovered by the kernel. Note these memory errors are non-fatal to kernel: during the machine check exception (MCE) handling kernel already classified MCE's severity to be unnecessary to panic (but either action required or optional). * Scanner statistics, for example how many times the scanner have fully scanned a NUMA node, how many errors are first detected by the scanner. 
The memory error statistics are useful to userspace and actually not specific to scanner detected memory errors, and are the focus of this patchset. Motivation ========== Memory error stats are important to userspace but insufficient in kernel today. Datacenter administrators can better monitor a machine's memory health with the visible stats. For example, while memory errors are inevitable on servers with 10+ TB memory, starting server maintenance when there are only 1~2 recovered memory errors could be overreacting; in cloud production environment maintenance usually means live migrate all the workload running on the server and this usually causes nontrivial disruption to the customer. Providing insight into the scope of memory errors on a system helps to determine the appropriate follow-up action. In addition, the kernel's existing memory error stats need to be standardized so that userspace can reliably count on their usefulness. Today kernel provides following memory error info to userspace, but they are not sufficient or have disadvantages: * HardwareCorrupted in /proc/meminfo: number of bytes poisoned in total, not per NUMA node stats though * ras:memory_failure_event: only available after explicitly enabled * /dev/mcelog provides many useful info about the MCEs, but doesn't capture how memory_failure recovered memory MCEs * kernel logs: userspace needs to process log text Exposing memory error stats is also a good start for the in-kernel memory error detector. Today the data source of memory error stats are either direct memory error consumption, or hardware patrol scrubber detection (either signaled as UCNA or SRAO). Once in-kernel memory scanner is implemented, it will be the main source as it is usually configured to scan memory DIMMs constantly and faster than hardware patrol scrubber. How Implemented =============== As Naoya pointed out [2], exposing memory error statistics to userspace is useful independent of software or hardware scanner. Therefore we implement the memory error statistics independent of the in-kernel memory error detector. It exposes the following per NUMA node memory error counters: /sys/devices/system/node/node${X}/memory_failure/total /sys/devices/system/node/node${X}/memory_failure/recovered /sys/devices/system/node/node${X}/memory_failure/ignored /sys/devices/system/node/node${X}/memory_failure/failed /sys/devices/system/node/node${X}/memory_failure/delayed These counters describe how many raw pages are poisoned and after the attempted recoveries by the kernel, their resolutions: how many are recovered, ignored, failed, or delayed respectively. This approach can be easier to extend for future use cases than /proc/meminfo, trace event, and log. The following math holds for the statistics: * total = recovered + ignored + failed + delayed These memory error stats are reset during machine boot. The 1st commit introduces these sysfs entries. The 2nd commit populates memory error stats every time memory_failure attempts memory error recovery. The 3rd commit adds documentations for introduced stats. 
[1] https://lore.kernel.org/linux-mm/7E670362-C29E-4626-B546-26530D54F937@gmail.com/T/#mc22959244f5388891c523882e61163c6e4d703af [2] https://lore.kernel.org/linux-mm/7E670362-C29E-4626-B546-26530D54F937@gmail.com/T/#m52d8d7a333d8536bd7ce74253298858b1c0c0ac6 This patch (of 3): Today kernel provides following memory error info to userspace, but each has its own disadvantage * HardwareCorrupted in /proc/meminfo: number of bytes poisoned in total, not per NUMA node stats though * ras:memory_failure_event: only available after explicitly enabled * /dev/mcelog provides many useful info about the MCEs, but doesn't capture how memory_failure recovered memory MCEs * kernel logs: userspace needs to process log text Exposes per NUMA node memory error stats as sysfs entries: /sys/devices/system/node/node${X}/memory_failure/total /sys/devices/system/node/node${X}/memory_failure/recovered /sys/devices/system/node/node${X}/memory_failure/ignored /sys/devices/system/node/node${X}/memory_failure/failed /sys/devices/system/node/node${X}/memory_failure/delayed These counters describe how many raw pages are poisoned and after the attempted recoveries by the kernel, their resolutions: how many are recovered, ignored, failed, or delayed respectively. The following math holds for the statistics: * total = recovered + ignored + failed + delayed Link: https://lkml.kernel.org/r/20230120034622.2698268-1-jiaqiyan@google.com Link: https://lkml.kernel.org/r/20230120034622.2698268-2-jiaqiyan@google.com Signed-off-by: Jiaqi Yan Acked-by: David Rientjes Acked-by: Naoya Horiguchi Cc: Kefeng Wang Cc: Tony Luck Cc: Yang Shi Signed-off-by: Andrew Morton --- include/linux/mm.h | 5 +++++ include/linux/mmzone.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 836b96e08a14..c9db257f09b3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3455,6 +3455,11 @@ enum mf_action_page_type { MF_MSG_UNKNOWN, }; +/* + * Sysfs entries for memory failure handling statistics. + */ +extern const struct attribute_group memory_failure_attr_group; + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) extern void clear_huge_page(struct page *page, unsigned long addr_hint, diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 977be526c939..9fb1b03b83b2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1212,6 +1212,31 @@ struct deferred_split { }; #endif +#ifdef CONFIG_MEMORY_FAILURE +/* + * Per NUMA node memory failure handling statistics. + */ +struct memory_failure_stats { + /* + * Number of raw pages poisoned. + * Cases not accounted: memory outside kernel control, offline page, + * arch-specific memory_failure (SGX), hwpoison_filter() filtered + * error events, and unpoison actions from hwpoison_unpoison. + */ + unsigned long total; + /* + * Recovery results of poisoned raw pages handled by memory_failure, + * in sync with mf_result. + * total = ignored + failed + delayed + recovered. + * total * PAGE_SIZE * #nodes = /proc/meminfo/HardwareCorrupted. + */ + unsigned long ignored; + unsigned long failed; + unsigned long delayed; + unsigned long recovered; +}; +#endif + /* * On NUMA machines, each NUMA node would have a pg_data_t to describe * it's memory layout. 
On UMA machines there is a single pglist_data which @@ -1357,6 +1382,9 @@ typedef struct pglist_data { #ifdef CONFIG_NUMA struct memory_tier __rcu *memtier; #endif +#ifdef CONFIG_MEMORY_FAILURE + struct memory_failure_stats mf_stats; +#endif } pg_data_t; #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) -- cgit v1.2.3 From 3222d8c2a7f888bf38b845b125e9470b12108a4d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 25 Jan 2023 14:34:36 +0100 Subject: block: remove ->rw_page The ->rw_page method is a special purpose bypass of the usual bio handling path that is limited to single-page synchronous reads and writes, which causes a lot of extra code in the drivers, callers and the block layer. The only remaining user is the MM swap code. Switch that swap code to simply submit a single-vec on-stack bio and synchronously wait on it, based on a newly added QUEUE_FLAG_SYNCHRONOUS flag set by the drivers that currently implement ->rw_page instead. While this touches one extra cache line and executes extra code, it simplifies the block layer and drivers and ensures that all features are properly supported by all drivers, e.g. right now ->rw_page bypassed cgroup writeback entirely. [akpm@linux-foundation.org: fix comment typo, per Dan] Link: https://lkml.kernel.org/r/20230125133436.447864-8-hch@lst.de Signed-off-by: Christoph Hellwig Reviewed-by: Dan Williams Cc: Dave Jiang Cc: Ira Weiny Cc: Jens Axboe Cc: Keith Busch Cc: Minchan Kim Cc: Sergey Senozhatsky Cc: Vishal Verma Signed-off-by: Andrew Morton --- include/linux/blkdev.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 43d4e073b111..c5e59965b145 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -554,6 +554,7 @@ struct request_queue { #define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ +#define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ @@ -1250,6 +1251,12 @@ static inline bool bdev_nonrot(struct block_device *bdev) return blk_queue_nonrot(bdev_get_queue(bdev)); } +static inline bool bdev_synchronous(struct block_device *bdev) +{ + return test_bit(QUEUE_FLAG_SYNCHRONOUS, + &bdev_get_queue(bdev)->queue_flags); +} + static inline bool bdev_stable_writes(struct block_device *bdev) { return test_bit(QUEUE_FLAG_STABLE_WRITES, @@ -1382,7 +1389,6 @@ struct block_device_operations { unsigned int flags); int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); - int (*rw_page)(struct block_device *, sector_t, struct page *, enum req_op); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); unsigned int (*check_events) (struct gendisk *disk, @@ -1417,10 +1423,6 @@ extern int blkdev_compat_ptr_ioctl(struct block_device *, fmode_t, #define blkdev_compat_ptr_ioctl NULL #endif -extern int bdev_read_page(struct block_device *, sector_t, struct page *); -extern int bdev_write_page(struct block_device *, sector_t, struct page *, - struct writeback_control *); - static inline void blk_wake_io_task(struct task_struct
*waiter) { /* -- cgit v1.2.3 From 00cdf76012ab78b225345e8cf77d5391b4680b45 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 26 Jan 2023 20:15:52 +0000 Subject: mm: add memcpy_from_file_folio() This is the equivalent of memcpy_from_page(). It differs in that it takes the position in a file instead of offset in a folio, it accepts the total number of bytes to be copied (instead of the number of bytes to be copied from this folio) and it returns how many bytes were copied from the folio, rather than making the caller calculate that and then checking if the caller got it right. [akpm@linux-foundation.org: fix typo in comment] Link: https://lkml.kernel.org/r/20230126201552.1681588-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: "Fabio M. De Francesco" Cc: Ira Weiny Signed-off-by: Andrew Morton --- include/linux/highmem.h | 29 +++++++++++++++++++++++++++++ include/linux/page-flags.h | 1 + 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index e22509420ac6..348701dae77f 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -413,6 +413,35 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len) kunmap_local(addr); } +/** + * memcpy_from_file_folio - Copy some bytes from a file folio. + * @to: The destination buffer. + * @folio: The folio to copy from. + * @pos: The position in the file. + * @len: The maximum number of bytes to copy. + * + * Copy up to @len bytes from this folio. This may be limited by PAGE_SIZE + * if the folio comes from HIGHMEM, and by the size of the folio. + * + * Return: The number of bytes copied from the folio. + */ +static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, + loff_t pos, size_t len) +{ + size_t offset = offset_in_folio(folio, pos); + char *from = kmap_local_folio(folio, offset); + + if (folio_test_highmem(folio)) + len = min_t(size_t, len, PAGE_SIZE - offset); + else + len = min(len, folio_size(folio) - offset); + + memcpy(to, from, len); + kunmap_local(from); + + return len; +} + /** * folio_zero_segments() - Zero two byte ranges in a folio. * @folio: The folio to write to. diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 69e93a0c1277..a7e3a3405520 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -531,6 +531,7 @@ PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND) * available at this point. */ #define PageHighMem(__p) is_highmem_idx(page_zonenum(__p)) +#define folio_test_highmem(__f) is_highmem_idx(folio_zonenum(__f)) #else PAGEFLAG_FALSE(HighMem, highmem) #endif -- cgit v1.2.3 From d585bdbeb79aa13b8a9bbe952d90f5252f7fe909 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 26 Jan 2023 20:12:54 +0000 Subject: fs: convert writepage_t callback to pass a folio Patch series "Convert writepage_t to use a folio". More folioisation. I split out the mpage work from everything else because it completely dominated the patch, but some implementations I just converted outright. This patch (of 2): We always write back an entire folio, but that's currently passed as the head page. Convert all filesystems that use write_cache_pages() to expect a folio instead of a page. 
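As a hedged illustration of the converted callback shape (not part of the patch; example_write_folio() and the example_* names below are hypothetical stand-ins, not symbols from the kernel), a write_cache_pages() user would now look roughly like this:

	/*
	 * Minimal sketch only. The callback now receives the whole folio
	 * rather than its head page; example_write_folio() is a made-up
	 * per-filesystem helper that performs the actual writeout.
	 */
	static int example_writepage(struct folio *folio,
				     struct writeback_control *wbc, void *data)
	{
		return example_write_folio(data, folio, wbc);
	}

	static int example_writepages(struct address_space *mapping,
				      struct writeback_control *wbc)
	{
		return write_cache_pages(mapping, wbc, example_writepage, NULL);
	}
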
Link: https://lkml.kernel.org/r/20230126201255.1681189-1-willy@infradead.org Link: https://lkml.kernel.org/r/20230126201255.1681189-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Christoph Hellwig Signed-off-by: Andrew Morton --- include/linux/writeback.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 3f1491b07474..46020373e155 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -366,7 +366,7 @@ int balance_dirty_pages_ratelimited_flags(struct address_space *mapping, bool wb_over_bg_thresh(struct bdi_writeback *wb); -typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, +typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc, void *data); void tag_pages_for_writeback(struct address_space *mapping, -- cgit v1.2.3 From 6338bb05c15f88e2f4beae296ef389224837758c Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 12 Dec 2022 11:46:44 +0900 Subject: error-injection: remove EI_ETYPE_NONE Patch series "error-injection: Clarify the requirements of error injectable functions". Patches for clarifying the requirement of error injectable functions and to remove the confusing EI_ETYPE_NONE. This patch (of 2): Since the EI_ETYPE_NONE is confusing type, replace it with appropriate errno. The EI_ETYPE_NONE has been introduced for a dummy (error) value, but it can mislead people that they can use ALLOW_ERROR_INJECTION(func, NONE). So remove it from the EI_ETYPE and use appropriate errno instead. [akpm@linux-foundation.org: include/linux/error-injection.h needs errno.h] Link: https://lkml.kernel.org/r/167081319306.387937.10079195394503045678.stgit@devnote3 Link: https://lkml.kernel.org/r/167081320421.387937.4259807348852421112.stgit@devnote3 Fixes: 663faf9f7bee ("error-injection: Add injectable error types") Signed-off-by: Masami Hiramatsu (Google) Cc: Alexei Starovoitov Cc: Borislav Petkov (AMD) Cc: Chris Mason Cc: Christoph Hellwig Cc: Florent Revest Cc: Greg Kroah-Hartman Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Kees Cook Cc: KP Singh Cc: Mark Rutland Cc: Peter Zijlstra Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/error-injection.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/error-injection.h b/include/linux/error-injection.h index 635a95caf29f..20e738f4eae8 100644 --- a/include/linux/error-injection.h +++ b/include/linux/error-injection.h @@ -3,6 +3,7 @@ #define _LINUX_ERROR_INJECTION_H #include +#include #include #ifdef CONFIG_FUNCTION_ERROR_INJECTION @@ -19,7 +20,7 @@ static inline bool within_error_injection_list(unsigned long addr) static inline int get_injectable_error_type(unsigned long addr) { - return EI_ETYPE_NONE; + return -EOPNOTSUPP; } #endif -- cgit v1.2.3 From 88ad32a799ddc92eafd2ae204cb43f04ac20a05c Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Fri, 16 Dec 2022 16:04:40 +0100 Subject: include/linux/percpu_counter.h: race in uniprocessor percpu_counter_add() The percpu interface is supposed to be preempt and irq safe. But: The uniprocessor implementation of percpu_counter_add() is not irq safe: if an interrupt happens during the +=, then the result is undefined. Therefore: switch from preempt_disable() to local_irq_save(). This prevents interrupts from interrupting the +=, and as a side effect prevents preemption. 
Link: https://lkml.kernel.org/r/20221216150441.200533-2-manfred@colorfullife.com Signed-off-by: Manfred Spraul Cc: "Sun, Jiebin" Cc: <1vier1@web.de> Cc: Alexander Sverdlin Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/percpu_counter.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index a3aae8d57a42..521a733e21a9 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -152,9 +152,11 @@ __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { - preempt_disable(); + unsigned long flags; + + local_irq_save(flags); fbc->count += amount; - preempt_enable(); + local_irq_restore(flags); } /* non-SMP percpu_counter_add_local is the same with percpu_counter_add */ -- cgit v1.2.3 From 9456d539acde9f92a52ffe477b4b86e35d214d1a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 3 Jan 2023 14:19:37 +0200 Subject: util_macros.h: add missing inclusion The header is the direct user of definitions from the math.h, include it. Link: https://lkml.kernel.org/r/20230103121937.32085-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton --- include/linux/util_macros.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/util_macros.h b/include/linux/util_macros.h index 72299f261b25..b641ec00be3e 100644 --- a/include/linux/util_macros.h +++ b/include/linux/util_macros.h @@ -2,6 +2,8 @@ #ifndef _LINUX_HELPER_MACROS_H_ #define _LINUX_HELPER_MACROS_H_ +#include + #define __find_closest(x, a, as, op) \ ({ \ typeof(as) __fc_i, __fc_as = (as) - 1; \ -- cgit v1.2.3 From 7e99f8b69c11c104933b9bc8fda226ebfb8aaaa5 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Wed, 4 Jan 2023 15:38:47 +0100 Subject: kexec: factor out kexec_load_permitted Both syscalls (kexec and kexec_file) do the same check, let's factor it out. Link: https://lkml.kernel.org/r/20221114-disable-kexec-reset-v6-2-6a8531a09b9a@chromium.org Signed-off-by: Ricardo Ribalda Reviewed-by: Steven Rostedt (Google) Acked-by: Baoquan He Cc: Bagas Sanjaya Cc: "Eric W. Biederman" Cc: Guilherme G. Piccoli Cc: Joel Fernandes (Google) Cc: Jonathan Corbet Cc: Philipp Rudo Cc: Ross Zwisler Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton --- include/linux/kexec.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 5dd4343c1bbe..f18a3c9e813b 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -403,7 +403,8 @@ extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; -extern int kexec_load_disabled; + +bool kexec_load_permitted(void); #ifndef kexec_flush_icache_page #define kexec_flush_icache_page(page) -- cgit v1.2.3 From a42aaad2e47b23d63037bfc0130e33fc0f74cd71 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Wed, 4 Jan 2023 15:38:48 +0100 Subject: kexec: introduce sysctl parameters kexec_load_limit_* kexec allows replacing the current kernel with a different one. This is usually a source of concerns for sysadmins that want to harden a system. 
Linux already provides a way to disable loading a new kexec kernel via kexec_load_disabled, but that control is very coarse: it is all or nothing and does not distinguish between a panic kexec and a normal kexec. This patch introduces new sysctl parameters, with finer tuning to specify how many times a kexec kernel can be loaded. The sysadmin can set different limits for kexec panic and kexec reboot kernels. The value can be modified at runtime via sysctl, but only with a stricter value. With these new parameters in place, a system with loadpin and verity enabled, using the following kernel parameters: sysctl.kexec_load_limit_reboot=0 sysctl.kexec_load_limit_panic=1 can have a good guarantee that if initrd tries to load a panic kernel, a malicious user will have little chance of replacing that kernel with a different one, even if they can trigger timeouts on the disk where the panic kernel lives. Link: https://lkml.kernel.org/r/20221114-disable-kexec-reset-v6-3-6a8531a09b9a@chromium.org Signed-off-by: Ricardo Ribalda Reviewed-by: Steven Rostedt (Google) Acked-by: Baoquan He Cc: Bagas Sanjaya Cc: "Eric W. Biederman" Cc: Guilherme G. Piccoli # Steam Deck Cc: Joel Fernandes (Google) Cc: Jonathan Corbet Cc: Philipp Rudo Cc: Ross Zwisler Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton --- include/linux/kexec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index f18a3c9e813b..6883c5922701 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -404,7 +404,7 @@ extern int kimage_crash_copy_vmcoreinfo(struct kimage *image); extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; -bool kexec_load_permitted(void); +bool kexec_load_permitted(int kexec_image_type); #ifndef kexec_flush_icache_page #define kexec_flush_icache_page(page) -- cgit v1.2.3 From d82e6ae67ac2f9d6ba51690353c477b340bba6b5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 31 Jan 2023 15:37:29 +0800 Subject: iommu/vt-d: Remove include/linux/intel-svm.h There's no need to have a public header for the Intel SVA implementation. The device driver should interact with the Intel SVA implementation via the generic IOMMU APIs. Reviewed-by: Kevin Tian Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230109014955.147068-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/intel-svm.h | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 include/linux/intel-svm.h (limited to 'include/linux') diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h deleted file mode 100644 index f9a0d44f6fdb..000000000000 --- a/include/linux/intel-svm.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright © 2015 Intel Corporation. - * - * Authors: David Woodhouse - */ - -#ifndef __INTEL_SVM_H__ -#define __INTEL_SVM_H__ - -/* Page Request Queue depth */ -#define PRQ_ORDER 4 -#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) -#define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5) - -#endif /* __INTEL_SVM_H__ */ -- cgit v1.2.3 From 4db96bfe9d7772d6ddedd62ce478895999043fd7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 31 Jan 2023 15:37:34 +0800 Subject: iommu/vt-d: Support size of the register set in DRHD A new field, which indicates the size of the remapping hardware register set for this remapping unit, is introduced in the DMA-remapping hardware unit definition (DRHD) structure with the VT-d Spec 4.0.
With this information, SW doesn't need to 'guess' the size of the register set anymore. Update the struct acpi_dmar_hardware_unit to reflect the field. Store the size of the register set in struct dmar_drhd_unit for each dmar device. The 'size' information is ResvZ for the old BIOS and platforms. Fall back to the old guessing method. There is nothing changed. Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230128200428.1459118-2-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index d81a51978d01..725d5e6acec0 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -39,6 +39,7 @@ struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ struct acpi_dmar_header *hdr; /* ACPI header */ u64 reg_base_addr; /* register base address*/ + unsigned long reg_size; /* size of register set */ struct dmar_dev_scope *devices;/* target device array */ int devices_cnt; /* target device count */ u16 segment; /* PCI domain */ -- cgit v1.2.3 From 46284c6ceb5e4dfddcb00dafb7c2f3c1437fdca4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 31 Jan 2023 15:37:38 +0800 Subject: iommu/vt-d: Support cpumask for IOMMU perfmon The perf subsystem assumes that all counters are by default per-CPU. So the user space tool reads a counter from each CPU. However, the IOMMU counters are system-wide and can be read from any CPU. Here we use a CPU mask to restrict counting to one CPU to handle the issue. (with CPU hotplug notifier to choose a different CPU if the chosen one is taken off-line). The CPU is exposed to /sys/bus/event_source/devices/dmar*/cpumask for the user space perf tool. Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230128200428.1459118-6-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 6c6859bfc454..f2ea348ce3b0 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -221,6 +221,7 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, CPUHP_AP_PERF_X86_IDXD_ONLINE, + CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, -- cgit v1.2.3 From 0c05e7bd2d017a3a9a0f4e9a19ad4acf1f616f12 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 25 Jan 2023 10:54:01 -0800 Subject: livepatch,x86: Clear relocation targets on a module removal Josh reported a bug: When the object to be patched is a module, and that module is rmmod'ed and reloaded, it fails to load with: module: x86/modules: Skipping invalid relocation target, existing value is nonzero for type 2, loc 00000000ba0302e9, val ffffffffa03e293c livepatch: failed to initialize patch 'livepatch_nfsd' for module 'nfsd' (-8) livepatch: patch 'livepatch_nfsd' failed for module 'nfsd', refusing to load module 'nfsd' The livepatch module has a relocation which references a symbol in the _previous_ loading of nfsd. When apply_relocate_add() tries to replace the old relocation with a new one, it sees that the previous one is nonzero and it errors out. He also proposed three different solutions. We could remove the error check in apply_relocate_add() introduced by commit eda9cec4c9a1 ("x86/module: Detect and skip invalid relocations"). 
However the check is useful for detecting corrupted modules. We could also deny removal of patched modules. If it proved to be a major drawback for users, we could still implement a different approach. The solution would also complicate the existing code a lot. We thus decided to reverse the relocation patching (clear all relocation targets on x86_64). The solution is not universal and is too arch-specific, but it may prove to be simpler in the end. Reported-by: Josh Poimboeuf Originally-by: Miroslav Benes Signed-off-by: Song Liu Acked-by: Miroslav Benes Reviewed-by: Petr Mladek Acked-by: Josh Poimboeuf Reviewed-by: Joe Lawrence Tested-by: Joe Lawrence Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20230125185401.279042-2-song@kernel.org --- include/linux/moduleloader.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 9e09d11ffe5b..2ba4bc834a4f 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -72,6 +72,23 @@ int apply_relocate_add(Elf_Shdr *sechdrs, unsigned int symindex, unsigned int relsec, struct module *mod); +#ifdef CONFIG_LIVEPATCH +/* + * Some architectures (namely x86_64 and ppc64) perform sanity checks when + * applying relocations. If a patched module gets unloaded and then later + * reloaded (and re-patched), klp re-applies relocations to the replacement + * function(s). Any leftover relocations from the previous loading of the + * patched module might trigger the sanity checks. + * + * To prevent that, when unloading a patched module, clear out any relocations + * that might trigger arch-specific sanity checks on a future module reload. + */ +void clear_relocate_add(Elf_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me); +#endif #else static inline int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, -- cgit v1.2.3 From 234fa51db95f3236a049557db735606908747f38 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 3 Feb 2023 17:39:38 +0100 Subject: efi: Drop minimum EFI version check at boot We currently pass a minimum major version to the generic EFI helper that checks the system table magic and version, and refuse to boot if the value is lower. The motivation for this check is unknown, and even the code that uses major version 2 as the minimum (ARM, arm64 and RISC-V) should make it past this check without problems, and boot to a point where we have access to a console or some other means to inform the user that the firmware's major revision number made us unhappy. (Revision 2.0 of the UEFI specification was released in January 2006, whereas ARM, arm64 and RISC-V support was added in 2009, 2013 and 2017, respectively, so checking for major version 2 or higher is completely arbitrary) So just drop the check.
Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 507390dda8b9..9d455d502ac9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -721,8 +721,7 @@ static inline void efi_esrt_init(void) { } extern int efi_config_parse_tables(const efi_config_table_t *config_tables, int count, const efi_config_table_type_t *arch_tables); -extern int efi_systab_check_header(const efi_table_hdr_t *systab_hdr, - int min_major_version); +extern int efi_systab_check_header(const efi_table_hdr_t *systab_hdr); extern void efi_systab_report_header(const efi_table_hdr_t *systab_hdr, unsigned long fw_vendor); extern u64 efi_get_iobase (void); -- cgit v1.2.3 From c9a397cee9f5c93a7f48e18038b14057044db6ba Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 18 Jan 2023 13:50:28 -0400 Subject: vfio: Support VFIO_NOIOMMU with iommufd Add a small amount of emulation to vfio_compat to accept the SET_IOMMU to VFIO_NOIOMMU_IOMMU and have vfio just ignore iommufd if it is working on a no-iommu enabled device. Move the enable_unsafe_noiommu_mode module out of container.c into vfio_main.c so that it is always available even if VFIO_CONTAINER=n. This passes Alex's mini-test: https://github.com/awilliam/tests/blob/master/vfio-noiommu-pci-device-open.c Link: https://lore.kernel.org/r/0-v3-480cd64a16f7+1ad0-iommufd_noiommu_jgg@nvidia.com Reviewed-by: Kevin Tian Acked-by: Alex Williamson Signed-off-by: Jason Gunthorpe --- include/linux/iommufd.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 650d45629647..c0b5b3ac34f1 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -57,7 +57,9 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length); int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, void *data, size_t len, unsigned int flags); -int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); +int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); +int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx); +int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx); #else /* !CONFIG_IOMMUFD */ static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) { @@ -89,8 +91,12 @@ static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long return -EOPNOTSUPP; } -static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, - u32 *out_ioas_id) +static inline int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx) +{ + return -EOPNOTSUPP; +} + +static inline int iommufd_vfio_compat_set_no_iommu(struct iommufd_ctx *ictx) { return -EOPNOTSUPP; } -- cgit v1.2.3 From b0048092f7d3921d56f2c5bfa32062fac5e7500b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 3 Feb 2023 11:26:49 -0800 Subject: efi/cper, cxl: Remove cxl_err.h While going to create include/linux/cxl.h for some cross-subsystem CXL definitions I noticed that include/linux/cxl_err.h was already present. That header has no reason to be global, and it duplicates the RAS Capability Structure definitions in drivers/cxl/cxl.h. A follow-on patch can consider unifying the CXL native error tracing with the CPER error printing. Also fixed up the spec reference as the latest released spec is v3.0. 
Cc: Smita Koralahalli Signed-off-by: Dan Williams Reviewed-by: Ira Weiny Signed-off-by: Ard Biesheuvel --- include/linux/cxl_err.h | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 include/linux/cxl_err.h (limited to 'include/linux') diff --git a/include/linux/cxl_err.h b/include/linux/cxl_err.h deleted file mode 100644 index 629e1bdeda44..000000000000 --- a/include/linux/cxl_err.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2022 Advanced Micro Devices, Inc. - * - * Author: Smita Koralahalli - */ - -#ifndef LINUX_CXL_ERR_H -#define LINUX_CXL_ERR_H - -/* CXL RAS Capability Structure, CXL v3.1 sec 8.2.4.16 */ -struct cxl_ras_capability_regs { - u32 uncor_status; - u32 uncor_mask; - u32 uncor_severity; - u32 cor_status; - u32 cor_mask; - u32 cap_control; - u32 header_log[16]; -}; - -#endif //__CXL_ERR_ -- cgit v1.2.3 From cf1d2ffcc6f17b422239f6ab34b078945d07f9aa Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Feb 2023 09:48:12 +0100 Subject: efi: Discover BTI support in runtime services regions Add the generic plumbing to detect whether or not the runtime code regions were constructed with BTI/IBT landing pads by the firmware, permitting the OS to enable enforcement when mapping these regions into the OS's address space. Signed-off-by: Ard Biesheuvel Reviewed-by: Kees Cook --- include/linux/efi.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 9d455d502ac9..df88786b5947 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -584,11 +584,15 @@ typedef struct { #define EFI_INVALID_TABLE_ADDR (~0UL) +// BIT0 implies that Runtime code includes the forward control flow guard +// instruction, such as X86 CET-IBT or ARM BTI. +#define EFI_MEMORY_ATTRIBUTES_FLAGS_RT_FORWARD_CONTROL_FLOW_GUARD 0x1 + typedef struct { u32 version; u32 num_entries; u32 desc_size; - u32 reserved; + u32 flags; efi_memory_desc_t entry[0]; } efi_memory_attributes_table_t; @@ -751,7 +755,7 @@ extern unsigned long efi_mem_attr_table; * argument in the page tables referred to by the * first argument. */ -typedef int (*efi_memattr_perm_setter)(struct mm_struct *, efi_memory_desc_t *); +typedef int (*efi_memattr_perm_setter)(struct mm_struct *, efi_memory_desc_t *, bool); extern int efi_memattr_init(void); extern int efi_memattr_apply_permissions(struct mm_struct *mm, -- cgit v1.2.3 From 8d9ef69487e114f80e20ffbec14ca8684953fef0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 26 Jan 2023 16:38:21 +0100 Subject: mfd: intel_soc_pmic_chtwc: Add Lenovo Yoga Tab 3 X90F to intel_cht_wc_models The drivers for various CHT Whiskey Cove PMIC child-devices need to know the model, since they have model specific behavior. The DMI match table for this is shared between the child-device-drivers inside the MFD driver. Add the Lenovo Yoga Tab 3 X90F, which is a previously unknown tablet model with a CHT Whiskey Cove PMIC, to the intel_cht_wc_models enum and to the DMI match table. 
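A hedged sketch of how a CHT Whiskey Cove child-device driver might act on the new enum value; the cht_wc_model member name and the quirk function are assumptions for illustration, not code from this patch:

	/*
	 * Illustration only: "cht_wc_model" is an assumed field of
	 * struct intel_soc_pmic; adjust to the real consumer code.
	 */
	static void example_apply_model_quirks(struct intel_soc_pmic *pmic)
	{
		switch (pmic->cht_wc_model) {
		case INTEL_CHT_WC_LENOVO_YT3_X90:
			/* Yoga Tab 3 X90F specific behavior goes here */
			break;
		default:
			break;
		}
	}
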
Signed-off-by: Hans de Goede Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230126153823.22146-2-hdegoede@redhat.com --- include/linux/mfd/intel_soc_pmic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h index 945bde1fe55c..9ba2c1a8d836 100644 --- a/include/linux/mfd/intel_soc_pmic.h +++ b/include/linux/mfd/intel_soc_pmic.h @@ -18,6 +18,7 @@ enum intel_cht_wc_models { INTEL_CHT_WC_GPD_WIN_POCKET, INTEL_CHT_WC_XIAOMI_MIPAD2, INTEL_CHT_WC_LENOVO_YOGABOOK1, + INTEL_CHT_WC_LENOVO_YT3_X90, }; /** -- cgit v1.2.3 From 58ef4ece1e41ac525db3e79529909683325d85df Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 31 Jan 2023 20:18:51 -0800 Subject: soc: qcom: pmic_glink: Introduce base PMIC GLINK driver The PMIC GLINK service runs on one of the co-processors of some modern Qualcomm platforms and implements USB-C and battery managements. It uses a message based protocol over GLINK for communication with the OS, hence the name. The driver implemented provides the rpmsg device for communication and uses auxiliary bus to spawn off individual devices in respective subsystem. The auxiliary devices are spawned off from a platform_device, so that the drm_bridge is available early, to allow the DisplayPort driver to probe even before the remoteproc has spun up. Signed-off-by: Bjorn Andersson Tested-by: Konrad Dybcio # SM8350 PDX215 Reviewed-by: Neil Armstrong Tested-by: Neil Armstrong # on SM8550-MTP & SM8450-HDK Signed-off-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230201041853.1934355-3-quic_bjorande@quicinc.com --- include/linux/soc/qcom/pmic_glink.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/linux/soc/qcom/pmic_glink.h (limited to 'include/linux') diff --git a/include/linux/soc/qcom/pmic_glink.h b/include/linux/soc/qcom/pmic_glink.h new file mode 100644 index 000000000000..fd124aa18c81 --- /dev/null +++ b/include/linux/soc/qcom/pmic_glink.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022, Linaro Ltd + */ +#ifndef __SOC_QCOM_PMIC_GLINK_H__ +#define __SOC_QCOM_PMIC_GLINK_H__ + +struct pmic_glink; +struct pmic_glink_client; + +#define PMIC_GLINK_OWNER_BATTMGR 32778 +#define PMIC_GLINK_OWNER_USBC 32779 +#define PMIC_GLINK_OWNER_USBC_PAN 32780 + +#define PMIC_GLINK_REQ_RESP 1 +#define PMIC_GLINK_NOTIFY 2 + +struct pmic_glink_hdr { + __le32 owner; + __le32 type; + __le32 opcode; +}; + +int pmic_glink_send(struct pmic_glink_client *client, void *data, size_t len); + +struct pmic_glink_client *devm_pmic_glink_register_client(struct device *dev, + unsigned int id, + void (*cb)(const void *, size_t, void *), + void (*pdr)(void *, int), + void *priv); + +#endif -- cgit v1.2.3 From fb6211f1584aad12c267c3333273f42f69438ced Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B3=20=C3=81gila=20Bitsch?= Date: Sat, 4 Feb 2023 13:15:01 +0100 Subject: usb: gadget: add doc to struct usb_composite_dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added documentation to new struct members for WebUSB: * bcd_webusb_version * b_webusb_vendor_code * landing_page * use_webusb to avoid warnings in the build of htmldocs Fixes: 93c473948c58 ("usb: gadget: add WebUSB landing page support") Reported-by: Stephen Rothwell Signed-off-by: Jó Ágila Bitsch Link: https://lore.kernel.org/r/Y95MRZZz3yC5lETB@jo-einhundert Signed-off-by: Greg Kroah-Hartman 
--- include/linux/usb/composite.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 91d22c3ed458..7ef8cea67f50 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -432,6 +432,10 @@ static inline struct usb_composite_driver *to_cdriver( * @qw_sign: qwSignature part of the OS string * @b_vendor_code: bMS_VendorCode part of the OS string * @use_os_string: false by default, interested gadgets set it + * @bcd_webusb_version: 0x0100 by default, WebUSB specification version + * @b_webusb_vendor_code: 0x0 by default, vendor code for WebUSB + * @landing_page: empty by default, landing page to announce in WebUSB + * @use_webusb:: false by default, interested gadgets set it * @os_desc_config: the configuration to be used with OS descriptors * @setup_pending: true when setup request is queued but not completed * @os_desc_pending: true when os_desc request is queued but not completed -- cgit v1.2.3 From eca0edaf6caa66b6eb26277a7dce5d7296cedfca Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Fri, 3 Feb 2023 09:22:21 -0300 Subject: module.h: Document klp_modinfo struct using kdoc Previously the documentation existed only in Documentation/livepatch directory. Signed-off-by: Marcos Paulo de Souza Signed-off-by: Luis Chamberlain --- include/linux/module.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 8c5909c0076c..6449ea59c074 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -352,6 +352,14 @@ struct mod_kallsyms { }; #ifdef CONFIG_LIVEPATCH +/** + * struct klp_modinfo - Elf information preserved from the livepatch module + * + * @hdr: Elf header + * @sechdrs: Section header table + * @secstrings: String table for the section headers + * @symndx: The symbol table section index + */ struct klp_modinfo { Elf_Ehdr hdr; Elf_Shdr *sechdrs; -- cgit v1.2.3 From 20f6d4f2a474fc2dc502358dcee3c9b18304ec1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 4 Feb 2023 05:47:03 +0000 Subject: of: make of_node_ktype constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit ee6d3dd4ed48 ("driver core: make kobj_type constant.") the driver core allows the usage of const struct kobj_type. Take advantage of this to constify the structure definition to prevent modification at runtime. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20230204-kobj_type-of-v1-1-5910c8ecb7a3@weissschuh.net Signed-off-by: Rob Herring --- include/linux/of.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 8b9f94386dc3..8bb348666709 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -101,7 +101,7 @@ struct of_reconfig_data { }; /* initialize a node */ -extern struct kobj_type of_node_ktype; +extern const struct kobj_type of_node_ktype; extern const struct fwnode_operations of_fwnode_ops; static inline void of_node_init(struct device_node *node) { -- cgit v1.2.3 From 7390609b0121a1b982c5ecdfcd72dc328e5784ee Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 6 Feb 2023 13:43:42 +0000 Subject: net: add helper eth_addr_add() Add a helper to add an offset to a ethernet address. This comes in handy if you have a base ethernet address for multiple interfaces. 
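A brief, hedged usage sketch of the new helper (the function below is hypothetical and only illustrates deriving per-port addresses from a base MAC):

	/*
	 * Illustration only: compute the MAC address of port "index"
	 * from a base address, e.g. one read from an OTP or fuse box.
	 */
	static void example_port_mac(const u8 *base, unsigned int index,
				     u8 addr[ETH_ALEN])
	{
		ether_addr_copy(addr, base);
		eth_addr_add(addr, index);
	}
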
Signed-off-by: Michael Walle Reviewed-by: Andrew Lunn Acked-by: Jakub Kicinski Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230206134356.839737-9-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/etherdevice.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index a541f0c4f146..224645f17c33 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -507,6 +507,20 @@ static inline void eth_addr_inc(u8 *addr) u64_to_ether_addr(u, addr); } +/** + * eth_addr_add() - Add (or subtract) an offset to/from the given MAC address. + * + * @offset: Offset to add. + * @addr: Pointer to a six-byte array containing Ethernet address to increment. + */ +static inline void eth_addr_add(u8 *addr, long offset) +{ + u64 u = ether_addr_to_u64(addr); + + u += offset; + u64_to_ether_addr(u, addr); +} + /** * is_etherdev_addr - Tell if given Ethernet address belongs to the device. * @dev: Pointer to a device structure -- cgit v1.2.3 From c5d264d4b527c96ae8903376a4b195df47b05203 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 6 Feb 2023 13:43:43 +0000 Subject: of: base: add of_parse_phandle_with_optional_args() Add a new variant of the of_parse_phandle_with_args() which treats the cells name as optional. If it's missing, it is assumed that the phandle has no arguments. Up until now, a nvmem node didn't have any arguments, so all the device trees haven't any '#*-cells' property. But there is a need for an additional argument for the phandle, for which we need a '#*-cells' property. Therefore, we need to support nvmem nodes with and without this property. Signed-off-by: Michael Walle Reviewed-by: Rob Herring Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230206134356.839737-10-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/of.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 8b9f94386dc3..98c252d2d851 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1008,6 +1008,31 @@ static inline int of_parse_phandle_with_fixed_args(const struct device_node *np, index, out_args); } +/** + * of_parse_phandle_with_optional_args() - Find a node pointed by phandle in a list + * @np: pointer to a device tree node containing a list + * @list_name: property name that contains a list + * @cells_name: property name that specifies phandles' arguments count + * @index: index of a phandle to parse out + * @out_args: optional pointer to output arguments structure (will be filled) + * + * Same as of_parse_phandle_with_args() except that if the cells_name property + * is not found, cell_count of 0 is assumed. + * + * This is used to useful, if you have a phandle which didn't have arguments + * before and thus doesn't have a '#*-cells' property but is now migrated to + * having arguments while retaining backwards compatibility. 
+ */ +static inline int of_parse_phandle_with_optional_args(const struct device_node *np, + const char *list_name, + const char *cells_name, + int index, + struct of_phandle_args *out_args) +{ + return __of_parse_phandle_with_args(np, list_name, cells_name, + 0, index, out_args); +} + /** * of_property_count_u8_elems - Count the number of u8 elements in a property * -- cgit v1.2.3 From 5d8e6e6c10a3d37486d263b16ddc15991a7e4a88 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 6 Feb 2023 13:43:46 +0000 Subject: nvmem: core: add an index parameter to the cell Sometimes a cell can represent multiple values. For example, a base ethernet address stored in the NVMEM can be expanded into multiple discrete ones by adding an offset. For this use case, introduce an index parameter which is then used to distinguish between values. This parameter will then be passed to the post process hook which can then use it to create different values during reading. At the moment, there is only support for the device tree path. You can add the index to the phandle, e.g. &net { nvmem-cells = <&base_mac_address 2>; nvmem-cell-names = "mac-address"; }; &nvmem_provider { base_mac_address: base-mac-address@0 { #nvmem-cell-cells = <1>; reg = <0 6>; }; }; Signed-off-by: Michael Walle Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230206134356.839737-13-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-provider.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index bb15c9234e21..55181d837969 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -20,8 +20,8 @@ typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset, typedef int (*nvmem_reg_write_t)(void *priv, unsigned int offset, void *val, size_t bytes); /* used for vendor specific post processing of cell data */ -typedef int (*nvmem_cell_post_process_t)(void *priv, const char *id, unsigned int offset, - void *buf, size_t bytes); +typedef int (*nvmem_cell_post_process_t)(void *priv, const char *id, int index, + unsigned int offset, void *buf, size_t bytes); enum nvmem_type { NVMEM_TYPE_UNKNOWN = 0, -- cgit v1.2.3 From fbd03d27776c6121a483921601418e3c8f0ff37e Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 6 Feb 2023 13:43:47 +0000 Subject: nvmem: core: move struct nvmem_cell_info to nvmem-provider.h struct nvmem_cell_info is used to describe a cell. Thus it should really be in the nvmem-provider's header. There are two (unused) nvmem access methods which use the nvmem_cell_info to describe the cell to be accessed. One can argue that they will create a cell before accessing, thus they are both a provider and a consumer. struct nvmem_cell_info will get used more and more by nvmem providers, so don't force them to also include the consumer header, although they are not consumers.
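As a sketch of the intended split (the cell layout below is made up for illustration), a provider now only needs the provider header to describe its cells:

	#include <linux/nvmem-provider.h>

	/* Hypothetical cell: a 6-byte MAC address at offset 0 of the device. */
	static const struct nvmem_cell_info example_mac_cell = {
		.name	= "mac-address",
		.offset	= 0,
		.bytes	= 6,
	};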
Signed-off-by: Michael Walle Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230206134356.839737-14-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-consumer.h | 10 +--------- include/linux/nvmem-provider.h | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 980f9c9ac0bc..1f62f7ba71ca 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -18,15 +18,7 @@ struct device_node; /* consumer cookie */ struct nvmem_cell; struct nvmem_device; - -struct nvmem_cell_info { - const char *name; - unsigned int offset; - unsigned int bytes; - unsigned int bit_offset; - unsigned int nbits; - struct device_node *np; -}; +struct nvmem_cell_info; /** * struct nvmem_cell_lookup - cell lookup entry diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index 55181d837969..a953a3a59535 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -14,7 +14,6 @@ #include struct nvmem_device; -struct nvmem_cell_info; typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset, void *val, size_t bytes); typedef int (*nvmem_reg_write_t)(void *priv, unsigned int offset, @@ -47,6 +46,24 @@ struct nvmem_keepout { unsigned char value; }; +/** + * struct nvmem_cell_info - NVMEM cell description + * @name: Name. + * @offset: Offset within the NVMEM device. + * @bytes: Length of the cell. + * @bit_offset: Bit offset if cell is smaller than a byte. + * @nbits: Number of bits. + * @np: Optional device_node pointer. + */ +struct nvmem_cell_info { + const char *name; + unsigned int offset; + unsigned int bytes; + unsigned int bit_offset; + unsigned int nbits; + struct device_node *np; +}; + /** * struct nvmem_config - NVMEM device configuration * -- cgit v1.2.3 From 2ded6830d376d5e7bf43d59f7f7fdf1a59abc676 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 6 Feb 2023 13:43:49 +0000 Subject: nvmem: core: add nvmem_add_one_cell() Add a new function to add exactly one cell. This will be used by the nvmem layout drivers to add custom cells. In contrast to the nvmem_add_cells(), this has the advantage that we don't have to assemble a list of cells on runtime. 
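A sketch of the intended usage (the cell described here is made up), for instance from a provider or layout driver that discovers cells at probe time:

	/* Hypothetical: register a single discovered cell with an nvmem device. */
	static int example_add_serial_cell(struct nvmem_device *nvmem)
	{
		struct nvmem_cell_info info = {
			.name	= "serial-number",
			.offset	= 0x10,
			.bytes	= 8,
		};

		return nvmem_add_one_cell(nvmem, &info);
	}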
Signed-off-by: Michael Walle Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20230206134356.839737-16-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/linux/nvmem-provider.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h index a953a3a59535..0262b86194eb 100644 --- a/include/linux/nvmem-provider.h +++ b/include/linux/nvmem-provider.h @@ -153,6 +153,9 @@ struct nvmem_device *devm_nvmem_register(struct device *dev, void nvmem_add_cell_table(struct nvmem_cell_table *table); void nvmem_del_cell_table(struct nvmem_cell_table *table); +int nvmem_add_one_cell(struct nvmem_device *nvmem, + const struct nvmem_cell_info *info); + #else static inline struct nvmem_device *nvmem_register(const struct nvmem_config *c) @@ -170,6 +173,11 @@ devm_nvmem_register(struct device *dev, const struct nvmem_config *c) static inline void nvmem_add_cell_table(struct nvmem_cell_table *table) {} static inline void nvmem_del_cell_table(struct nvmem_cell_table *table) {} +static inline int nvmem_add_one_cell(struct nvmem_device *nvmem, + const struct nvmem_cell_info *info) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_NVMEM */ #endif /* ifndef _LINUX_NVMEM_PROVIDER_H */ -- cgit v1.2.3 From 15a7cf8caabee4613764abe7814dd3162cb64137 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Mon, 6 Feb 2023 16:17:57 +0000 Subject: usb: gadget: configfs: Support arbitrary string descriptors Add a framework to allow users to define arbitrary string descriptors for a USB Gadget. This is modelled as a new type of config item rather than as hardcoded attributes so as to be as flexible as possible. Signed-off-by: Daniel Scally Link: https://lore.kernel.org/r/20230206161802.892954-7-dan.scally@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gadget.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index dc3092cea99e..00750f7020f3 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -15,6 +15,7 @@ #ifndef __LINUX_USB_GADGET_H #define __LINUX_USB_GADGET_H +#include #include #include #include @@ -821,6 +822,16 @@ int usb_gadget_get_string(const struct usb_gadget_strings *table, int id, u8 *bu /* check if the given language identifier is valid */ bool usb_validate_langid(u16 langid); +struct gadget_string { + struct config_item item; + struct list_head list; + char string[USB_MAX_STRING_LEN]; + struct usb_string usb_string; +}; + +#define to_gadget_string(str_item)\ +container_of(str_item, struct gadget_string, item) + /*-------------------------------------------------------------------------*/ /* utility to simplify managing config descriptors */ -- cgit v1.2.3 From c033563220e0f7a82f4ae8d698284cced94fd6cf Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Mon, 6 Feb 2023 16:17:58 +0000 Subject: usb: gadget: configfs: Attach arbitrary strings to cdev Attach any arbitrary strings that are defined to the composite dev. We handle the old-style manufacturer, product and serialnumbers strings in the same function for simplicity. 
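For illustration only (the attribute callback below is made up), code holding the config_item of such a string entry can use the accessor introduced earlier in this series to reach the stored value:

	static ssize_t example_string_show(struct config_item *item, char *page)
	{
		struct gadget_string *gs = to_gadget_string(item);

		/* gs->string holds the UTF-8 value written by user space. */
		return sysfs_emit(page, "%s\n", gs->string);
	}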
Signed-off-by: Daniel Scally Link: https://lore.kernel.org/r/20230206161802.892954-8-dan.scally@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/composite.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 7ef8cea67f50..608dc962748b 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -494,6 +494,7 @@ struct usb_composite_dev { struct usb_composite_driver *driver; u8 next_string_id; char *def_manufacturer; + struct usb_string *usb_strings; /* the gadget driver won't enable the data pullup * while the deactivation count is nonzero. -- cgit v1.2.3 From 585e351ff359c032ea7ab48d999b252ba09f8051 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Sat, 4 Feb 2023 17:15:02 -0800 Subject: perf: RISC-V: Define helper functions to expose hpm counter width and count The KVM module needs to know how many hardware counters the platform supports and how wide they are. Otherwise, it will not be able to present optimal values for the virtual counters to the guest. The virtual hardware counters also need to have the same width as the logical hardware counters for simplicity. However, there shouldn't be a mapping between virtual hardware counters and logical hardware counters. As we don't support heterogeneous harts or counters with different widths as of now, the implementation relies on the counter width of the first available programmable counter. Reviewed-by: Anup Patel Reviewed-by: Andrew Jones Signed-off-by: Atish Patra Signed-off-by: Anup Patel --- include/linux/perf/riscv_pmu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index e17e86ad6f3a..a1c3f7771481 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -73,6 +73,9 @@ void riscv_pmu_legacy_skip_init(void); static inline void riscv_pmu_legacy_skip_init(void) {}; #endif struct riscv_pmu *riscv_pmu_alloc(void); +#ifdef CONFIG_RISCV_PMU_SBI +int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr); +#endif #endif /* CONFIG_RISCV_PMU */ -- cgit v1.2.3 From 8929283a687bb4b71ec9d3f1e827aecf829c6b1a Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Sat, 4 Feb 2023 17:15:03 -0800 Subject: perf: RISC-V: Improve privilege mode filtering for perf Currently, the host driver doesn't have any method to identify whether the requested perf event is from KVM or from bare metal. As KVM runs in HS mode, there is no separate hypervisor privilege mode to distinguish between the attributes for guest and host. Improve the privilege mode filtering by using the event-specific config1 field.
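A simplified sketch of how a hypervisor-side caller might tag such an event using the flag added in the hunk below (the real KVM vPMU code does more bookkeeping than this):

	struct perf_event_attr attr = {
		.size		= sizeof(struct perf_event_attr),
		.type		= PERF_TYPE_HARDWARE,
		.config		= PERF_COUNT_HW_CPU_CYCLES,
		/* Tell the SBI PMU driver this event counts guest activity. */
		.config1	= RISCV_PMU_CONFIG1_GUEST_EVENTS,
		.exclude_host	= 1,
	};
	struct perf_event *event;

	event = perf_event_create_kernel_counter(&attr, -1, current, NULL, NULL);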
Reviewed-by: Andrew Jones Reviewed-by: Anup Patel Signed-off-by: Atish Patra Signed-off-by: Anup Patel --- include/linux/perf/riscv_pmu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index a1c3f7771481..43fc892aa7d9 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -26,6 +26,8 @@ #define RISCV_PMU_STOP_FLAG_RESET 1 +#define RISCV_PMU_CONFIG1_GUEST_EVENTS 0x1 + struct cpu_hw_events { /* currently enabled events */ int n_events; -- cgit v1.2.3 From a9c4bdd505630469f93f5efedfc7a9ca254996c8 Mon Sep 17 00:00:00 2001 From: Linyu Yuan Date: Mon, 30 Jan 2023 15:54:09 +0800 Subject: tracing: Acquire buffer from temporary trace sequence There is a dwc3 trace event declared as below, DECLARE_EVENT_CLASS(dwc3_log_event, TP_PROTO(u32 event, struct dwc3 *dwc), TP_ARGS(event, dwc), TP_STRUCT__entry( __field(u32, event) __field(u32, ep0state) __dynamic_array(char, str, DWC3_MSG_MAX) ), TP_fast_assign( __entry->event = event; __entry->ep0state = dwc->ep0state; ), TP_printk("event (%08x): %s", __entry->event, dwc3_decode_event(__get_str(str), DWC3_MSG_MAX, __entry->event, __entry->ep0state)) ); The problem is that when the trace function is called, it allocates up to DWC3_MSG_MAX bytes from the trace event buffer but never fills that buffer during fast assignment; it only fills the buffer when the output function is called. This means that if the output function is never called, the buffer is never used. Add __get_buf(len), which acquires a buffer from iter->tmp_seq when the trace output function is called and lets the user write a string into the acquired buffer. The dwc3 trace event above can then be changed as below, DECLARE_EVENT_CLASS(dwc3_log_event, TP_PROTO(u32 event, struct dwc3 *dwc), TP_ARGS(event, dwc), TP_STRUCT__entry( __field(u32, event) __field(u32, ep0state) ), TP_fast_assign( __entry->event = event; __entry->ep0state = dwc->ep0state; ), TP_printk("event (%08x): %s", __entry->event, dwc3_decode_event(__get_buf(DWC3_MSG_MAX), DWC3_MSG_MAX, __entry->event, __entry->ep0state)) ); Link: https://lore.kernel.org/linux-trace-kernel/1675065249-23368-1-git-send-email-quic_linyyuan@quicinc.com Cc: Masami Hiramatsu Signed-off-by: Linyu Yuan Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_seq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 0c4c7587d6c3..6be92bf559fe 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -95,6 +95,7 @@ extern void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, extern int trace_seq_hex_dump(struct trace_seq *s, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); +char *trace_seq_acquire(struct trace_seq *s, unsigned int len); #else /* CONFIG_TRACING */ static inline __printf(2, 3) @@ -139,6 +140,10 @@ static inline int trace_seq_path(struct trace_seq *s, const struct path *path) { return 0; } +static inline char *trace_seq_acquire(struct trace_seq *s, unsigned int len) +{ + return NULL; +} #endif /* CONFIG_TRACING */ #endif /* _LINUX_TRACE_SEQ_H */ -- cgit v1.2.3 From d503b8f7474fe7ac616518f7fc49773cbab49f36 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 7 Feb 2023 12:28:52 -0500 Subject: tracing: Add trace_array_puts() to write into instance Add a generic trace_array_puts() that can be used to "trace_puts()" into an allocated trace_array instance.
This is just another variant of trace_array_printk(). Link: https://lkml.kernel.org/r/20230207173026.584717290@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Reviewed-by: Ross Zwisler Signed-off-by: Steven Rostedt (Google) --- include/linux/trace.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace.h b/include/linux/trace.h index 80ffda871749..2a70a447184c 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -33,6 +33,18 @@ struct trace_array; int register_ftrace_export(struct trace_export *export); int unregister_ftrace_export(struct trace_export *export); +/** + * trace_array_puts - write a constant string into the trace buffer. + * @tr: The trace array to write to + * @str: The constant string to write + */ +#define trace_array_puts(tr, str) \ + ({ \ + str ? __trace_array_puts(tr, _THIS_IP_, str, strlen(str)) : -1; \ + }) +int __trace_array_puts(struct trace_array *tr, unsigned long ip, + const char *str, int size); + void trace_printk_init_buffers(void); __printf(3, 4) int trace_array_printk(struct trace_array *tr, unsigned long ip, -- cgit v1.2.3 From 5bad5d55d884d57acba92a3309cde4cbb26dfefc Mon Sep 17 00:00:00 2001 From: Wang Yong Date: Thu, 2 Feb 2023 08:13:42 +0000 Subject: KVM: update code comment in struct kvm_vcpu Commit c5b077549136 ("KVM: Convert the kvm->vcpus array to a xarray") changed the kvm->vcpus array to an xarray, so update the code comment of kvm_vcpu->vcpu_idx accordingly. Signed-off-by: Wang Yong Link: https://lore.kernel.org/r/20230202081342.856687-1-yongw.kernel@gmail.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a7d6a6111f5e..8ada23756b0e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -324,7 +324,7 @@ struct kvm_vcpu { #endif int cpu; int vcpu_id; /* id given by userspace at creation */ - int vcpu_idx; /* index in kvm->vcpus array */ + int vcpu_idx; /* index into kvm->vcpu_array */ int ____srcu_idx; /* Don't use this directly. You've been warned. */ #ifdef CONFIG_PROVE_RCU int srcu_depth; -- cgit v1.2.3 From 67cad5c67019c38126b749621665b6723d3ae7e6 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 6 Feb 2023 17:41:57 -0800 Subject: driver core: fw_devlink: Add DL_FLAG_CYCLE support to device links fw_devlink uses the DL_FLAG_SYNC_STATE_ONLY device link flag for two purposes: 1. To allow a parent device to proxy its child device's dependency on a supplier so that the supplier doesn't get its sync_state() callback before the child device/consumer can be added and probed. In this usage scenario, we need to ignore cycles to ensure correctness of sync_state() callbacks. 2. When there are dependency cycles in firmware, we don't know which of those dependencies are valid. So, we have to ignore them all wrt probe ordering while still making sure the sync_state() callbacks come correctly. However, when detecting dependency cycles, there can be multiple dependency cycles between two devices that we need to detect. For example: A -> B -> A and A -> C -> B -> A. To detect multiple cycles correctly, we need to be able to differentiate DL_FLAG_SYNC_STATE_ONLY device links used for (1) vs (2) above. To allow this differentiation, add a DL_FLAG_CYCLE flag that can be used to mark use case (2).
We can then use the DL_FLAG_CYCLE to decide which DL_FLAG_SYNC_STATE_ONLY device links to follow when looking for dependency cycles. Fixes: 2de9d8e0d2fe ("driver core: fw_devlink: Improve handling of cyclic dependencies") Signed-off-by: Saravana Kannan Tested-by: Colin Foster Tested-by: Sudeep Holla Tested-by: Douglas Anderson Tested-by: Geert Uytterhoeven Tested-by: Luca Weiss # qcom/sm7225-fairphone-fp4 Link: https://lore.kernel.org/r/20230207014207.1678715-6-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 90aaf308c259..1508e637bb26 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -328,6 +328,7 @@ enum device_link_state { #define DL_FLAG_MANAGED BIT(6) #define DL_FLAG_SYNC_STATE_ONLY BIT(7) #define DL_FLAG_INFERRED BIT(8) +#define DL_FLAG_CYCLE BIT(9) /** * enum dl_dev_state - Device driver presence tracking information. -- cgit v1.2.3 From 6a6dfdf8b3ff337be5a447e9f4e71969f18370ad Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 6 Feb 2023 17:41:58 -0800 Subject: driver core: fw_devlink: Allow marking a fwnode link as being part of a cycle To improve detection and handling of dependency cycles, we need to be able to mark fwnode links as being part of cycles. fwnode links marked as being part of a cycle should not block their consumers from probing. Fixes: 2de9d8e0d2fe ("driver core: fw_devlink: Improve handling of cyclic dependencies") Signed-off-by: Saravana Kannan Tested-by: Colin Foster Tested-by: Sudeep Holla Tested-by: Douglas Anderson Tested-by: Geert Uytterhoeven Tested-by: Luca Weiss # qcom/sm7225-fairphone-fp4 Link: https://lore.kernel.org/r/20230207014207.1678715-7-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 89b9bdfca925..fdf2ee0285b7 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -18,7 +18,7 @@ struct fwnode_operations; struct device; /* - * fwnode link flags + * fwnode flags * * LINKS_ADDED: The fwnode has already be parsed to add fwnode links. * NOT_DEVICE: The fwnode will never be populated as a struct device. @@ -36,6 +36,7 @@ struct device; #define FWNODE_FLAG_INITIALIZED BIT(2) #define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD BIT(3) #define FWNODE_FLAG_BEST_EFFORT BIT(4) +#define FWNODE_FLAG_VISITED BIT(5) struct fwnode_handle { struct fwnode_handle *secondary; @@ -46,11 +47,19 @@ struct fwnode_handle { u8 flags; }; +/* + * fwnode link flags + * + * CYCLE: The fwnode link is part of a cycle. Don't defer probe. + */ +#define FWLINK_FLAG_CYCLE BIT(0) + struct fwnode_link { struct fwnode_handle *supplier; struct list_head s_hook; struct fwnode_handle *consumer; struct list_head c_hook; + u8 flags; }; /** -- cgit v1.2.3 From cd115c0409f283edde94bd5a9a42dc42bee0aba8 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 6 Feb 2023 17:41:59 -0800 Subject: driver core: fw_devlink: Consolidate device link flag computation Consolidate the code that computes the flags to be used when creating a device link from a fwnode link. 
Fixes: 2de9d8e0d2fe ("driver core: fw_devlink: Improve handling of cyclic dependencies") Signed-off-by: Saravana Kannan Tested-by: Colin Foster Tested-by: Sudeep Holla Tested-by: Douglas Anderson Tested-by: Geert Uytterhoeven Tested-by: Luca Weiss # qcom/sm7225-fairphone-fp4 Link: https://lore.kernel.org/r/20230207014207.1678715-8-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/fwnode.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index fdf2ee0285b7..5700451b300f 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -207,7 +207,6 @@ static inline void fwnode_dev_initialized(struct fwnode_handle *fwnode, fwnode->flags &= ~FWNODE_FLAG_INITIALIZED; } -extern u32 fw_devlink_get_flags(void); extern bool fw_devlink_is_strict(void); int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup); void fwnode_links_purge(struct fwnode_handle *fwnode); -- cgit v1.2.3 From 3bf90eca76c98c55c975fa817799789b9176f9f3 Mon Sep 17 00:00:00 2001 From: Elliot Berman Date: Fri, 3 Feb 2023 13:09:52 -0800 Subject: firmware: qcom_scm: Move qcom_scm.h to include/linux/firmware/qcom/ Move include/linux/qcom_scm.h to include/linux/firmware/qcom/qcom_scm.h. This removes 1 of a few remaining Qualcomm-specific headers into a more approciate subdirectory under include/. Suggested-by: Bjorn Andersson Signed-off-by: Elliot Berman Reviewed-by: Guru Das Srinagesh Acked-by: Mukesh Ojha Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230203210956.3580811-1-quic_eberman@quicinc.com --- include/linux/firmware/qcom/qcom_scm.h | 125 +++++++++++++++++++++++++++++++++ include/linux/qcom_scm.h | 125 --------------------------------- 2 files changed, 125 insertions(+), 125 deletions(-) create mode 100644 include/linux/firmware/qcom/qcom_scm.h delete mode 100644 include/linux/qcom_scm.h (limited to 'include/linux') diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h new file mode 100644 index 000000000000..1e449a5d7f5c --- /dev/null +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2010-2015, 2018-2019 The Linux Foundation. All rights reserved. + * Copyright (C) 2015 Linaro Ltd. 
+ */ +#ifndef __QCOM_SCM_H +#define __QCOM_SCM_H + +#include +#include +#include + +#include + +#define QCOM_SCM_VERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) +#define QCOM_SCM_CPU_PWR_DOWN_L2_ON 0x0 +#define QCOM_SCM_CPU_PWR_DOWN_L2_OFF 0x1 +#define QCOM_SCM_HDCP_MAX_REQ_CNT 5 + +struct qcom_scm_hdcp_req { + u32 addr; + u32 val; +}; + +struct qcom_scm_vmperm { + int vmid; + int perm; +}; + +enum qcom_scm_ocmem_client { + QCOM_SCM_OCMEM_UNUSED_ID = 0x0, + QCOM_SCM_OCMEM_GRAPHICS_ID, + QCOM_SCM_OCMEM_VIDEO_ID, + QCOM_SCM_OCMEM_LP_AUDIO_ID, + QCOM_SCM_OCMEM_SENSORS_ID, + QCOM_SCM_OCMEM_OTHER_OS_ID, + QCOM_SCM_OCMEM_DEBUG_ID, +}; + +enum qcom_scm_sec_dev_id { + QCOM_SCM_MDSS_DEV_ID = 1, + QCOM_SCM_OCMEM_DEV_ID = 5, + QCOM_SCM_PCIE0_DEV_ID = 11, + QCOM_SCM_PCIE1_DEV_ID = 12, + QCOM_SCM_GFX_DEV_ID = 18, + QCOM_SCM_UFS_DEV_ID = 19, + QCOM_SCM_ICE_DEV_ID = 20, +}; + +enum qcom_scm_ice_cipher { + QCOM_SCM_ICE_CIPHER_AES_128_XTS = 0, + QCOM_SCM_ICE_CIPHER_AES_128_CBC = 1, + QCOM_SCM_ICE_CIPHER_AES_256_XTS = 3, + QCOM_SCM_ICE_CIPHER_AES_256_CBC = 4, +}; + +#define QCOM_SCM_PERM_READ 0x4 +#define QCOM_SCM_PERM_WRITE 0x2 +#define QCOM_SCM_PERM_EXEC 0x1 +#define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE) +#define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC) + +extern bool qcom_scm_is_available(void); + +extern int qcom_scm_set_cold_boot_addr(void *entry); +extern int qcom_scm_set_warm_boot_addr(void *entry); +extern void qcom_scm_cpu_power_down(u32 flags); +extern int qcom_scm_set_remote_state(u32 state, u32 id); + +struct qcom_scm_pas_metadata { + void *ptr; + dma_addr_t phys; + ssize_t size; +}; + +extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, + size_t size, + struct qcom_scm_pas_metadata *ctx); +void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); +extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, + phys_addr_t size); +extern int qcom_scm_pas_auth_and_reset(u32 peripheral); +extern int qcom_scm_pas_shutdown(u32 peripheral); +extern bool qcom_scm_pas_supported(u32 peripheral); + +extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); +extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); + +extern bool qcom_scm_restore_sec_cfg_available(void); +extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); +extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); +extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); +extern int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); +extern int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, + u32 cp_nonpixel_start, + u32 cp_nonpixel_size); +extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, + unsigned int *src, + const struct qcom_scm_vmperm *newvm, + unsigned int dest_cnt); + +extern bool qcom_scm_ocmem_lock_available(void); +extern int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, + u32 size, u32 mode); +extern int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, + u32 size); + +extern bool qcom_scm_ice_available(void); +extern int qcom_scm_ice_invalidate_key(u32 index); +extern int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, + enum qcom_scm_ice_cipher cipher, + u32 data_unit_size); + +extern bool qcom_scm_hdcp_available(void); +extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, + u32 *resp); + +extern int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); +extern int 
qcom_scm_qsmmu500_wait_safe_toggle(bool en); + +extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, + u64 limit_node, u32 node_id, u64 version); +extern int qcom_scm_lmh_profile_change(u32 profile_id); +extern bool qcom_scm_lmh_dcvsh_available(void); + +#endif diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h deleted file mode 100644 index 1e449a5d7f5c..000000000000 --- a/include/linux/qcom_scm.h +++ /dev/null @@ -1,125 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright (c) 2010-2015, 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (C) 2015 Linaro Ltd. - */ -#ifndef __QCOM_SCM_H -#define __QCOM_SCM_H - -#include -#include -#include - -#include - -#define QCOM_SCM_VERSION(major, minor) (((major) << 16) | ((minor) & 0xFF)) -#define QCOM_SCM_CPU_PWR_DOWN_L2_ON 0x0 -#define QCOM_SCM_CPU_PWR_DOWN_L2_OFF 0x1 -#define QCOM_SCM_HDCP_MAX_REQ_CNT 5 - -struct qcom_scm_hdcp_req { - u32 addr; - u32 val; -}; - -struct qcom_scm_vmperm { - int vmid; - int perm; -}; - -enum qcom_scm_ocmem_client { - QCOM_SCM_OCMEM_UNUSED_ID = 0x0, - QCOM_SCM_OCMEM_GRAPHICS_ID, - QCOM_SCM_OCMEM_VIDEO_ID, - QCOM_SCM_OCMEM_LP_AUDIO_ID, - QCOM_SCM_OCMEM_SENSORS_ID, - QCOM_SCM_OCMEM_OTHER_OS_ID, - QCOM_SCM_OCMEM_DEBUG_ID, -}; - -enum qcom_scm_sec_dev_id { - QCOM_SCM_MDSS_DEV_ID = 1, - QCOM_SCM_OCMEM_DEV_ID = 5, - QCOM_SCM_PCIE0_DEV_ID = 11, - QCOM_SCM_PCIE1_DEV_ID = 12, - QCOM_SCM_GFX_DEV_ID = 18, - QCOM_SCM_UFS_DEV_ID = 19, - QCOM_SCM_ICE_DEV_ID = 20, -}; - -enum qcom_scm_ice_cipher { - QCOM_SCM_ICE_CIPHER_AES_128_XTS = 0, - QCOM_SCM_ICE_CIPHER_AES_128_CBC = 1, - QCOM_SCM_ICE_CIPHER_AES_256_XTS = 3, - QCOM_SCM_ICE_CIPHER_AES_256_CBC = 4, -}; - -#define QCOM_SCM_PERM_READ 0x4 -#define QCOM_SCM_PERM_WRITE 0x2 -#define QCOM_SCM_PERM_EXEC 0x1 -#define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE) -#define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC) - -extern bool qcom_scm_is_available(void); - -extern int qcom_scm_set_cold_boot_addr(void *entry); -extern int qcom_scm_set_warm_boot_addr(void *entry); -extern void qcom_scm_cpu_power_down(u32 flags); -extern int qcom_scm_set_remote_state(u32 state, u32 id); - -struct qcom_scm_pas_metadata { - void *ptr; - dma_addr_t phys; - ssize_t size; -}; - -extern int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, - size_t size, - struct qcom_scm_pas_metadata *ctx); -void qcom_scm_pas_metadata_release(struct qcom_scm_pas_metadata *ctx); -extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, - phys_addr_t size); -extern int qcom_scm_pas_auth_and_reset(u32 peripheral); -extern int qcom_scm_pas_shutdown(u32 peripheral); -extern bool qcom_scm_pas_supported(u32 peripheral); - -extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); -extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); - -extern bool qcom_scm_restore_sec_cfg_available(void); -extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); -extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); -extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); -extern int qcom_scm_iommu_set_cp_pool_size(u32 spare, u32 size); -extern int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size, - u32 cp_nonpixel_start, - u32 cp_nonpixel_size); -extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, - unsigned int *src, - const struct qcom_scm_vmperm *newvm, - unsigned int dest_cnt); - -extern bool qcom_scm_ocmem_lock_available(void); -extern int 
qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size, u32 mode); -extern int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id, u32 offset, - u32 size); - -extern bool qcom_scm_ice_available(void); -extern int qcom_scm_ice_invalidate_key(u32 index); -extern int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size, - enum qcom_scm_ice_cipher cipher, - u32 data_unit_size); - -extern bool qcom_scm_hdcp_available(void); -extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, - u32 *resp); - -extern int qcom_scm_iommu_set_pt_format(u32 sec_id, u32 ctx_num, u32 pt_fmt); -extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en); - -extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, - u64 limit_node, u32 node_id, u64 version); -extern int qcom_scm_lmh_profile_change(u32 profile_id); -extern bool qcom_scm_lmh_dcvsh_available(void); - -#endif -- cgit v1.2.3 From e0766ea4c8f8f94a605e291aa3672a703dc1d2e4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Feb 2023 12:13:12 +0100 Subject: driver core: bus: constantify the bus_find_* functions All of the bus find and iterator functions do not modify the struct bus_type passed to them, so mark them as constant to enforce this rule. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230208111330.439504-4-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index e3094db1e9fa..f0c8bf91b07a 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -149,9 +149,9 @@ int device_match_acpi_handle(struct device *dev, const void *handle); int device_match_any(struct device *dev, const void *unused); /* iterator helpers for buses */ -int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data, +int bus_for_each_dev(const struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); -struct device *bus_find_device(struct bus_type *bus, struct device *start, +struct device *bus_find_device(const struct bus_type *bus, struct device *start, const void *data, int (*match)(struct device *dev, const void *data)); /** @@ -161,7 +161,7 @@ struct device *bus_find_device(struct bus_type *bus, struct device *start, * @start: Device to begin with * @name: name of the device to match */ -static inline struct device *bus_find_device_by_name(struct bus_type *bus, +static inline struct device *bus_find_device_by_name(const struct bus_type *bus, struct device *start, const char *name) { @@ -175,7 +175,7 @@ static inline struct device *bus_find_device_by_name(struct bus_type *bus, * @np: of_node of the device to match. */ static inline struct device * -bus_find_device_by_of_node(struct bus_type *bus, const struct device_node *np) +bus_find_device_by_of_node(const struct bus_type *bus, const struct device_node *np) { return bus_find_device(bus, NULL, np, device_match_of_node); } @@ -187,7 +187,7 @@ bus_find_device_by_of_node(struct bus_type *bus, const struct device_node *np) * @fwnode: fwnode of the device to match. 
*/ static inline struct device * -bus_find_device_by_fwnode(struct bus_type *bus, const struct fwnode_handle *fwnode) +bus_find_device_by_fwnode(const struct bus_type *bus, const struct fwnode_handle *fwnode) { return bus_find_device(bus, NULL, fwnode, device_match_fwnode); } @@ -198,7 +198,7 @@ bus_find_device_by_fwnode(struct bus_type *bus, const struct fwnode_handle *fwno * @bus: bus type * @devt: device type of the device to match. */ -static inline struct device *bus_find_device_by_devt(struct bus_type *bus, +static inline struct device *bus_find_device_by_devt(const struct bus_type *bus, dev_t devt) { return bus_find_device(bus, NULL, &devt, device_match_devt); @@ -211,7 +211,7 @@ static inline struct device *bus_find_device_by_devt(struct bus_type *bus, * @cur: device to begin the search with. */ static inline struct device * -bus_find_next_device(struct bus_type *bus,struct device *cur) +bus_find_next_device(const struct bus_type *bus,struct device *cur) { return bus_find_device(bus, cur, NULL, device_match_any); } @@ -226,19 +226,19 @@ struct acpi_device; * @adev: ACPI COMPANION device to match. */ static inline struct device * -bus_find_device_by_acpi_dev(struct bus_type *bus, const struct acpi_device *adev) +bus_find_device_by_acpi_dev(const struct bus_type *bus, const struct acpi_device *adev) { return bus_find_device(bus, NULL, adev, device_match_acpi_dev); } #else static inline struct device * -bus_find_device_by_acpi_dev(struct bus_type *bus, const void *adev) +bus_find_device_by_acpi_dev(const struct bus_type *bus, const void *adev) { return NULL; } #endif -int bus_for_each_drv(struct bus_type *bus, struct device_driver *start, +int bus_for_each_drv(const struct bus_type *bus, struct device_driver *start, void *data, int (*fn)(struct device_driver *, void *)); void bus_sort_breadthfirst(struct bus_type *bus, int (*compare)(const struct device *a, -- cgit v1.2.3 From 0396f2863f7af3c588033d270f7d979d11cd4708 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Feb 2023 12:13:13 +0100 Subject: driver core: bus: convert bus_create/remove_file to be constant bus_create_file() and bus_remove_file() can be made to take a constant bus pointer, as it should not be modifying anything in the bus structure. Make this change and move the functions to use the internal subsys_get/put() logic as well, to prevent the use of the back-pointer in struct bus_type. Cc: "Rafael J. 
Wysocki" Link: https://lore.kernel.org/r/20230208111330.439504-5-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index f0c8bf91b07a..f6537f5fc535 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -135,9 +135,8 @@ struct bus_attribute { #define BUS_ATTR_WO(_name) \ struct bus_attribute bus_attr_##_name = __ATTR_WO(_name) -extern int __must_check bus_create_file(struct bus_type *, - struct bus_attribute *); -extern void bus_remove_file(struct bus_type *, struct bus_attribute *); +int __must_check bus_create_file(const struct bus_type *bus, struct bus_attribute *attr); +void bus_remove_file(const struct bus_type *bus, struct bus_attribute *attr); /* Generic device matching functions that all busses can use to match with */ int device_match_name(struct device *dev, const void *name); -- cgit v1.2.3 From d2bf38c088e0d5467a0e8a2055d6f95dff5c2125 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Feb 2023 12:13:26 +0100 Subject: driver core: remove private pointer from struct bus_type Now that the driver code has been refactored to not rely on the pointer from a struct bus_type to the private structure it can be safely removed from the structure entirely. This will allow most bus_type structures to now be marked as const. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230208111330.439504-18-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index f6537f5fc535..22bf63469275 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -66,8 +66,6 @@ struct fwnode_handle; * @iommu_ops: IOMMU specific operations for this bus, used to attach IOMMU * driver implementations to a bus and allow the driver to do * bus-specific setup - * @p: The private data of the driver core, only the driver core can - * touch this. * @lock_key: Lock class key for use by the lock validator * @need_parent_lock: When probing or removing a device on this bus, the * device core should lock the device's parent. @@ -111,8 +109,6 @@ struct bus_type { const struct iommu_ops *iommu_ops; - struct subsys_private *p; - bool need_parent_lock; }; -- cgit v1.2.3 From bc8b7931012f0511f016e2f048d1f4222db8e08f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Feb 2023 12:13:27 +0100 Subject: driver core: bus: constify bus_register/unregister_notifier() The bus_register_notifier() and bus_unregister_notifier() functions should be taking a const * to bus_type, not just a * so fix that up. Cc: "Rafael J. 
Wysocki" Link: https://lore.kernel.org/r/20230208111330.439504-19-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 22bf63469275..c0a034ff59b7 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -246,9 +246,9 @@ void bus_sort_breadthfirst(struct bus_type *bus, */ struct notifier_block; -extern int bus_register_notifier(struct bus_type *bus, +extern int bus_register_notifier(const struct bus_type *bus, struct notifier_block *nb); -extern int bus_unregister_notifier(struct bus_type *bus, +extern int bus_unregister_notifier(const struct bus_type *bus, struct notifier_block *nb); /** -- cgit v1.2.3 From f91482be9b480dfce2616f5ba3fa548b8ed41efb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Feb 2023 12:13:28 +0100 Subject: driver core: bus: constify bus_get_kset() The bus_get_kset() function should take a const * to bus_type, not just a *, so fix that up. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230208111330.439504-20-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index c0a034ff59b7..425d79d6cf69 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -281,6 +281,6 @@ enum bus_notifier_event { BUS_NOTIFY_DRIVER_NOT_BOUND, }; -extern struct kset *bus_get_kset(struct bus_type *bus); +extern struct kset *bus_get_kset(const struct bus_type *bus); #endif -- cgit v1.2.3 From ad8685d0f61a6fc1dc2e5874f4924ff5028c5954 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 8 Feb 2023 12:13:30 +0100 Subject: driver core: bus: constify bus_unregister() The bus_unregister() function can now take a const * to bus_type, not just a *, so fix that up. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230208111330.439504-22-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 425d79d6cf69..31be18608f83 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -114,7 +114,7 @@ struct bus_type { extern int __must_check bus_register(struct bus_type *bus); -extern void bus_unregister(struct bus_type *bus); +extern void bus_unregister(const struct bus_type *bus); extern int __must_check bus_rescan_devices(struct bus_type *bus); -- cgit v1.2.3 From 491581f40e4c52c4b36c83e8c75b2210ed7c358a Mon Sep 17 00:00:00 2001 From: Elliot Berman Date: Fri, 3 Feb 2023 13:01:32 -0800 Subject: soc: qcom: geni-se: Move qcom-geni-se.h to linux/soc/qcom/geni-se.h Move include/linux/qcom-geni-se.h to include/linux/soc/qcom/geni-se.h. This moves one of the few remaining Qualcomm-specific headers into a more appropriate subdirectory under include/.
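For consumers of the header this is purely an include-path change; a before/after sketch:

	/* before */
	#include <linux/qcom-geni-se.h>

	/* after */
	#include <linux/soc/qcom/geni-se.h>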
Signed-off-by: Elliot Berman Acked-by: Bjorn Andersson Acked-by: Wolfram Sang # for I2C Reviewed-by: Guru Das Srinagesh Link: https://lore.kernel.org/r/20230203210133.3552796-1-quic_eberman@quicinc.com Signed-off-by: Greg Kroah-Hartman --- include/linux/qcom-geni-se.h | 482 --------------------------------------- include/linux/soc/qcom/geni-se.h | 482 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 482 insertions(+), 482 deletions(-) delete mode 100644 include/linux/qcom-geni-se.h create mode 100644 include/linux/soc/qcom/geni-se.h (limited to 'include/linux') diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h deleted file mode 100644 index 400213daa461..000000000000 --- a/include/linux/qcom-geni-se.h +++ /dev/null @@ -1,482 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved. - */ - -#ifndef _LINUX_QCOM_GENI_SE -#define _LINUX_QCOM_GENI_SE - -#include - -/** - * enum geni_se_xfer_mode: Transfer modes supported by Serial Engines - * - * @GENI_SE_INVALID: Invalid mode - * @GENI_SE_FIFO: FIFO mode. Data is transferred with SE FIFO - * by programmed IO method - * @GENI_SE_DMA: Serial Engine DMA mode. Data is transferred - * with SE by DMAengine internal to SE - * @GENI_GPI_DMA: GPI DMA mode. Data is transferred using a DMAengine - * configured by a firmware residing on a GSI engine. This DMA name is - * interchangeably used as GSI or GPI which seem to imply the same DMAengine - */ - -enum geni_se_xfer_mode { - GENI_SE_INVALID, - GENI_SE_FIFO, - GENI_SE_DMA, - GENI_GPI_DMA, -}; - -/* Protocols supported by GENI Serial Engines */ -enum geni_se_protocol_type { - GENI_SE_NONE, - GENI_SE_SPI, - GENI_SE_UART, - GENI_SE_I2C, - GENI_SE_I3C, -}; - -struct geni_wrapper; -struct clk; - -enum geni_icc_path_index { - GENI_TO_CORE, - CPU_TO_GENI, - GENI_TO_DDR -}; - -struct geni_icc_path { - struct icc_path *path; - unsigned int avg_bw; -}; - -/** - * struct geni_se - GENI Serial Engine - * @base: Base Address of the Serial Engine's register block - * @dev: Pointer to the Serial Engine device - * @wrapper: Pointer to the parent QUP Wrapper core - * @clk: Handle to the core serial engine clock - * @num_clk_levels: Number of valid clock levels in clk_perf_tbl - * @clk_perf_tbl: Table of clock frequency input to serial engine clock - * @icc_paths: Array of ICC paths for SE - */ -struct geni_se { - void __iomem *base; - struct device *dev; - struct geni_wrapper *wrapper; - struct clk *clk; - unsigned int num_clk_levels; - unsigned long *clk_perf_tbl; - struct geni_icc_path icc_paths[3]; -}; - -/* Common SE registers */ -#define GENI_FORCE_DEFAULT_REG 0x20 -#define SE_GENI_STATUS 0x40 -#define GENI_SER_M_CLK_CFG 0x48 -#define GENI_SER_S_CLK_CFG 0x4c -#define GENI_IF_DISABLE_RO 0x64 -#define GENI_FW_REVISION_RO 0x68 -#define SE_GENI_CLK_SEL 0x7c -#define SE_GENI_DMA_MODE_EN 0x258 -#define SE_GENI_M_CMD0 0x600 -#define SE_GENI_M_CMD_CTRL_REG 0x604 -#define SE_GENI_M_IRQ_STATUS 0x610 -#define SE_GENI_M_IRQ_EN 0x614 -#define SE_GENI_M_IRQ_CLEAR 0x618 -#define SE_GENI_S_CMD0 0x630 -#define SE_GENI_S_CMD_CTRL_REG 0x634 -#define SE_GENI_S_IRQ_STATUS 0x640 -#define SE_GENI_S_IRQ_EN 0x644 -#define SE_GENI_S_IRQ_CLEAR 0x648 -#define SE_GENI_TX_FIFOn 0x700 -#define SE_GENI_RX_FIFOn 0x780 -#define SE_GENI_TX_FIFO_STATUS 0x800 -#define SE_GENI_RX_FIFO_STATUS 0x804 -#define SE_GENI_TX_WATERMARK_REG 0x80c -#define SE_GENI_RX_WATERMARK_REG 0x810 -#define SE_GENI_RX_RFR_WATERMARK_REG 0x814 -#define SE_GENI_IOS 0x908 -#define 
SE_DMA_TX_IRQ_STAT 0xc40 -#define SE_DMA_TX_IRQ_CLR 0xc44 -#define SE_DMA_TX_FSM_RST 0xc58 -#define SE_DMA_RX_IRQ_STAT 0xd40 -#define SE_DMA_RX_IRQ_CLR 0xd44 -#define SE_DMA_RX_LEN_IN 0xd54 -#define SE_DMA_RX_FSM_RST 0xd58 -#define SE_HW_PARAM_0 0xe24 -#define SE_HW_PARAM_1 0xe28 - -/* GENI_FORCE_DEFAULT_REG fields */ -#define FORCE_DEFAULT BIT(0) - -/* GENI_STATUS fields */ -#define M_GENI_CMD_ACTIVE BIT(0) -#define S_GENI_CMD_ACTIVE BIT(12) - -/* GENI_SER_M_CLK_CFG/GENI_SER_S_CLK_CFG */ -#define SER_CLK_EN BIT(0) -#define CLK_DIV_MSK GENMASK(15, 4) -#define CLK_DIV_SHFT 4 - -/* GENI_IF_DISABLE_RO fields */ -#define FIFO_IF_DISABLE (BIT(0)) - -/* GENI_FW_REVISION_RO fields */ -#define FW_REV_PROTOCOL_MSK GENMASK(15, 8) -#define FW_REV_PROTOCOL_SHFT 8 - -/* GENI_CLK_SEL fields */ -#define CLK_SEL_MSK GENMASK(2, 0) - -/* SE_GENI_DMA_MODE_EN */ -#define GENI_DMA_MODE_EN BIT(0) - -/* GENI_M_CMD0 fields */ -#define M_OPCODE_MSK GENMASK(31, 27) -#define M_OPCODE_SHFT 27 -#define M_PARAMS_MSK GENMASK(26, 0) - -/* GENI_M_CMD_CTRL_REG */ -#define M_GENI_CMD_CANCEL BIT(2) -#define M_GENI_CMD_ABORT BIT(1) -#define M_GENI_DISABLE BIT(0) - -/* GENI_S_CMD0 fields */ -#define S_OPCODE_MSK GENMASK(31, 27) -#define S_OPCODE_SHFT 27 -#define S_PARAMS_MSK GENMASK(26, 0) - -/* GENI_S_CMD_CTRL_REG */ -#define S_GENI_CMD_CANCEL BIT(2) -#define S_GENI_CMD_ABORT BIT(1) -#define S_GENI_DISABLE BIT(0) - -/* GENI_M_IRQ_EN fields */ -#define M_CMD_DONE_EN BIT(0) -#define M_CMD_OVERRUN_EN BIT(1) -#define M_ILLEGAL_CMD_EN BIT(2) -#define M_CMD_FAILURE_EN BIT(3) -#define M_CMD_CANCEL_EN BIT(4) -#define M_CMD_ABORT_EN BIT(5) -#define M_TIMESTAMP_EN BIT(6) -#define M_RX_IRQ_EN BIT(7) -#define M_GP_SYNC_IRQ_0_EN BIT(8) -#define M_GP_IRQ_0_EN BIT(9) -#define M_GP_IRQ_1_EN BIT(10) -#define M_GP_IRQ_2_EN BIT(11) -#define M_GP_IRQ_3_EN BIT(12) -#define M_GP_IRQ_4_EN BIT(13) -#define M_GP_IRQ_5_EN BIT(14) -#define M_IO_DATA_DEASSERT_EN BIT(22) -#define M_IO_DATA_ASSERT_EN BIT(23) -#define M_RX_FIFO_RD_ERR_EN BIT(24) -#define M_RX_FIFO_WR_ERR_EN BIT(25) -#define M_RX_FIFO_WATERMARK_EN BIT(26) -#define M_RX_FIFO_LAST_EN BIT(27) -#define M_TX_FIFO_RD_ERR_EN BIT(28) -#define M_TX_FIFO_WR_ERR_EN BIT(29) -#define M_TX_FIFO_WATERMARK_EN BIT(30) -#define M_SEC_IRQ_EN BIT(31) -#define M_COMMON_GENI_M_IRQ_EN (GENMASK(6, 1) | \ - M_IO_DATA_DEASSERT_EN | \ - M_IO_DATA_ASSERT_EN | M_RX_FIFO_RD_ERR_EN | \ - M_RX_FIFO_WR_ERR_EN | M_TX_FIFO_RD_ERR_EN | \ - M_TX_FIFO_WR_ERR_EN) - -/* GENI_S_IRQ_EN fields */ -#define S_CMD_DONE_EN BIT(0) -#define S_CMD_OVERRUN_EN BIT(1) -#define S_ILLEGAL_CMD_EN BIT(2) -#define S_CMD_FAILURE_EN BIT(3) -#define S_CMD_CANCEL_EN BIT(4) -#define S_CMD_ABORT_EN BIT(5) -#define S_GP_SYNC_IRQ_0_EN BIT(8) -#define S_GP_IRQ_0_EN BIT(9) -#define S_GP_IRQ_1_EN BIT(10) -#define S_GP_IRQ_2_EN BIT(11) -#define S_GP_IRQ_3_EN BIT(12) -#define S_GP_IRQ_4_EN BIT(13) -#define S_GP_IRQ_5_EN BIT(14) -#define S_IO_DATA_DEASSERT_EN BIT(22) -#define S_IO_DATA_ASSERT_EN BIT(23) -#define S_RX_FIFO_RD_ERR_EN BIT(24) -#define S_RX_FIFO_WR_ERR_EN BIT(25) -#define S_RX_FIFO_WATERMARK_EN BIT(26) -#define S_RX_FIFO_LAST_EN BIT(27) -#define S_COMMON_GENI_S_IRQ_EN (GENMASK(5, 1) | GENMASK(13, 9) | \ - S_RX_FIFO_RD_ERR_EN | S_RX_FIFO_WR_ERR_EN) - -/* GENI_/TX/RX/RX_RFR/_WATERMARK_REG fields */ -#define WATERMARK_MSK GENMASK(5, 0) - -/* GENI_TX_FIFO_STATUS fields */ -#define TX_FIFO_WC GENMASK(27, 0) - -/* GENI_RX_FIFO_STATUS fields */ -#define RX_LAST BIT(31) -#define RX_LAST_BYTE_VALID_MSK GENMASK(30, 28) -#define RX_LAST_BYTE_VALID_SHFT 28 
-#define RX_FIFO_WC_MSK GENMASK(24, 0) - -/* SE_GENI_IOS fields */ -#define IO2_DATA_IN BIT(1) -#define RX_DATA_IN BIT(0) - -/* SE_DMA_TX_IRQ_STAT Register fields */ -#define TX_DMA_DONE BIT(0) -#define TX_EOT BIT(1) -#define TX_SBE BIT(2) -#define TX_RESET_DONE BIT(3) - -/* SE_DMA_RX_IRQ_STAT Register fields */ -#define RX_DMA_DONE BIT(0) -#define RX_EOT BIT(1) -#define RX_SBE BIT(2) -#define RX_RESET_DONE BIT(3) -#define RX_FLUSH_DONE BIT(4) -#define RX_DMA_PARITY_ERR BIT(5) -#define RX_DMA_BREAK GENMASK(8, 7) -#define RX_GENI_GP_IRQ GENMASK(10, 5) -#define RX_GENI_CANCEL_IRQ BIT(11) -#define RX_GENI_GP_IRQ_EXT GENMASK(13, 12) - -/* SE_HW_PARAM_0 fields */ -#define TX_FIFO_WIDTH_MSK GENMASK(29, 24) -#define TX_FIFO_WIDTH_SHFT 24 -#define TX_FIFO_DEPTH_MSK GENMASK(21, 16) -#define TX_FIFO_DEPTH_SHFT 16 - -/* SE_HW_PARAM_1 fields */ -#define RX_FIFO_WIDTH_MSK GENMASK(29, 24) -#define RX_FIFO_WIDTH_SHFT 24 -#define RX_FIFO_DEPTH_MSK GENMASK(21, 16) -#define RX_FIFO_DEPTH_SHFT 16 - -#define HW_VER_MAJOR_MASK GENMASK(31, 28) -#define HW_VER_MAJOR_SHFT 28 -#define HW_VER_MINOR_MASK GENMASK(27, 16) -#define HW_VER_MINOR_SHFT 16 -#define HW_VER_STEP_MASK GENMASK(15, 0) - -#define GENI_SE_VERSION_MAJOR(ver) ((ver & HW_VER_MAJOR_MASK) >> HW_VER_MAJOR_SHFT) -#define GENI_SE_VERSION_MINOR(ver) ((ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT) -#define GENI_SE_VERSION_STEP(ver) (ver & HW_VER_STEP_MASK) - -/* QUP SE VERSION value for major number 2 and minor number 5 */ -#define QUP_SE_VERSION_2_5 0x20050000 - -/* - * Define bandwidth thresholds that cause the underlying Core 2X interconnect - * clock to run at the named frequency. These baseline values are recommended - * by the hardware team, and are not dynamically scaled with GENI bandwidth - * beyond basic on/off. - */ -#define CORE_2X_19_2_MHZ 960 -#define CORE_2X_50_MHZ 2500 -#define CORE_2X_100_MHZ 5000 -#define CORE_2X_150_MHZ 7500 -#define CORE_2X_200_MHZ 10000 -#define CORE_2X_236_MHZ 16383 - -#define GENI_DEFAULT_BW Bps_to_icc(1000) - -#if IS_ENABLED(CONFIG_QCOM_GENI_SE) - -u32 geni_se_get_qup_hw_version(struct geni_se *se); - -/** - * geni_se_read_proto() - Read the protocol configured for a serial engine - * @se: Pointer to the concerned serial engine. - * - * Return: Protocol value as configured in the serial engine. - */ -static inline u32 geni_se_read_proto(struct geni_se *se) -{ - u32 val; - - val = readl_relaxed(se->base + GENI_FW_REVISION_RO); - - return (val & FW_REV_PROTOCOL_MSK) >> FW_REV_PROTOCOL_SHFT; -} - -/** - * geni_se_setup_m_cmd() - Setup the primary sequencer - * @se: Pointer to the concerned serial engine. - * @cmd: Command/Operation to setup in the primary sequencer. - * @params: Parameter for the sequencer command. - * - * This function is used to configure the primary sequencer with the - * command and its associated parameters. - */ -static inline void geni_se_setup_m_cmd(struct geni_se *se, u32 cmd, u32 params) -{ - u32 m_cmd; - - m_cmd = (cmd << M_OPCODE_SHFT) | (params & M_PARAMS_MSK); - writel(m_cmd, se->base + SE_GENI_M_CMD0); -} - -/** - * geni_se_setup_s_cmd() - Setup the secondary sequencer - * @se: Pointer to the concerned serial engine. - * @cmd: Command/Operation to setup in the secondary sequencer. - * @params: Parameter for the sequencer command. - * - * This function is used to configure the secondary sequencer with the - * command and its associated parameters. 
- */ -static inline void geni_se_setup_s_cmd(struct geni_se *se, u32 cmd, u32 params) -{ - u32 s_cmd; - - s_cmd = readl_relaxed(se->base + SE_GENI_S_CMD0); - s_cmd &= ~(S_OPCODE_MSK | S_PARAMS_MSK); - s_cmd |= (cmd << S_OPCODE_SHFT); - s_cmd |= (params & S_PARAMS_MSK); - writel(s_cmd, se->base + SE_GENI_S_CMD0); -} - -/** - * geni_se_cancel_m_cmd() - Cancel the command configured in the primary - * sequencer - * @se: Pointer to the concerned serial engine. - * - * This function is used to cancel the currently configured command in the - * primary sequencer. - */ -static inline void geni_se_cancel_m_cmd(struct geni_se *se) -{ - writel_relaxed(M_GENI_CMD_CANCEL, se->base + SE_GENI_M_CMD_CTRL_REG); -} - -/** - * geni_se_cancel_s_cmd() - Cancel the command configured in the secondary - * sequencer - * @se: Pointer to the concerned serial engine. - * - * This function is used to cancel the currently configured command in the - * secondary sequencer. - */ -static inline void geni_se_cancel_s_cmd(struct geni_se *se) -{ - writel_relaxed(S_GENI_CMD_CANCEL, se->base + SE_GENI_S_CMD_CTRL_REG); -} - -/** - * geni_se_abort_m_cmd() - Abort the command configured in the primary sequencer - * @se: Pointer to the concerned serial engine. - * - * This function is used to force abort the currently configured command in the - * primary sequencer. - */ -static inline void geni_se_abort_m_cmd(struct geni_se *se) -{ - writel_relaxed(M_GENI_CMD_ABORT, se->base + SE_GENI_M_CMD_CTRL_REG); -} - -/** - * geni_se_abort_s_cmd() - Abort the command configured in the secondary - * sequencer - * @se: Pointer to the concerned serial engine. - * - * This function is used to force abort the currently configured command in the - * secondary sequencer. - */ -static inline void geni_se_abort_s_cmd(struct geni_se *se) -{ - writel_relaxed(S_GENI_CMD_ABORT, se->base + SE_GENI_S_CMD_CTRL_REG); -} - -/** - * geni_se_get_tx_fifo_depth() - Get the TX fifo depth of the serial engine - * @se: Pointer to the concerned serial engine. - * - * This function is used to get the depth i.e. number of elements in the - * TX fifo of the serial engine. - * - * Return: TX fifo depth in units of FIFO words. - */ -static inline u32 geni_se_get_tx_fifo_depth(struct geni_se *se) -{ - u32 val; - - val = readl_relaxed(se->base + SE_HW_PARAM_0); - - return (val & TX_FIFO_DEPTH_MSK) >> TX_FIFO_DEPTH_SHFT; -} - -/** - * geni_se_get_tx_fifo_width() - Get the TX fifo width of the serial engine - * @se: Pointer to the concerned serial engine. - * - * This function is used to get the width i.e. word size per element in the - * TX fifo of the serial engine. - * - * Return: TX fifo width in bits - */ -static inline u32 geni_se_get_tx_fifo_width(struct geni_se *se) -{ - u32 val; - - val = readl_relaxed(se->base + SE_HW_PARAM_0); - - return (val & TX_FIFO_WIDTH_MSK) >> TX_FIFO_WIDTH_SHFT; -} - -/** - * geni_se_get_rx_fifo_depth() - Get the RX fifo depth of the serial engine - * @se: Pointer to the concerned serial engine. - * - * This function is used to get the depth i.e. number of elements in the - * RX fifo of the serial engine. 
- * - * Return: RX fifo depth in units of FIFO words - */ -static inline u32 geni_se_get_rx_fifo_depth(struct geni_se *se) -{ - u32 val; - - val = readl_relaxed(se->base + SE_HW_PARAM_1); - - return (val & RX_FIFO_DEPTH_MSK) >> RX_FIFO_DEPTH_SHFT; -} - -void geni_se_init(struct geni_se *se, u32 rx_wm, u32 rx_rfr); - -void geni_se_select_mode(struct geni_se *se, enum geni_se_xfer_mode mode); - -void geni_se_config_packing(struct geni_se *se, int bpw, int pack_words, - bool msb_to_lsb, bool tx_cfg, bool rx_cfg); - -int geni_se_resources_off(struct geni_se *se); - -int geni_se_resources_on(struct geni_se *se); - -int geni_se_clk_tbl_get(struct geni_se *se, unsigned long **tbl); - -int geni_se_clk_freq_match(struct geni_se *se, unsigned long req_freq, - unsigned int *index, unsigned long *res_freq, - bool exact); - -int geni_se_tx_dma_prep(struct geni_se *se, void *buf, size_t len, - dma_addr_t *iova); - -int geni_se_rx_dma_prep(struct geni_se *se, void *buf, size_t len, - dma_addr_t *iova); - -void geni_se_tx_dma_unprep(struct geni_se *se, dma_addr_t iova, size_t len); - -void geni_se_rx_dma_unprep(struct geni_se *se, dma_addr_t iova, size_t len); - -int geni_icc_get(struct geni_se *se, const char *icc_ddr); - -int geni_icc_set_bw(struct geni_se *se); -void geni_icc_set_tag(struct geni_se *se, u32 tag); - -int geni_icc_enable(struct geni_se *se); - -int geni_icc_disable(struct geni_se *se); -#endif -#endif diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h new file mode 100644 index 000000000000..400213daa461 --- /dev/null +++ b/include/linux/soc/qcom/geni-se.h @@ -0,0 +1,482 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2017-2018, The Linux Foundation. All rights reserved. + */ + +#ifndef _LINUX_QCOM_GENI_SE +#define _LINUX_QCOM_GENI_SE + +#include + +/** + * enum geni_se_xfer_mode: Transfer modes supported by Serial Engines + * + * @GENI_SE_INVALID: Invalid mode + * @GENI_SE_FIFO: FIFO mode. Data is transferred with SE FIFO + * by programmed IO method + * @GENI_SE_DMA: Serial Engine DMA mode. Data is transferred + * with SE by DMAengine internal to SE + * @GENI_GPI_DMA: GPI DMA mode. Data is transferred using a DMAengine + * configured by a firmware residing on a GSI engine. 
This DMA name is + * interchangeably used as GSI or GPI which seem to imply the same DMAengine + */ + +enum geni_se_xfer_mode { + GENI_SE_INVALID, + GENI_SE_FIFO, + GENI_SE_DMA, + GENI_GPI_DMA, +}; + +/* Protocols supported by GENI Serial Engines */ +enum geni_se_protocol_type { + GENI_SE_NONE, + GENI_SE_SPI, + GENI_SE_UART, + GENI_SE_I2C, + GENI_SE_I3C, +}; + +struct geni_wrapper; +struct clk; + +enum geni_icc_path_index { + GENI_TO_CORE, + CPU_TO_GENI, + GENI_TO_DDR +}; + +struct geni_icc_path { + struct icc_path *path; + unsigned int avg_bw; +}; + +/** + * struct geni_se - GENI Serial Engine + * @base: Base Address of the Serial Engine's register block + * @dev: Pointer to the Serial Engine device + * @wrapper: Pointer to the parent QUP Wrapper core + * @clk: Handle to the core serial engine clock + * @num_clk_levels: Number of valid clock levels in clk_perf_tbl + * @clk_perf_tbl: Table of clock frequency input to serial engine clock + * @icc_paths: Array of ICC paths for SE + */ +struct geni_se { + void __iomem *base; + struct device *dev; + struct geni_wrapper *wrapper; + struct clk *clk; + unsigned int num_clk_levels; + unsigned long *clk_perf_tbl; + struct geni_icc_path icc_paths[3]; +}; + +/* Common SE registers */ +#define GENI_FORCE_DEFAULT_REG 0x20 +#define SE_GENI_STATUS 0x40 +#define GENI_SER_M_CLK_CFG 0x48 +#define GENI_SER_S_CLK_CFG 0x4c +#define GENI_IF_DISABLE_RO 0x64 +#define GENI_FW_REVISION_RO 0x68 +#define SE_GENI_CLK_SEL 0x7c +#define SE_GENI_DMA_MODE_EN 0x258 +#define SE_GENI_M_CMD0 0x600 +#define SE_GENI_M_CMD_CTRL_REG 0x604 +#define SE_GENI_M_IRQ_STATUS 0x610 +#define SE_GENI_M_IRQ_EN 0x614 +#define SE_GENI_M_IRQ_CLEAR 0x618 +#define SE_GENI_S_CMD0 0x630 +#define SE_GENI_S_CMD_CTRL_REG 0x634 +#define SE_GENI_S_IRQ_STATUS 0x640 +#define SE_GENI_S_IRQ_EN 0x644 +#define SE_GENI_S_IRQ_CLEAR 0x648 +#define SE_GENI_TX_FIFOn 0x700 +#define SE_GENI_RX_FIFOn 0x780 +#define SE_GENI_TX_FIFO_STATUS 0x800 +#define SE_GENI_RX_FIFO_STATUS 0x804 +#define SE_GENI_TX_WATERMARK_REG 0x80c +#define SE_GENI_RX_WATERMARK_REG 0x810 +#define SE_GENI_RX_RFR_WATERMARK_REG 0x814 +#define SE_GENI_IOS 0x908 +#define SE_DMA_TX_IRQ_STAT 0xc40 +#define SE_DMA_TX_IRQ_CLR 0xc44 +#define SE_DMA_TX_FSM_RST 0xc58 +#define SE_DMA_RX_IRQ_STAT 0xd40 +#define SE_DMA_RX_IRQ_CLR 0xd44 +#define SE_DMA_RX_LEN_IN 0xd54 +#define SE_DMA_RX_FSM_RST 0xd58 +#define SE_HW_PARAM_0 0xe24 +#define SE_HW_PARAM_1 0xe28 + +/* GENI_FORCE_DEFAULT_REG fields */ +#define FORCE_DEFAULT BIT(0) + +/* GENI_STATUS fields */ +#define M_GENI_CMD_ACTIVE BIT(0) +#define S_GENI_CMD_ACTIVE BIT(12) + +/* GENI_SER_M_CLK_CFG/GENI_SER_S_CLK_CFG */ +#define SER_CLK_EN BIT(0) +#define CLK_DIV_MSK GENMASK(15, 4) +#define CLK_DIV_SHFT 4 + +/* GENI_IF_DISABLE_RO fields */ +#define FIFO_IF_DISABLE (BIT(0)) + +/* GENI_FW_REVISION_RO fields */ +#define FW_REV_PROTOCOL_MSK GENMASK(15, 8) +#define FW_REV_PROTOCOL_SHFT 8 + +/* GENI_CLK_SEL fields */ +#define CLK_SEL_MSK GENMASK(2, 0) + +/* SE_GENI_DMA_MODE_EN */ +#define GENI_DMA_MODE_EN BIT(0) + +/* GENI_M_CMD0 fields */ +#define M_OPCODE_MSK GENMASK(31, 27) +#define M_OPCODE_SHFT 27 +#define M_PARAMS_MSK GENMASK(26, 0) + +/* GENI_M_CMD_CTRL_REG */ +#define M_GENI_CMD_CANCEL BIT(2) +#define M_GENI_CMD_ABORT BIT(1) +#define M_GENI_DISABLE BIT(0) + +/* GENI_S_CMD0 fields */ +#define S_OPCODE_MSK GENMASK(31, 27) +#define S_OPCODE_SHFT 27 +#define S_PARAMS_MSK GENMASK(26, 0) + +/* GENI_S_CMD_CTRL_REG */ +#define S_GENI_CMD_CANCEL BIT(2) +#define S_GENI_CMD_ABORT BIT(1) +#define S_GENI_DISABLE BIT(0) + +/* 
GENI_M_IRQ_EN fields */ +#define M_CMD_DONE_EN BIT(0) +#define M_CMD_OVERRUN_EN BIT(1) +#define M_ILLEGAL_CMD_EN BIT(2) +#define M_CMD_FAILURE_EN BIT(3) +#define M_CMD_CANCEL_EN BIT(4) +#define M_CMD_ABORT_EN BIT(5) +#define M_TIMESTAMP_EN BIT(6) +#define M_RX_IRQ_EN BIT(7) +#define M_GP_SYNC_IRQ_0_EN BIT(8) +#define M_GP_IRQ_0_EN BIT(9) +#define M_GP_IRQ_1_EN BIT(10) +#define M_GP_IRQ_2_EN BIT(11) +#define M_GP_IRQ_3_EN BIT(12) +#define M_GP_IRQ_4_EN BIT(13) +#define M_GP_IRQ_5_EN BIT(14) +#define M_IO_DATA_DEASSERT_EN BIT(22) +#define M_IO_DATA_ASSERT_EN BIT(23) +#define M_RX_FIFO_RD_ERR_EN BIT(24) +#define M_RX_FIFO_WR_ERR_EN BIT(25) +#define M_RX_FIFO_WATERMARK_EN BIT(26) +#define M_RX_FIFO_LAST_EN BIT(27) +#define M_TX_FIFO_RD_ERR_EN BIT(28) +#define M_TX_FIFO_WR_ERR_EN BIT(29) +#define M_TX_FIFO_WATERMARK_EN BIT(30) +#define M_SEC_IRQ_EN BIT(31) +#define M_COMMON_GENI_M_IRQ_EN (GENMASK(6, 1) | \ + M_IO_DATA_DEASSERT_EN | \ + M_IO_DATA_ASSERT_EN | M_RX_FIFO_RD_ERR_EN | \ + M_RX_FIFO_WR_ERR_EN | M_TX_FIFO_RD_ERR_EN | \ + M_TX_FIFO_WR_ERR_EN) + +/* GENI_S_IRQ_EN fields */ +#define S_CMD_DONE_EN BIT(0) +#define S_CMD_OVERRUN_EN BIT(1) +#define S_ILLEGAL_CMD_EN BIT(2) +#define S_CMD_FAILURE_EN BIT(3) +#define S_CMD_CANCEL_EN BIT(4) +#define S_CMD_ABORT_EN BIT(5) +#define S_GP_SYNC_IRQ_0_EN BIT(8) +#define S_GP_IRQ_0_EN BIT(9) +#define S_GP_IRQ_1_EN BIT(10) +#define S_GP_IRQ_2_EN BIT(11) +#define S_GP_IRQ_3_EN BIT(12) +#define S_GP_IRQ_4_EN BIT(13) +#define S_GP_IRQ_5_EN BIT(14) +#define S_IO_DATA_DEASSERT_EN BIT(22) +#define S_IO_DATA_ASSERT_EN BIT(23) +#define S_RX_FIFO_RD_ERR_EN BIT(24) +#define S_RX_FIFO_WR_ERR_EN BIT(25) +#define S_RX_FIFO_WATERMARK_EN BIT(26) +#define S_RX_FIFO_LAST_EN BIT(27) +#define S_COMMON_GENI_S_IRQ_EN (GENMASK(5, 1) | GENMASK(13, 9) | \ + S_RX_FIFO_RD_ERR_EN | S_RX_FIFO_WR_ERR_EN) + +/* GENI_/TX/RX/RX_RFR/_WATERMARK_REG fields */ +#define WATERMARK_MSK GENMASK(5, 0) + +/* GENI_TX_FIFO_STATUS fields */ +#define TX_FIFO_WC GENMASK(27, 0) + +/* GENI_RX_FIFO_STATUS fields */ +#define RX_LAST BIT(31) +#define RX_LAST_BYTE_VALID_MSK GENMASK(30, 28) +#define RX_LAST_BYTE_VALID_SHFT 28 +#define RX_FIFO_WC_MSK GENMASK(24, 0) + +/* SE_GENI_IOS fields */ +#define IO2_DATA_IN BIT(1) +#define RX_DATA_IN BIT(0) + +/* SE_DMA_TX_IRQ_STAT Register fields */ +#define TX_DMA_DONE BIT(0) +#define TX_EOT BIT(1) +#define TX_SBE BIT(2) +#define TX_RESET_DONE BIT(3) + +/* SE_DMA_RX_IRQ_STAT Register fields */ +#define RX_DMA_DONE BIT(0) +#define RX_EOT BIT(1) +#define RX_SBE BIT(2) +#define RX_RESET_DONE BIT(3) +#define RX_FLUSH_DONE BIT(4) +#define RX_DMA_PARITY_ERR BIT(5) +#define RX_DMA_BREAK GENMASK(8, 7) +#define RX_GENI_GP_IRQ GENMASK(10, 5) +#define RX_GENI_CANCEL_IRQ BIT(11) +#define RX_GENI_GP_IRQ_EXT GENMASK(13, 12) + +/* SE_HW_PARAM_0 fields */ +#define TX_FIFO_WIDTH_MSK GENMASK(29, 24) +#define TX_FIFO_WIDTH_SHFT 24 +#define TX_FIFO_DEPTH_MSK GENMASK(21, 16) +#define TX_FIFO_DEPTH_SHFT 16 + +/* SE_HW_PARAM_1 fields */ +#define RX_FIFO_WIDTH_MSK GENMASK(29, 24) +#define RX_FIFO_WIDTH_SHFT 24 +#define RX_FIFO_DEPTH_MSK GENMASK(21, 16) +#define RX_FIFO_DEPTH_SHFT 16 + +#define HW_VER_MAJOR_MASK GENMASK(31, 28) +#define HW_VER_MAJOR_SHFT 28 +#define HW_VER_MINOR_MASK GENMASK(27, 16) +#define HW_VER_MINOR_SHFT 16 +#define HW_VER_STEP_MASK GENMASK(15, 0) + +#define GENI_SE_VERSION_MAJOR(ver) ((ver & HW_VER_MAJOR_MASK) >> HW_VER_MAJOR_SHFT) +#define GENI_SE_VERSION_MINOR(ver) ((ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT) +#define GENI_SE_VERSION_STEP(ver) (ver & 
HW_VER_STEP_MASK) + +/* QUP SE VERSION value for major number 2 and minor number 5 */ +#define QUP_SE_VERSION_2_5 0x20050000 + +/* + * Define bandwidth thresholds that cause the underlying Core 2X interconnect + * clock to run at the named frequency. These baseline values are recommended + * by the hardware team, and are not dynamically scaled with GENI bandwidth + * beyond basic on/off. + */ +#define CORE_2X_19_2_MHZ 960 +#define CORE_2X_50_MHZ 2500 +#define CORE_2X_100_MHZ 5000 +#define CORE_2X_150_MHZ 7500 +#define CORE_2X_200_MHZ 10000 +#define CORE_2X_236_MHZ 16383 + +#define GENI_DEFAULT_BW Bps_to_icc(1000) + +#if IS_ENABLED(CONFIG_QCOM_GENI_SE) + +u32 geni_se_get_qup_hw_version(struct geni_se *se); + +/** + * geni_se_read_proto() - Read the protocol configured for a serial engine + * @se: Pointer to the concerned serial engine. + * + * Return: Protocol value as configured in the serial engine. + */ +static inline u32 geni_se_read_proto(struct geni_se *se) +{ + u32 val; + + val = readl_relaxed(se->base + GENI_FW_REVISION_RO); + + return (val & FW_REV_PROTOCOL_MSK) >> FW_REV_PROTOCOL_SHFT; +} + +/** + * geni_se_setup_m_cmd() - Setup the primary sequencer + * @se: Pointer to the concerned serial engine. + * @cmd: Command/Operation to setup in the primary sequencer. + * @params: Parameter for the sequencer command. + * + * This function is used to configure the primary sequencer with the + * command and its associated parameters. + */ +static inline void geni_se_setup_m_cmd(struct geni_se *se, u32 cmd, u32 params) +{ + u32 m_cmd; + + m_cmd = (cmd << M_OPCODE_SHFT) | (params & M_PARAMS_MSK); + writel(m_cmd, se->base + SE_GENI_M_CMD0); +} + +/** + * geni_se_setup_s_cmd() - Setup the secondary sequencer + * @se: Pointer to the concerned serial engine. + * @cmd: Command/Operation to setup in the secondary sequencer. + * @params: Parameter for the sequencer command. + * + * This function is used to configure the secondary sequencer with the + * command and its associated parameters. + */ +static inline void geni_se_setup_s_cmd(struct geni_se *se, u32 cmd, u32 params) +{ + u32 s_cmd; + + s_cmd = readl_relaxed(se->base + SE_GENI_S_CMD0); + s_cmd &= ~(S_OPCODE_MSK | S_PARAMS_MSK); + s_cmd |= (cmd << S_OPCODE_SHFT); + s_cmd |= (params & S_PARAMS_MSK); + writel(s_cmd, se->base + SE_GENI_S_CMD0); +} + +/** + * geni_se_cancel_m_cmd() - Cancel the command configured in the primary + * sequencer + * @se: Pointer to the concerned serial engine. + * + * This function is used to cancel the currently configured command in the + * primary sequencer. + */ +static inline void geni_se_cancel_m_cmd(struct geni_se *se) +{ + writel_relaxed(M_GENI_CMD_CANCEL, se->base + SE_GENI_M_CMD_CTRL_REG); +} + +/** + * geni_se_cancel_s_cmd() - Cancel the command configured in the secondary + * sequencer + * @se: Pointer to the concerned serial engine. + * + * This function is used to cancel the currently configured command in the + * secondary sequencer. + */ +static inline void geni_se_cancel_s_cmd(struct geni_se *se) +{ + writel_relaxed(S_GENI_CMD_CANCEL, se->base + SE_GENI_S_CMD_CTRL_REG); +} + +/** + * geni_se_abort_m_cmd() - Abort the command configured in the primary sequencer + * @se: Pointer to the concerned serial engine. + * + * This function is used to force abort the currently configured command in the + * primary sequencer. 
+ */ +static inline void geni_se_abort_m_cmd(struct geni_se *se) +{ + writel_relaxed(M_GENI_CMD_ABORT, se->base + SE_GENI_M_CMD_CTRL_REG); +} + +/** + * geni_se_abort_s_cmd() - Abort the command configured in the secondary + * sequencer + * @se: Pointer to the concerned serial engine. + * + * This function is used to force abort the currently configured command in the + * secondary sequencer. + */ +static inline void geni_se_abort_s_cmd(struct geni_se *se) +{ + writel_relaxed(S_GENI_CMD_ABORT, se->base + SE_GENI_S_CMD_CTRL_REG); +} + +/** + * geni_se_get_tx_fifo_depth() - Get the TX fifo depth of the serial engine + * @se: Pointer to the concerned serial engine. + * + * This function is used to get the depth i.e. number of elements in the + * TX fifo of the serial engine. + * + * Return: TX fifo depth in units of FIFO words. + */ +static inline u32 geni_se_get_tx_fifo_depth(struct geni_se *se) +{ + u32 val; + + val = readl_relaxed(se->base + SE_HW_PARAM_0); + + return (val & TX_FIFO_DEPTH_MSK) >> TX_FIFO_DEPTH_SHFT; +} + +/** + * geni_se_get_tx_fifo_width() - Get the TX fifo width of the serial engine + * @se: Pointer to the concerned serial engine. + * + * This function is used to get the width i.e. word size per element in the + * TX fifo of the serial engine. + * + * Return: TX fifo width in bits + */ +static inline u32 geni_se_get_tx_fifo_width(struct geni_se *se) +{ + u32 val; + + val = readl_relaxed(se->base + SE_HW_PARAM_0); + + return (val & TX_FIFO_WIDTH_MSK) >> TX_FIFO_WIDTH_SHFT; +} + +/** + * geni_se_get_rx_fifo_depth() - Get the RX fifo depth of the serial engine + * @se: Pointer to the concerned serial engine. + * + * This function is used to get the depth i.e. number of elements in the + * RX fifo of the serial engine. + * + * Return: RX fifo depth in units of FIFO words + */ +static inline u32 geni_se_get_rx_fifo_depth(struct geni_se *se) +{ + u32 val; + + val = readl_relaxed(se->base + SE_HW_PARAM_1); + + return (val & RX_FIFO_DEPTH_MSK) >> RX_FIFO_DEPTH_SHFT; +} + +void geni_se_init(struct geni_se *se, u32 rx_wm, u32 rx_rfr); + +void geni_se_select_mode(struct geni_se *se, enum geni_se_xfer_mode mode); + +void geni_se_config_packing(struct geni_se *se, int bpw, int pack_words, + bool msb_to_lsb, bool tx_cfg, bool rx_cfg); + +int geni_se_resources_off(struct geni_se *se); + +int geni_se_resources_on(struct geni_se *se); + +int geni_se_clk_tbl_get(struct geni_se *se, unsigned long **tbl); + +int geni_se_clk_freq_match(struct geni_se *se, unsigned long req_freq, + unsigned int *index, unsigned long *res_freq, + bool exact); + +int geni_se_tx_dma_prep(struct geni_se *se, void *buf, size_t len, + dma_addr_t *iova); + +int geni_se_rx_dma_prep(struct geni_se *se, void *buf, size_t len, + dma_addr_t *iova); + +void geni_se_tx_dma_unprep(struct geni_se *se, dma_addr_t iova, size_t len); + +void geni_se_rx_dma_unprep(struct geni_se *se, dma_addr_t iova, size_t len); + +int geni_icc_get(struct geni_se *se, const char *icc_ddr); + +int geni_icc_set_bw(struct geni_se *se); +void geni_icc_set_tag(struct geni_se *se, u32 tag); + +int geni_icc_enable(struct geni_se *se); + +int geni_icc_disable(struct geni_se *se); +#endif +#endif -- cgit v1.2.3 From 2b48f52f2bff8e8926165983f3a3d7b89b33de08 Mon Sep 17 00:00:00 2001 From: Matthew Rosato Date: Fri, 3 Feb 2023 16:50:26 -0500 Subject: vfio: fix deadlock between group lock and kvm lock After 51cdc8bc120e, we have another deadlock scenario between the kvm->lock and the vfio group_lock with two different codepaths acquiring the locks in 
different order. Specifically in vfio_open_device, vfio holds the vfio group_lock when issuing device->ops->open_device but some drivers (like vfio-ap) need to acquire kvm->lock during their open_device routine; Meanwhile, kvm_vfio_release will acquire the kvm->lock first before calling vfio_file_set_kvm which will acquire the vfio group_lock. To resolve this, let's remove the need for the vfio group_lock from the kvm_vfio_release codepath. This is done by introducing a new spinlock to protect modifications to the vfio group kvm pointer, and acquiring a kvm ref from within vfio while holding this spinlock, with the reference held until the last close for the device in question. Fixes: 51cdc8bc120e ("kvm/vfio: Fix potential deadlock on vfio group_lock") Reported-by: Anthony Krowiak Suggested-by: Jason Gunthorpe Signed-off-by: Matthew Rosato Tested-by: Tony Krowiak Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Link: https://lore.kernel.org/r/20230203215027.151988-2-mjrosato@linux.ibm.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 35be78e9ae57..87ff862ff555 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -46,7 +46,6 @@ struct vfio_device { struct vfio_device_set *dev_set; struct list_head dev_set_list; unsigned int migration_flags; - /* Driver must reference the kvm during open_device or never touch it */ struct kvm *kvm; /* Members below here are private, not for driver use */ @@ -58,6 +57,7 @@ struct vfio_device { struct list_head group_next; struct list_head iommu_entry; struct iommufd_access *iommufd_access; + void (*put_kvm)(struct kvm *kvm); #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; struct iommufd_ctx *iommufd_ictx; -- cgit v1.2.3 From fae9068022186b832534ea8b325d5242586c4303 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Thu, 9 Feb 2023 00:12:09 -0800 Subject: vfio: Update the kdoc for vfio_device_ops this is missed when adding bind_iommufd/unbind_iommufd and attach_ioas. Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20230209081210.141372-2-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 87ff862ff555..93134b023968 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -70,6 +70,10 @@ struct vfio_device { * * @init: initialize private fields in device structure * @release: Reclaim private fields in device structure + * @bind_iommufd: Called when binding the device to an iommufd + * @unbind_iommufd: Opposite of bind_iommufd + * @attach_ioas: Called when attaching device to an IOAS/HWPT managed by the + * bound iommufd. Undo in unbind_iommufd. * @open_device: Called when the first file descriptor is opened for this device * @close_device: Opposite of open_device * @read: Perform read(2) on device file descriptor -- cgit v1.2.3 From 19409796578c879a41e88ddbdbce50c19457658d Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Wed, 23 Nov 2022 10:55:26 +0100 Subject: include/linux/bcd.h: provide bcd_is_valid() helper bcd2bin(0x0A) happily returns 10, despite this being an invalid BCD value. RTC drivers converting possibly corrupted BCD timestamps might want to validate their input before calling bcd2bin(). Provide a macro to do so. Unlike bcd2bin and bin2bcd, out-of-line versions are not implemented. 
Should the macro experience enough use, this can be retrofitted. Signed-off-by: Ahmad Fatoum Signed-off-by: Sascha Hauer Link: https://lore.kernel.org/r/20221123095527.2771434-2-s.hauer@pengutronix.de Signed-off-by: Alexandre Belloni --- include/linux/bcd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcd.h b/include/linux/bcd.h index 118bea36d7d4..abbc8149178e 100644 --- a/include/linux/bcd.h +++ b/include/linux/bcd.h @@ -14,8 +14,12 @@ const_bin2bcd(x) : \ _bin2bcd(x)) +#define bcd_is_valid(x) \ + const_bcd_is_valid(x) + #define const_bcd2bin(x) (((x) & 0x0f) + ((x) >> 4) * 10) #define const_bin2bcd(x) ((((x) / 10) << 4) + (x) % 10) +#define const_bcd_is_valid(x) (((x) & 0x0f) < 10 && ((x) >> 4) < 10) unsigned _bcd2bin(unsigned char val) __attribute_const__; unsigned char _bin2bcd(unsigned val) __attribute_const__; -- cgit v1.2.3 From c643e6ebedb435bcf863001f5e69a578f2658055 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 3 Feb 2023 16:28:40 -0500 Subject: mm: fix memcpy_from_file_folio() integer underflow If we have a HIGHMEM system with a large folio, 'offset' may be larger than PAGE_SIZE, and so min_t will cap at 'len' instead of the intended end-of-page. That can overflow into the next page which is likely to be unmapped and fault, but could theoretically copy the wrong data. Link: https://lkml.kernel.org/r/Y919vmSrtAgsf6K3@casper.infradead.org Fixes: 00cdf76012ab ("mm: add memcpy_from_file_folio()") Signed-off-by: Matthew Wilcox (Oracle) Cc: "Fabio M. De Francesco" Cc: Ira Weiny Signed-off-by: Andrew Morton --- include/linux/highmem.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 348701dae77f..b06254e76d99 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -431,9 +431,10 @@ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, size_t offset = offset_in_folio(folio, pos); char *from = kmap_local_folio(folio, offset); - if (folio_test_highmem(folio)) + if (folio_test_highmem(folio)) { + offset = offset_in_page(offset); len = min_t(size_t, len, PAGE_SIZE - offset); - else + } else len = min(len, folio_size(folio) - offset); memcpy(to, from, len); -- cgit v1.2.3 From e7f43ca99fc8bff2333547bb08dae20a35a23450 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:02 -0500 Subject: maple_tree: add mas_init() function Patch series "VMA tree type safety and remove __vma_adjust()", v4. This patchset does two things: 1. Clean up, including removal of __vma_adjust() and 2. Extends the VMA iterator API to provide type safety to the VMA operations using the maple tree, as requested by Linus [1]. It also addresses another issue of usability brought up by Linus about needing to modify the maple state within the loops. The maple state has been replaced by the VMA iterator and the iterator is now modified within the MM code so the caller should not need to worry about doing the work themselves when tree modifications occur. This brought up a potential inconsistency of the iterator state and what the user expects, so the inconsistency is addressed to keep the VMA iterator safe for use after the looping over a VMA range. This is addressed in patch 3 ("maple_tree: Reduce user error potential") and 4 ("test_maple_tree: Test modifications while iterating"). 
While cleaning up the state, the duplicate locking code in mm/mmap.c introduced by the maple tree has been address by abstracting it to two functions: vma_prepare() and vma_complete(). These abstractions allowed for a much simpler __vma_adjust(), which eventually leads to the removal of the __vma_adjust() function by placing the logic into the vma_merge() function itself. 1. https://lore.kernel.org/linux-mm/CAHk-=wg9WQXBGkNdKD2bqocnN73rDswuWsavBB7T-tekykEn_A@mail.gmail.com/ This patch (of 49): Add a function that will zero out the maple state struct and set some basic defaults. Link: https://lkml.kernel.org/r/20230120162650.984577-1-Liam.Howlett@oracle.com Link: https://lkml.kernel.org/r/20230120162650.984577-2-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/maple_tree.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h index a7bf58fd7cc6..1fadb5f5978b 100644 --- a/include/linux/maple_tree.h +++ b/include/linux/maple_tree.h @@ -432,6 +432,7 @@ struct ma_wr_state { .min = 0, \ .max = ULONG_MAX, \ .alloc = NULL, \ + .mas_flags = 0, \ } #define MA_WR_STATE(name, ma_state, wr_entry) \ @@ -470,6 +471,16 @@ void *mas_next(struct ma_state *mas, unsigned long max); int mas_empty_area(struct ma_state *mas, unsigned long min, unsigned long max, unsigned long size); +static inline void mas_init(struct ma_state *mas, struct maple_tree *tree, + unsigned long addr) +{ + memset(mas, 0, sizeof(struct ma_state)); + mas->tree = tree; + mas->index = mas->last = addr; + mas->max = ULONG_MAX; + mas->node = MAS_START; +} + /* Checks if a mas has not found anything */ static inline bool mas_is_none(struct ma_state *mas) { -- cgit v1.2.3 From b62b633e048bbddef90b2e55d2e33823187b425f Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:08 -0500 Subject: mm: expand vma iterator interface Add wrappers for the maple tree to the vma iterator. This will provide type safety at compile time. Link: https://lkml.kernel.org/r/20230120162650.984577-8-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 46 ++++++++++++++++++++++++++++++++++++++++++---- include/linux/mm_types.h | 4 +--- 2 files changed, 43 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c9db257f09b3..b977a90d9829 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -670,16 +670,16 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma) static inline struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) { - return mas_find(&vmi->mas, max); + return mas_find(&vmi->mas, max - 1); } static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) { /* - * Uses vma_find() to get the first VMA when the iterator starts. + * Uses mas_find() to get the first VMA when the iterator starts. * Calling mas_next() could skip the first entry. 
*/ - return vma_find(vmi, ULONG_MAX); + return mas_find(&vmi->mas, ULONG_MAX); } static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) @@ -692,12 +692,50 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) return vmi->mas.index; } +static inline unsigned long vma_iter_end(struct vma_iterator *vmi) +{ + return vmi->mas.last + 1; +} +static inline int vma_iter_bulk_alloc(struct vma_iterator *vmi, + unsigned long count) +{ + return mas_expected_entries(&vmi->mas, count); +} + +/* Free any unused preallocations */ +static inline void vma_iter_free(struct vma_iterator *vmi) +{ + mas_destroy(&vmi->mas); +} + +static inline int vma_iter_bulk_store(struct vma_iterator *vmi, + struct vm_area_struct *vma) +{ + vmi->mas.index = vma->vm_start; + vmi->mas.last = vma->vm_end - 1; + mas_store(&vmi->mas, vma); + if (unlikely(mas_is_err(&vmi->mas))) + return -ENOMEM; + + return 0; +} + +static inline void vma_iter_invalidate(struct vma_iterator *vmi) +{ + mas_pause(&vmi->mas); +} + +static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr) +{ + mas_set(&vmi->mas, addr); +} + #define for_each_vma(__vmi, __vma) \ while (((__vma) = vma_next(&(__vmi))) != NULL) /* The MM code likes to work with exclusive end addresses */ #define for_each_vma_range(__vmi, __vma, __end) \ - while (((__vma) = vma_find(&(__vmi), (__end) - 1)) != NULL) + while (((__vma) = vma_find(&(__vmi), (__end))) != NULL) #ifdef CONFIG_SHMEM /* diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 452920467223..5ca11c6c46e8 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -849,9 +849,7 @@ struct vma_iterator { static inline void vma_iter_init(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long addr) { - vmi->mas.tree = &mm->mm_mt; - vmi->mas.index = addr; - vmi->mas.node = MAS_START; + mas_init(&vmi->mas, &mm->mm_mt, addr); } struct mmu_gather; -- cgit v1.2.3 From 183654ce26a5d5bd7bc11bcb02e8086f02f66d7d Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:13 -0500 Subject: mmap: change do_mas_munmap and do_mas_aligned_munmap() to use vma iterator Start passing the vma iterator through the mm code. This will allow for reuse of the state and cleaner invalidation if necessary. Link: https://lkml.kernel.org/r/20230120162650.984577-13-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index b977a90d9829..152a1362b800 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2905,7 +2905,7 @@ extern unsigned long mmap_region(struct file *file, unsigned long addr, extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff, unsigned long *populate, struct list_head *uf); -extern int do_mas_munmap(struct ma_state *mas, struct mm_struct *mm, +extern int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf, bool downgrade); extern int do_munmap(struct mm_struct *, unsigned long, size_t, -- cgit v1.2.3 From f2ebfe43ba6c845e70b6acbabd6c69ab74b3c52e Mon Sep 17 00:00:00 2001 From: "Liam R. 
Howlett" Date: Fri, 20 Jan 2023 11:26:15 -0500 Subject: mm: add temporary vma iterator versions of vma_merge(), split_vma(), and __split_vma() These wrappers are short-lived in this patch set so that each user can be converted on its own. In the end, these functions are renamed in one commit. Link: https://lkml.kernel.org/r/20230120162650.984577-15-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 152a1362b800..956025940053 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2843,11 +2843,20 @@ extern struct vm_area_struct *vma_merge(struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *); +extern struct vm_area_struct *vmi_vma_merge(struct vma_iterator *vmi, + struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, + unsigned long end, unsigned long vm_flags, struct anon_vma *, + struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx, + struct anon_vma_name *); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); extern int __split_vma(struct mm_struct *, struct vm_area_struct *, - unsigned long addr, int new_below); + unsigned long addr, int new_below); +extern int vmi__split_vma(struct vma_iterator *vmi, struct mm_struct *, + struct vm_area_struct *, unsigned long addr, int new_below); extern int split_vma(struct mm_struct *, struct vm_area_struct *, unsigned long addr, int new_below); +extern int vmi_split_vma(struct vma_iterator *vmi, struct mm_struct *, + struct vm_area_struct *, unsigned long addr, int new_below); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, -- cgit v1.2.3 From 27b267011296e35dd5c983bf6c53b7230c78f383 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 26 Jan 2023 16:20:49 -0500 Subject: ipc/shm: introduce new do_vma_munmap() to munmap The shm already has the vma iterator in position for a write. do_vmi_munmap() searches for the correct position and aligns the write, so it is not the right function to use in this case. The shm VMA tree modification is similar to the brk munmap situation, the vma iterator is in position and the VMA is already known. This patch generalizes the brk munmap function do_brk_munmap() to be used for any other callers with the vma iterator already in position to munmap a VMA. Link: https://lkml.kernel.org/r/20230126212049.980501-1-Liam.Howlett@oracle.com Signed-off-by: Liam R. 
Howlett Reported-by: Sven Schnelle Link: https://lore.kernel.org/linux-mm/yt9dh6wec21a.fsf@linux.ibm.com/ Cc: Arnd Bergmann Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 956025940053..5b5f26d6588a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2922,6 +2922,9 @@ extern int do_munmap(struct mm_struct *, unsigned long, size_t, extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior); #ifdef CONFIG_MMU +extern int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct list_head *uf, bool downgrade); extern int __mm_populate(unsigned long addr, unsigned long len, int ignore_errors); static inline void mm_populate(unsigned long addr, unsigned long len) -- cgit v1.2.3 From 2286a6914c776ec34cd97e4573b1466d055cb9de Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:18 -0500 Subject: mm: change mprotect_fixup to vma iterator Use the vma iterator so that the iterator can be invalidated or updated to avoid each caller doing so. Link: https://lkml.kernel.org/r/20230120162650.984577-18-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5b5f26d6588a..144ddfd65992 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2197,9 +2197,9 @@ bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, extern long change_protection(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long cp_flags); -extern int mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma, - struct vm_area_struct **pprev, unsigned long start, - unsigned long end, unsigned long newflags); +extern int mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, + struct vm_area_struct *vma, struct vm_area_struct **pprev, + unsigned long start, unsigned long end, unsigned long newflags); /* * doesn't attempt to fault and will return short. -- cgit v1.2.3 From 9760ebffbf5507320e0de41f5b80089bdef996a0 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:30 -0500 Subject: mm: switch vma_merge(), split_vma(), and __split_vma to vma iterator Drop the vmi_* functions and transition all users to use the vma iterator directly. Link: https://lkml.kernel.org/r/20230120162650.984577-30-Liam.Howlett@oracle.com Signed-off-by: Liam R. 
Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 144ddfd65992..f3b49feb5c35 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2839,24 +2839,16 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, { return __vma_adjust(vma, start, end, pgoff, insert, NULL); } -extern struct vm_area_struct *vma_merge(struct mm_struct *, - struct vm_area_struct *prev, unsigned long addr, unsigned long end, - unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, - struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *); -extern struct vm_area_struct *vmi_vma_merge(struct vma_iterator *vmi, +extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *); extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); -extern int __split_vma(struct mm_struct *, struct vm_area_struct *, - unsigned long addr, int new_below); -extern int vmi__split_vma(struct vma_iterator *vmi, struct mm_struct *, - struct vm_area_struct *, unsigned long addr, int new_below); -extern int split_vma(struct mm_struct *, struct vm_area_struct *, - unsigned long addr, int new_below); -extern int vmi_split_vma(struct vma_iterator *vmi, struct mm_struct *, - struct vm_area_struct *, unsigned long addr, int new_below); +extern int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *, + unsigned long addr, int new_below); +extern int split_vma(struct vma_iterator *vmi, struct vm_area_struct *, + unsigned long addr, int new_below); extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, -- cgit v1.2.3 From fbcc3104b8437cc1babf04421e8bb8181561343e Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:32 -0500 Subject: mmap: convert __vma_adjust() to use vma iterator Use the vma iterator internally for __vma_adjust(). Avoid using the maple tree interface directly for type safety. Link: https://lkml.kernel.org/r/20230120162650.984577-32-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f3b49feb5c35..2f62d687e9bd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2856,9 +2856,6 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **, bool *need_rmap_locks); extern void exit_mmap(struct mm_struct *); -void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas); -void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas); - static inline int check_data_rlimit(unsigned long rlim, unsigned long new, unsigned long start, -- cgit v1.2.3 From 9e56044625a1f472edc278105f41a60726991d89 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:33 -0500 Subject: mm: pass through vma iterator to __vma_adjust() Pass the vma iterator through to __vma_adjust() so the state can be updated. 
Link: https://lkml.kernel.org/r/20230120162650.984577-33-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2f62d687e9bd..9c15f401f295 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2831,13 +2831,15 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern int __vma_adjust(struct vm_area_struct *vma, unsigned long start, +extern int __vma_adjust(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert, struct vm_area_struct *expand); static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert) { - return __vma_adjust(vma, start, end, pgoff, insert, NULL); + VMA_ITERATOR(vmi, vma->vm_mm, start); + + return __vma_adjust(&vmi, vma, start, end, pgoff, insert, NULL); } extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, -- cgit v1.2.3 From b373037fa9bb374f26bbabc0779fe990d02d33b7 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:37 -0500 Subject: mm: add vma iterator to vma_adjust() arguments Change the vma_adjust() function definition to accept the vma iterator and pass it through to __vma_adjust(). Update fs/exec to use the new vma_adjust() function parameters. Update mm/mremap to use the new vma_adjust() function parameters. Revert the __split_vma() calls back from __vma_adjust() to vma_adjust() and pass through the vma iterator. Link: https://lkml.kernel.org/r/20230120162650.984577-37-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9c15f401f295..2e95287a9f74 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2834,12 +2834,11 @@ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admi extern int __vma_adjust(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert, struct vm_area_struct *expand); -static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, - unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert) +static inline int vma_adjust(struct vma_iterator *vmi, + struct vm_area_struct *vma, unsigned long start, unsigned long end, + pgoff_t pgoff, struct vm_area_struct *insert) { - VMA_ITERATOR(vmi, vma->vm_mm, start); - - return __vma_adjust(&vmi, vma, start, end, pgoff, insert, NULL); + return __vma_adjust(vmi, vma, start, end, pgoff, insert, NULL); } extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, -- cgit v1.2.3 From b2b3b886738fec5e89ca9ebc720eba1a8f615753 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:44 -0500 Subject: mm: don't use __vma_adjust() in __split_vma() Use the abstracted locking and maple tree operations. 
Since __split_vma() is the only user of the __vma_adjust() function to use the insert argument, drop that argument. Remove the NULL passed through from fs/exec's shift_arg_pages() and mremap() at the same time. Link: https://lkml.kernel.org/r/20230120162650.984577-44-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e95287a9f74..3845de5d2581 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2832,13 +2832,12 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); extern int __vma_adjust(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, - unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert, - struct vm_area_struct *expand); + unsigned long end, pgoff_t pgoff, struct vm_area_struct *expand); static inline int vma_adjust(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, - pgoff_t pgoff, struct vm_area_struct *insert) + pgoff_t pgoff) { - return __vma_adjust(vmi, vma, start, end, pgoff, insert, NULL); + return __vma_adjust(vmi, vma, start, end, pgoff, NULL); } extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, -- cgit v1.2.3 From 7c9813e886bb52495ff5b97d4b0f1320d36d869b Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:45 -0500 Subject: mm/mremap: convert vma_adjust() to vma_expand() Stop using vma_adjust() in preparation for removing the function. Export vma_expand() to use instead. Link: https://lkml.kernel.org/r/20230120162650.984577-45-Liam.Howlett@oracle.com Signed-off-by: Liam R. Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3845de5d2581..245fb30858c9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2839,6 +2839,9 @@ static inline int vma_adjust(struct vma_iterator *vmi, { return __vma_adjust(vmi, vma, start, end, pgoff, NULL); } +extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, + unsigned long start, unsigned long end, pgoff_t pgoff, + struct vm_area_struct *next); extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, -- cgit v1.2.3 From cf51e86dfbe39b7cae3a9de650d035af22dd5fb4 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Fri, 20 Jan 2023 11:26:46 -0500 Subject: mm/mmap: don't use __vma_adjust() in shift_arg_pages() Introduce shrink_vma() which uses the vma_prepare() and vma_complete() functions to reduce the vma coverage. Convert shift_arg_pages() to use expand_vma() and the new shrink_vma() function. Remove support from __vma_adjust() to reduce a vma size since shift_arg_pages() is the only user that shrinks a VMA in this way. Link: https://lkml.kernel.org/r/20230120162650.984577-46-Liam.Howlett@oracle.com Signed-off-by: Liam R. 
Howlett Signed-off-by: Andrew Morton --- include/linux/mm.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 245fb30858c9..dcc34533d2f6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2831,17 +2831,11 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern int __vma_adjust(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, - unsigned long end, pgoff_t pgoff, struct vm_area_struct *expand); -static inline int vma_adjust(struct vma_iterator *vmi, - struct vm_area_struct *vma, unsigned long start, unsigned long end, - pgoff_t pgoff) -{ - return __vma_adjust(vmi, vma, start, end, pgoff, NULL); -} extern int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *next); +extern int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma, + unsigned long start, unsigned long end, pgoff_t pgoff); extern struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, -- cgit v1.2.3 From bc292ab00f6c7a661a8a605c714e8a148f629ef6 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 26 Jan 2023 11:37:47 -0800 Subject: mm: introduce vma->vm_flags wrapper functions vm_flags are among VMA attributes which affect decisions like VMA merging and splitting. Therefore all vm_flags modifications are performed after taking exclusive mmap_lock to prevent vm_flags updates racing with such operations. Introduce modifier functions for vm_flags to be used whenever flags are updated. This way we can better check and control correct locking behavior during these updates. Link: https://lkml.kernel.org/r/20230126193752.297968-3-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Davidlohr Bueso Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Mike Rapoport (IBM) Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Andy Lutomirski Cc: Arjun Roy Cc: Axel Rasmussen Cc: David Hildenbrand Cc: David Howells Cc: David Rientjes Cc: Eric Dumazet Cc: Greg Thelen Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jann Horn Cc: Joel Fernandes Cc: Johannes Weiner Cc: Kent Overstreet Cc: Laurent Dufour Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Minchan Kim Cc: Paul E. 
McKenney Cc: Peter Oskolkov Cc: Peter Xu Cc: Peter Zijlstra Cc: Punit Agrawal Cc: Sebastian Andrzej Siewior Cc: Sebastian Reichel Cc: Shakeel Butt Cc: Soheil Hassas Yeganeh Cc: Song Liu Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/mm.h | 40 ++++++++++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 10 +++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index dcc34533d2f6..e2df5d122b67 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -627,6 +627,46 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) INIT_LIST_HEAD(&vma->anon_vma_chain); } +/* Use when VMA is not part of the VMA tree and needs no locking */ +static inline void vm_flags_init(struct vm_area_struct *vma, + vm_flags_t flags) +{ + ACCESS_PRIVATE(vma, __vm_flags) = flags; +} + +/* Use when VMA is part of the VMA tree and modifications need coordination */ +static inline void vm_flags_reset(struct vm_area_struct *vma, + vm_flags_t flags) +{ + mmap_assert_write_locked(vma->vm_mm); + vm_flags_init(vma, flags); +} + +static inline void vm_flags_set(struct vm_area_struct *vma, + vm_flags_t flags) +{ + mmap_assert_write_locked(vma->vm_mm); + ACCESS_PRIVATE(vma, __vm_flags) |= flags; +} + +static inline void vm_flags_clear(struct vm_area_struct *vma, + vm_flags_t flags) +{ + mmap_assert_write_locked(vma->vm_mm); + ACCESS_PRIVATE(vma, __vm_flags) &= ~flags; +} + +/* + * Use only when the order of set/clear operations is unimportant, otherwise + * use vm_flags_{set|clear} explicitly. + */ +static inline void vm_flags_mod(struct vm_area_struct *vma, + vm_flags_t set, vm_flags_t clear) +{ + mmap_assert_write_locked(vma->vm_mm); + vm_flags_init(vma, (vma->vm_flags | set) & ~clear); +} + static inline void vma_set_anonymous(struct vm_area_struct *vma) { vma->vm_ops = NULL; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5ca11c6c46e8..10a1e41f4e70 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -491,7 +491,15 @@ struct vm_area_struct { * See vmf_insert_mixed_prot() for discussion. */ pgprot_t vm_page_prot; - unsigned long vm_flags; /* Flags, see mm.h. */ + + /* + * Flags, see mm.h. + * To modify use vm_flags_{init|reset|set|clear|mod} functions. + */ + union { + const vm_flags_t vm_flags; + vm_flags_t __private __vm_flags; + }; /* * For areas with an address space and backing store, -- cgit v1.2.3 From e430a95a04efc557bc4ff9b3035c7c85aee5d63f Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 26 Jan 2023 11:37:48 -0800 Subject: mm: replace VM_LOCKED_CLEAR_MASK with VM_LOCKED_MASK To simplify the usage of VM_LOCKED_CLEAR_MASK in vm_flags_clear(), replace it with VM_LOCKED_MASK bitmask and convert all users. Link: https://lkml.kernel.org/r/20230126193752.297968-4-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Mike Rapoport (IBM) Reviewed-by: Davidlohr Bueso Cc: Andy Lutomirski Cc: Arjun Roy Cc: Axel Rasmussen Cc: David Hildenbrand Cc: David Howells Cc: David Rientjes Cc: Eric Dumazet Cc: Greg Thelen Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jann Horn Cc: Joel Fernandes Cc: Johannes Weiner Cc: Kent Overstreet Cc: Laurent Dufour Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Minchan Kim Cc: Paul E. 
McKenney Cc: Peter Oskolkov Cc: Peter Xu Cc: Peter Zijlstra Cc: Punit Agrawal Cc: Sebastian Andrzej Siewior Cc: Sebastian Reichel Cc: Shakeel Butt Cc: Soheil Hassas Yeganeh Cc: Song Liu Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index e2df5d122b67..663726ca2240 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -421,8 +421,8 @@ extern unsigned int kobjsize(const void *objp); /* This mask defines which mm->def_flags a process can inherit its parent */ #define VM_INIT_DEF_MASK VM_NOHUGEPAGE -/* This mask is used to clear all the VMA flags used by mlock */ -#define VM_LOCKED_CLEAR_MASK (~(VM_LOCKED | VM_LOCKONFAULT)) +/* This mask represents all the VMA flag bits used by mlock */ +#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) /* Arch-specific flags to clear when updating VM flags on protection change */ #ifndef VM_ARCH_CLEAR -- cgit v1.2.3 From 1c71222e5f2393b5ea1a41795c67589eea7e3490 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 26 Jan 2023 11:37:49 -0800 Subject: mm: replace vma->vm_flags direct modifications with modifier calls Replace direct modifications to vma->vm_flags with calls to modifier functions to be able to track flag changes and to keep vma locking correctness. [akpm@linux-foundation.org: fix drivers/misc/open-dice.c, per Hyeonggon Yoo] Link: https://lkml.kernel.org/r/20230126193752.297968-5-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Michal Hocko Acked-by: Mel Gorman Acked-by: Mike Rapoport (IBM) Acked-by: Sebastian Reichel Reviewed-by: Liam R. Howlett Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Andy Lutomirski Cc: Arjun Roy Cc: Axel Rasmussen Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: David Rientjes Cc: Eric Dumazet Cc: Greg Thelen Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jann Horn Cc: Joel Fernandes Cc: Johannes Weiner Cc: Kent Overstreet Cc: Laurent Dufour Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Minchan Kim Cc: Paul E. McKenney Cc: Peter Oskolkov Cc: Peter Xu Cc: Peter Zijlstra Cc: Punit Agrawal Cc: Sebastian Andrzej Siewior Cc: Shakeel Butt Cc: Soheil Hassas Yeganeh Cc: Song Liu Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 663726ca2240..ce6d9d765aae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3653,7 +3653,7 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) * VM_MAYWRITE as we still want them to be COW-writable. */ if (vma->vm_flags & VM_SHARED) - vma->vm_flags &= ~(VM_MAYWRITE); + vm_flags_clear(vma, VM_MAYWRITE); } return 0; -- cgit v1.2.3 From 68f48381d7fdd1cbb9d88c37a4dfbb98ac78226d Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 26 Jan 2023 11:37:51 -0800 Subject: mm: introduce __vm_flags_mod and use it in untrack_pfn There are scenarios when vm_flags can be modified without exclusive mmap_lock, such as: - after VMA was isolated and mmap_lock was downgraded or dropped - in exit_mmap when there are no other mm users and locking is unnecessary Introduce __vm_flags_mod to avoid assertions when the caller takes responsibility for the required locking. Pass a hint to untrack_pfn to conditionally use __vm_flags_mod for flags modification to avoid assertion. 
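A minimal sketch of how the split described above might look at a call site, assuming only the helpers introduced in this series (vm_flags_clear(), __vm_flags_mod(), VM_LOCKED_MASK); the wrapper function and its mm_wr_locked parameter are illustrative and not part of the patch.

#include <linux/mm.h>

/*
 * Illustrative only: clear the mlock-related bits on a VMA, picking the
 * asserting or non-asserting helper depending on whether this caller
 * still holds mmap_lock for write.
 */
static void clear_mlock_flags(struct vm_area_struct *vma, bool mm_wr_locked)
{
	if (mm_wr_locked)
		/* vm_flags_clear() asserts mmap_lock is held for write */
		vm_flags_clear(vma, VM_LOCKED_MASK);
	else
		/* VMA already isolated, or mm has no other users: no assertion */
		__vm_flags_mod(vma, 0, VM_LOCKED_MASK);
}

This mirrors the untrack_pfn() change in the diff below, where the new mm_wr_locked hint lets the callee choose between the asserting and non-asserting flag updates.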
Link: https://lkml.kernel.org/r/20230126193752.297968-7-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Michal Hocko Acked-by: Mike Rapoport (IBM) Cc: Andy Lutomirski Cc: Arjun Roy Cc: Axel Rasmussen Cc: David Hildenbrand Cc: David Howells Cc: Davidlohr Bueso Cc: David Rientjes Cc: Eric Dumazet Cc: Greg Thelen Cc: Hugh Dickins Cc: Ingo Molnar Cc: Jann Horn Cc: Joel Fernandes Cc: Johannes Weiner Cc: Kent Overstreet Cc: Laurent Dufour Cc: Liam R. Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox Cc: Mel Gorman Cc: Minchan Kim Cc: Paul E. McKenney Cc: Peter Oskolkov Cc: Peter Xu Cc: Peter Zijlstra Cc: Punit Agrawal Cc: Sebastian Andrzej Siewior Cc: Sebastian Reichel Cc: Shakeel Butt Cc: Soheil Hassas Yeganeh Cc: Song Liu Cc: Vlastimil Babka Cc: Will Deacon Signed-off-by: Andrew Morton --- include/linux/mm.h | 14 ++++++++++++-- include/linux/pgtable.h | 5 +++-- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ce6d9d765aae..27b34f7730e7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -656,6 +656,16 @@ static inline void vm_flags_clear(struct vm_area_struct *vma, ACCESS_PRIVATE(vma, __vm_flags) &= ~flags; } +/* + * Use only if VMA is not part of the VMA tree or has no other users and + * therefore needs no locking. + */ +static inline void __vm_flags_mod(struct vm_area_struct *vma, + vm_flags_t set, vm_flags_t clear) +{ + vm_flags_init(vma, (vma->vm_flags | set) & ~clear); +} + /* * Use only when the order of set/clear operations is unimportant, otherwise * use vm_flags_{set|clear} explicitly. @@ -664,7 +674,7 @@ static inline void vm_flags_mod(struct vm_area_struct *vma, vm_flags_t set, vm_flags_t clear) { mmap_assert_write_locked(vma->vm_mm); - vm_flags_init(vma, (vma->vm_flags | set) & ~clear); + __vm_flags_mod(vma, set, clear); } static inline void vma_set_anonymous(struct vm_area_struct *vma) @@ -2085,7 +2095,7 @@ static inline void zap_vma_pages(struct vm_area_struct *vma) } void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *start_vma, unsigned long start, - unsigned long end); + unsigned long end, bool mm_wr_locked); struct mmu_notifier_range; diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 5fd45454c073..c63cd44777ec 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1185,7 +1185,8 @@ static inline int track_pfn_copy(struct vm_area_struct *vma) * can be for the entire vma (in which case pfn, size are zero). */ static inline void untrack_pfn(struct vm_area_struct *vma, - unsigned long pfn, unsigned long size) + unsigned long pfn, unsigned long size, + bool mm_wr_locked) { } @@ -1203,7 +1204,7 @@ extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn); extern int track_pfn_copy(struct vm_area_struct *vma); extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size); + unsigned long size, bool mm_wr_locked); extern void untrack_pfn_moved(struct vm_area_struct *vma); #endif -- cgit v1.2.3 From 601c3c29dbeb049862faa00917f2daf094a71028 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Tue, 31 Jan 2023 16:01:16 -0800 Subject: mm: introduce vm_flags_reset_once to replace WRITE_ONCE vm_flags updates Provide vm_flags_reset_once() and replace the vm_flags updates which used WRITE_ONCE() to prevent compiler optimizations. 
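A hedged usage sketch for the helper above, assuming a caller such as an mlock teardown path where lockless readers may observe vm_flags concurrently; the function name is made up for illustration.

#include <linux/mm.h>

/* Illustrative only: publish new flags with a single, non-torn store. */
static void drop_vma_locked_bits(struct vm_area_struct *vma)
{
	/* WRITE_ONCE() semantics; mmap_lock must still be held for write */
	vm_flags_reset_once(vma, vma->vm_flags & ~VM_LOCKED_MASK);
}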
Link: https://lkml.kernel.org/r/20230201000116.1333160-1-surenb@google.com Fixes: 0cce31a0aa0e ("mm: replace vma->vm_flags direct modifications with modifier calls") Signed-off-by: Suren Baghdasaryan Reported-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com> Acked-by: Michal Hocko Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 27b34f7730e7..0ed0cb2401f5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -642,6 +642,13 @@ static inline void vm_flags_reset(struct vm_area_struct *vma, vm_flags_init(vma, flags); } +static inline void vm_flags_reset_once(struct vm_area_struct *vma, + vm_flags_t flags) +{ + mmap_assert_write_locked(vma->vm_mm); + WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags); +} + static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags) { -- cgit v1.2.3 From 3e629597b8477efbcc0ad14ee80558a080eebdc3 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Feb 2023 16:25:19 +0000 Subject: filemap: add mapping_read_folio_gfp() This is like read_cache_page_gfp() except it returns the folio instead of the precise page. Link: https://lkml.kernel.org/r/20230206162520.4029022-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Charan Teja Kalla Cc: David Rientjes Cc: Hugh Dickins Cc: Mark Hemment Cc: Michal Hocko Cc: Pavankumar Kondeti Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 9f1081683771..6a32ac170d3d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -756,6 +756,8 @@ static inline struct page *grab_cache_page(struct address_space *mapping, struct folio *read_cache_folio(struct address_space *, pgoff_t index, filler_t *filler, struct file *file); +struct folio *mapping_read_folio_gfp(struct address_space *, pgoff_t index, + gfp_t flags); struct page *read_cache_page(struct address_space *, pgoff_t index, filler_t *filler, struct file *file); extern struct page * read_cache_page_gfp(struct address_space *mapping, -- cgit v1.2.3 From f01b2b3ed8735dacd92f1da548708449525e286a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Feb 2023 16:25:20 +0000 Subject: shmem: add shmem_read_folio() and shmem_read_folio_gfp() These are the folio replacements for shmem_read_mapping_page() and shmem_read_mapping_page_gfp(). 
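A minimal usage sketch, assuming the same ERR_PTR error convention as the page-based helpers (mapping and index are placeholders):

    struct folio *folio;

    folio = shmem_read_folio(mapping, index);
    if (IS_ERR(folio))
            return PTR_ERR(folio);
    /* ... use the folio ... */
    folio_put(folio);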
[akpm@linux-foundation.org: fix shmem_read_mapping_page_gfp(), per Matthew] Link: https://lkml.kernel.org/r/Y+QdJTuzxeBYejw2@casper.infradead.org Link: https://lkml.kernel.org/r/20230206162520.4029022-2-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Mark Hemment Cc: Charan Teja Kalla Cc: David Rientjes Cc: Hugh Dickins Cc: Michal Hocko Cc: Pavankumar Kondeti Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index d09d54be4ffd..103d1000a5a2 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -109,6 +109,14 @@ enum sgp_type { int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop, enum sgp_type sgp); +struct folio *shmem_read_folio_gfp(struct address_space *mapping, + pgoff_t index, gfp_t gfp); + +static inline struct folio *shmem_read_folio(struct address_space *mapping, + pgoff_t index) +{ + return shmem_read_folio_gfp(mapping, index, mapping_gfp_mask(mapping)); +} static inline struct page *shmem_read_mapping_page( struct address_space *mapping, pgoff_t index) -- cgit v1.2.3 From 869176a096068056b338b5cc1b0af93106007f5d Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Mon, 6 Feb 2023 16:40:15 +0800 Subject: mm/vmalloc.c: add flags to mark vm_map_ram area Through vmalloc API, a virtual kernel area is reserved for physical address mapping. And vmap_area is used to track them, while vm_struct is allocated to associate with the vmap_area to store more information and passed out. However, area reserved via vm_map_ram() is an exception. It doesn't have vm_struct to associate with vmap_area. And we can't recognize the vmap_area with '->vm == NULL' as a vm_map_ram() area because the normal freeing path will set va->vm = NULL before unmapping, please see function remove_vm_area(). Meanwhile, there are two kinds of handling for vm_map_ram area. One is the whole vmap_area being reserved and mapped at one time through vm_map_area() interface; the other is the whole vmap_area with VMAP_BLOCK_SIZE size being reserved, while mapped into split regions with smaller size via vb_alloc(). To mark the area reserved through vm_map_ram(), add flags field into struct vmap_area. Bit 0 indicates this is vm_map_ram area created through vm_map_ram() interface, while bit 1 marks out the type of vm_map_ram area which makes use of vmap_block to manage split regions via vb_alloc/free(). This is a preparation for later use. 
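For illustration only, the two bits could later be consumed along these lines (the flag names and values below are assumptions of this sketch; this patch only adds the field):

    #define VMAP_RAM        0x1     /* bit 0: area came from vm_map_ram() */
    #define VMAP_BLOCK      0x2     /* bit 1: area split into regions via vb_alloc() */

    /* recognize a vm_map_ram area without relying on va->vm */
    if (va->flags & VMAP_RAM)
            /* handle the vm_map_ram mapping */;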
Link: https://lkml.kernel.org/r/20230206084020.174506-3-bhe@redhat.com Signed-off-by: Baoquan He Reviewed-by: Lorenzo Stoakes Reviewed-by: Uladzislau Rezki (Sony) Cc: Dan Carpenter Cc: Stephen Brennan Signed-off-by: Andrew Morton --- include/linux/vmalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 096d48aa3437..69250efa03d1 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -76,6 +76,7 @@ struct vmap_area { unsigned long subtree_max_size; /* in "free" tree */ struct vm_struct *vm; /* in "busy" tree */ }; + unsigned long flags; /* mark type of vm_map_ram area */ }; /* archs that select HAVE_ARCH_HUGE_VMAP should override one or more of these */ -- cgit v1.2.3 From 7427c30bea1449a885a1dd9baf991aaad26209ce Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 24 Jan 2023 16:34:23 -0400 Subject: mm/gup: remove obsolete FOLL_LONGTERM comment These days FOLL_LONGTERM is not allowed at all on any get_user_pages*() functions, it must be only be used with pin_user_pages*(), plus it now has universal support for all the pin_user_pages*() functions. Link: https://lkml.kernel.org/r/2-v2-987e91b59705+36b-gup_tidy_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Reviewed-by: John Hubbard Cc: Alistair Popple Cc: Christoph Hellwig Cc: Claudio Imbrenda Cc: David Hildenbrand Cc: David Howells Cc: Mike Rapoport (IBM) Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 10a1e41f4e70..4396c7bf06d1 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1053,12 +1053,6 @@ typedef unsigned int __bitwise zap_flags_t; * specifically failed. Filesystem pages are still subject to bugs and use of * FOLL_LONGTERM should be avoided on those pages. * - * FIXME: Also NOTE that FOLL_LONGTERM is not supported in every GUP call. - * Currently only get_user_pages() and get_user_pages_fast() support this flag - * and calls to get_user_pages_[un]locked are specifically not allowed. This - * is due to an incompatibility with the FS DAX check and - * FAULT_FLAG_ALLOW_RETRY. - * * In the CMA case: long term pins in a CMA region would unnecessarily fragment * that region. And so, CMA attempts to migrate the page before pinning, when * FOLL_LONGTERM is specified. -- cgit v1.2.3 From 7ce154fe6917e7db94d63bc4d6c73b678ad1c581 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 24 Jan 2023 16:34:25 -0400 Subject: mm/gup: move try_grab_page() to mm/internal.h This is part of the internal function of gup.c and is only non-static so that the parts of gup.c in the huge_memory.c and hugetlb.c can call it. 
Put it in internal.h beside the similarly purposed try_grab_folio() Link: https://lkml.kernel.org/r/4-v2-987e91b59705+36b-gup_tidy_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Reviewed-by: John Hubbard Cc: Alistair Popple Cc: Christoph Hellwig Cc: Claudio Imbrenda Cc: David Hildenbrand Cc: David Howells Cc: Mike Rapoport (IBM) Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 0ed0cb2401f5..afefc166b349 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1268,8 +1268,6 @@ static inline void get_page(struct page *page) folio_get(page_folio(page)); } -int __must_check try_grab_page(struct page *page, unsigned int flags); - static inline __must_check bool try_get_page(struct page *page) { page = compound_head(page); -- cgit v1.2.3 From f04740f54594f85935e29a5c8ff6722f427f3dac Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 24 Jan 2023 16:34:29 -0400 Subject: mm/gup: add FOLL_UNLOCKABLE Setting FOLL_UNLOCKABLE allows GUP to lock/unlock the mmap lock on its own. It is a more explicit replacement for locked != NULL. This clears the way for passing in locked = 1, without intending that the lock can be unlocked. Set the flag in all cases where it is used, eg locked is present in the external interface or locked is used internally with locked = 0. Link: https://lkml.kernel.org/r/8-v2-987e91b59705+36b-gup_tidy_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Acked-by: Mike Rapoport (IBM) Reviewed-by: John Hubbard Cc: Alistair Popple Cc: Christoph Hellwig Cc: Claudio Imbrenda Cc: David Hildenbrand Cc: David Howells Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 4396c7bf06d1..434b3ac8a351 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1104,5 +1104,6 @@ typedef unsigned int __bitwise zap_flags_t; #define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ #define FOLL_PCI_P2PDMA 0x100000 /* allow returning PCI P2PDMA pages */ #define FOLL_INTERRUPTIBLE 0x200000 /* allow interrupts from generic signals */ +#define FOLL_UNLOCKABLE 0x400000 /* allow unlocking the mmap lock (internal only) */ #endif /* _LINUX_MM_TYPES_H */ -- cgit v1.2.3 From edad1bb1fbf7e28b49bf76b2aa66bfcaba00f627 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 24 Jan 2023 16:34:31 -0400 Subject: mm/gup: remove pin_user_pages_fast_only() Commit ed29c2691188 ("drm/i915: Fix userptr so we do not have to worry about obj->mm.lock, v7.") removed the only caller, remove this dead code too. 
Link: https://lkml.kernel.org/r/10-v2-987e91b59705+36b-gup_tidy_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Acked-by: Mike Rapoport (IBM) Reviewed-by: John Hubbard Reviewed-by: David Hildenbrand Cc: Alistair Popple Cc: Christoph Hellwig Cc: Claudio Imbrenda Cc: David Howells Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index afefc166b349..a0d59645dcf9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2261,8 +2261,6 @@ extern int mprotect_fixup(struct vma_iterator *vmi, struct mmu_gather *tlb, */ int get_user_pages_fast_only(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); -int pin_user_pages_fast_only(unsigned long start, int nr_pages, - unsigned int gup_flags, struct page **pages); static inline bool get_user_page_fast_only(unsigned long addr, unsigned int gup_flags, struct page **pagep) -- cgit v1.2.3 From 63b605128655f2e3968d99e30b293c7e7eaa2fc2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 24 Jan 2023 16:34:33 -0400 Subject: mm/gup: move gup_must_unshare() to mm/internal.h This function is only used in gup.c and closely related. It touches FOLL_PIN so it must be moved before the next patch. Link: https://lkml.kernel.org/r/12-v2-987e91b59705+36b-gup_tidy_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Reviewed-by: John Hubbard Reviewed-by: David Hildenbrand Cc: Alistair Popple Cc: Christoph Hellwig Cc: Claudio Imbrenda Cc: David Howells Cc: Mike Rapoport (IBM) Signed-off-by: Andrew Morton --- include/linux/mm.h | 65 ------------------------------------------------------ 1 file changed, 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a0d59645dcf9..4a0695ef969a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3180,71 +3180,6 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags) return 0; } -/* - * Indicates for which pages that are write-protected in the page table, - * whether GUP has to trigger unsharing via FAULT_FLAG_UNSHARE such that the - * GUP pin will remain consistent with the pages mapped into the page tables - * of the MM. - * - * Temporary unmapping of PageAnonExclusive() pages or clearing of - * PageAnonExclusive() has to protect against concurrent GUP: - * * Ordinary GUP: Using the PT lock - * * GUP-fast and fork(): mm->write_protect_seq - * * GUP-fast and KSM or temporary unmapping (swap, migration): see - * page_try_share_anon_rmap() - * - * Must be called with the (sub)page that's actually referenced via the - * page table entry, which might not necessarily be the head page for a - * PTE-mapped THP. - * - * If the vma is NULL, we're coming from the GUP-fast path and might have - * to fallback to the slow path just to lookup the vma. - */ -static inline bool gup_must_unshare(struct vm_area_struct *vma, - unsigned int flags, struct page *page) -{ - /* - * FOLL_WRITE is implicitly handled correctly as the page table entry - * has to be writable -- and if it references (part of) an anonymous - * folio, that part is required to be marked exclusive. - */ - if ((flags & (FOLL_WRITE | FOLL_PIN)) != FOLL_PIN) - return false; - /* - * Note: PageAnon(page) is stable until the page is actually getting - * freed. - */ - if (!PageAnon(page)) { - /* - * We only care about R/O long-term pining: R/O short-term - * pinning does not have the semantics to observe successive - * changes through the process page tables. 
- */ - if (!(flags & FOLL_LONGTERM)) - return false; - - /* We really need the vma ... */ - if (!vma) - return true; - - /* - * ... because we only care about writable private ("COW") - * mappings where we have to break COW early. - */ - return is_cow_mapping(vma->vm_flags); - } - - /* Paired with a memory barrier in page_try_share_anon_rmap(). */ - if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) - smp_rmb(); - - /* - * Note that PageKsm() pages cannot be exclusive, and consequently, - * cannot get pinned. - */ - return !PageAnonExclusive(page); -} - /* * Indicates whether GUP can follow a PROT_NONE mapped page, or whether * a (NUMA hinting) fault is required. -- cgit v1.2.3 From 2c2241081f7dec878331fdc3a3f2361e99556bca Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 24 Jan 2023 16:34:34 -0400 Subject: mm/gup: move private gup FOLL_ flags to internal.h Move the flags that should not/are not used outside gup.c and related into mm/internal.h to discourage driver abuse. To make this more maintainable going forward compact the two FOLL ranges with new bit numbers from 0 to 11 and 16 to 21, using shifts so it is explicit. Switch to an enum so the whole thing is easier to read. Link: https://lkml.kernel.org/r/13-v2-987e91b59705+36b-gup_tidy_jgg@nvidia.com Signed-off-by: Jason Gunthorpe Reviewed-by: John Hubbard Acked-by: David Hildenbrand Cc: David Howells Cc: Christoph Hellwig Cc: Claudio Imbrenda Cc: Alistair Popple Cc: Mike Rapoport (IBM) Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 57 +++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 434b3ac8a351..56753d0f096d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1040,9 +1040,6 @@ typedef unsigned int __bitwise zap_flags_t; * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each * other. Here is what they mean, and how to use them: * - * FOLL_LONGTERM indicates that the page will be held for an indefinite time - * period _often_ under userspace control. This is in contrast to - * iov_iter_get_pages(), whose usages are transient. * * FIXME: For pages which are part of a filesystem, mappings are subject to the * lifetime enforced by the filesystem and we need guarantees that longterm @@ -1086,24 +1083,40 @@ typedef unsigned int __bitwise zap_flags_t; * Please see Documentation/core-api/pin_user_pages.rst for more information. 
*/ -#define FOLL_WRITE 0x01 /* check pte is writable */ -#define FOLL_TOUCH 0x02 /* mark page accessed */ -#define FOLL_GET 0x04 /* do get_page on page */ -#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */ -#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ -#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO - * and return without waiting upon it */ -#define FOLL_NOFAULT 0x80 /* do not fault in pages */ -#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ -#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ -#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ -#define FOLL_ANON 0x8000 /* don't do file mappings */ -#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */ -#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ -#define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */ -#define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ -#define FOLL_PCI_P2PDMA 0x100000 /* allow returning PCI P2PDMA pages */ -#define FOLL_INTERRUPTIBLE 0x200000 /* allow interrupts from generic signals */ -#define FOLL_UNLOCKABLE 0x400000 /* allow unlocking the mmap lock (internal only) */ +enum { + /* check pte is writable */ + FOLL_WRITE = 1 << 0, + /* do get_page on page */ + FOLL_GET = 1 << 1, + /* give error on hole if it would be zero */ + FOLL_DUMP = 1 << 2, + /* get_user_pages read/write w/o permission */ + FOLL_FORCE = 1 << 3, + /* + * if a disk transfer is needed, start the IO and return without waiting + * upon it + */ + FOLL_NOWAIT = 1 << 4, + /* do not fault in pages */ + FOLL_NOFAULT = 1 << 5, + /* check page is hwpoisoned */ + FOLL_HWPOISON = 1 << 6, + /* don't do file mappings */ + FOLL_ANON = 1 << 7, + /* + * FOLL_LONGTERM indicates that the page will be held for an indefinite + * time period _often_ under userspace control. This is in contrast to + * iov_iter_get_pages(), whose usages are transient. + */ + FOLL_LONGTERM = 1 << 8, + /* split huge pmd before returning */ + FOLL_SPLIT_PMD = 1 << 9, + /* allow returning PCI P2PDMA pages */ + FOLL_PCI_P2PDMA = 1 << 10, + /* allow interrupts from generic signals */ + FOLL_INTERRUPTIBLE = 1 << 11, + + /* See also internal only FOLL flags in mm/internal.h */ +}; #endif /* _LINUX_MM_TYPES_H */ -- cgit v1.2.3 From 17ce252266c7f016ece026492c45838f852ddc79 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 19 Jan 2023 08:32:05 -0800 Subject: dmaengine: xilinx: xdma: Add xilinx xdma driver Add driver to enable PCIe board which uses XDMA (the DMA/Bridge Subsystem for PCI Express). For example, Xilinx Alveo PCIe devices. https://www.xilinx.com/products/boards-and-kits/alveo.html The XDMA engine support up to 4 Host to Card (H2C) and 4 Card to Host (C2H) channels. Memory transfers are specified on a per-channel basis in descriptor linked lists, which the DMA fetches from host memory and processes. Events such as descriptor completion and errors are signaled using interrupts. The hardware detail is provided by https://docs.xilinx.com/r/en-US/pg195-pcie-dma/Introduction This driver implements dmaengine APIs. 
- probe the available DMA channels - use dma_slave_map for channel lookup - use virtual channel to manage dmaengine tx descriptors - implement device_prep_slave_sg callback to handle host scatter gather list - implement device_config to config device address for DMA transfer Signed-off-by: Lizhi Hou Signed-off-by: Sonal Santan Signed-off-by: Max Zhen Signed-off-by: Brian Xu Tested-by: Martin Tuma Link: https://lore.kernel.org/r/1674145926-29449-2-git-send-email-lizhi.hou@amd.com Signed-off-by: Vinod Koul --- include/linux/platform_data/amd_xdma.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/linux/platform_data/amd_xdma.h (limited to 'include/linux') diff --git a/include/linux/platform_data/amd_xdma.h b/include/linux/platform_data/amd_xdma.h new file mode 100644 index 000000000000..b5e23e14bac8 --- /dev/null +++ b/include/linux/platform_data/amd_xdma.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2022, Advanced Micro Devices, Inc. + */ + +#ifndef _PLATDATA_AMD_XDMA_H +#define _PLATDATA_AMD_XDMA_H + +#include + +/** + * struct xdma_chan_info - DMA channel information + * This information is used to match channel when request dma channel + * @dir: Channel transfer direction + */ +struct xdma_chan_info { + enum dma_transfer_direction dir; +}; + +#define XDMA_FILTER_PARAM(chan_info) ((void *)(chan_info)) + +struct dma_slave_map; + +/** + * struct xdma_platdata - platform specific data for XDMA engine + * @max_dma_channels: Maximum dma channels in each direction + */ +struct xdma_platdata { + u32 max_dma_channels; + u32 device_map_cnt; + struct dma_slave_map *device_map; +}; + +#endif /* _PLATDATA_AMD_XDMA_H */ -- cgit v1.2.3 From ecf294a6f63f882319485f807754dacaeae96e9d Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Thu, 19 Jan 2023 08:32:06 -0800 Subject: dmaengine: xilinx: xdma: Add user logic interrupt support The Xilinx DMA/Bridge Subsystem for PCIe (XDMA) provides up to 16 user interrupt wires to user logic that generate interrupts to the host. This patch adds APIs to enable/disable user logic interrupt for a given interrupt wire index. Signed-off-by: Lizhi Hou Signed-off-by: Sonal Santan Signed-off-by: Max Zhen Signed-off-by: Brian Xu Tested-by: Martin Tuma Link: https://lore.kernel.org/r/1674145926-29449-3-git-send-email-lizhi.hou@amd.com Signed-off-by: Vinod Koul --- include/linux/dma/amd_xdma.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/linux/dma/amd_xdma.h (limited to 'include/linux') diff --git a/include/linux/dma/amd_xdma.h b/include/linux/dma/amd_xdma.h new file mode 100644 index 000000000000..ceba69ed7cb4 --- /dev/null +++ b/include/linux/dma/amd_xdma.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2022, Advanced Micro Devices, Inc. 
+ */ + +#ifndef _DMAENGINE_AMD_XDMA_H +#define _DMAENGINE_AMD_XDMA_H + +#include +#include + +int xdma_enable_user_irq(struct platform_device *pdev, u32 irq_num); +void xdma_disable_user_irq(struct platform_device *pdev, u32 irq_num); +int xdma_get_user_irq(struct platform_device *pdev, u32 user_irq_index); + +#endif /* _DMAENGINE_AMD_XDMA_H */ -- cgit v1.2.3 From 8b5443102cfca7a4346a50918f8ecc8a1644ea2e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 30 Jan 2023 13:05:03 +0200 Subject: dmaengine: Make an order in struct dma_device definition Make an order in struct dma_device: - added missing kernel doc descriptions - put descriptions in the order of appearance in the code - updated indentation where it makes sense Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230130110503.52250-1-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c923f4e60f24..bcfece892812 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -773,6 +773,7 @@ struct dma_filter { /** * struct dma_device - info on the entity supplying DMA services + * @ref: reference is taken and put every time a channel is allocated or freed * @chancnt: how many DMA channels are supported * @privatecnt: how many DMA channels are requested by dma_request_channel * @channels: the list of struct dma_chan @@ -789,6 +790,7 @@ struct dma_filter { * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @owner: owner module (automatically set based on the provided dev) + * @chan_ida: unique channel ID * @src_addr_widths: bit mask of src addr widths the device supports * Width is specified in bytes, e.g. for a device supporting * a width of 4 the mask should have BIT(4) set. @@ -802,6 +804,7 @@ struct dma_filter { * @max_sg_burst: max number of SG list entries executed in a single burst * DMA tansaction with no software intervention for reinitialization. * Zero value means unlimited number of entries. + * @descriptor_reuse: a submitted transfer can be resubmitted after completion * @residue_granularity: granularity of the transfer residue reported * by tx_status * @device_alloc_chan_resources: allocate resources and return the @@ -839,7 +842,6 @@ struct dma_filter { * struct with auxiliary transfer status information, otherwise the call * will just return a simple status code * @device_issue_pending: push pending transactions to hardware - * @descriptor_reuse: a submitted transfer can be resubmitted after completion * @device_release: called sometime atfer dma_async_device_unregister() is * called and there are no further references to this structure. This * must be implemented to free resources however many existing drivers @@ -847,6 +849,7 @@ struct dma_filter { * @dbg_summary_show: optional routine to show contents in debugfs; default code * will be used when this is omitted, but custom code can show extra, * controller specific information. 
+ * @dbg_dev_root: the root folder in debugfs for this device */ struct dma_device { struct kref ref; @@ -855,7 +858,7 @@ struct dma_device { struct list_head channels; struct list_head global_node; struct dma_filter filter; - dma_cap_mask_t cap_mask; + dma_cap_mask_t cap_mask; enum dma_desc_metadata_mode desc_metadata_modes; unsigned short max_xor; unsigned short max_pq; @@ -924,10 +927,8 @@ struct dma_device { struct dma_chan *chan, dma_addr_t dst, u64 data, unsigned long flags); - void (*device_caps)(struct dma_chan *chan, - struct dma_slave_caps *caps); - int (*device_config)(struct dma_chan *chan, - struct dma_slave_config *config); + void (*device_caps)(struct dma_chan *chan, struct dma_slave_caps *caps); + int (*device_config)(struct dma_chan *chan, struct dma_slave_config *config); int (*device_pause)(struct dma_chan *chan); int (*device_resume)(struct dma_chan *chan); int (*device_terminate_all)(struct dma_chan *chan); -- cgit v1.2.3 From 8c99377e614f8abfd881c34611002b2af5ab1ee8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Feb 2023 10:35:56 +0100 Subject: driver core: bus: add bus_get_dev_root() function Instead of poking around in the struct bus_type directly for the dev_root pointer, provide a function to return it properly reference counted, if it is present in the bus. This will be needed to move the pointer out of struct bus_type in the future. Use the function in the driver core code at the same time it is introduced to verify that it works properly. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230209093556.19132-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 31be18608f83..6ce32ef4b8fd 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -282,5 +282,6 @@ enum bus_notifier_event { }; extern struct kset *bus_get_kset(const struct bus_type *bus); +struct device *bus_get_dev_root(const struct bus_type *bus); #endif -- cgit v1.2.3 From 53c0e2f9b808fafaea7bec91eeec48046bcaaba0 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 13 Jan 2023 20:14:03 +0300 Subject: dmaengine: dw-edma: Replace chip ID number with device name Using an abstract number as the DW eDMA chip identifier isn't practical because there can be more than one DW eDMA controller on the platform. Some may be detected as the PCIe Endpoints, and others may be embedded in DW PCIe Root Port/Endpoint controllers. An abstract number in, for instance, the IRQ handlers list, doesn't give a notion regarding their reference to the particular DMA controller. To preserve the code simplicity and support multi-eDMA platforms, use the parental device name to create the DW eDMA controller name. 
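Conceptually the controller name is then built from the parent device, along the lines of the sketch below (the exact format string is an assumption, not taken from this patch):

    snprintf(dw->name, sizeof(dw->name), "dw-edma:%s", dev_name(chip->dev));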
Link: https://lore.kernel.org/r/20230113171409.30470-22-Sergey.Semin@baikalelectronics.ru Tested-by: Manivannan Sadhasivam Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Acked-by: Vinod Koul --- include/linux/dma/edma.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index 07a23ecc834f..96bfd0173f3a 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -76,7 +76,6 @@ enum dw_edma_chip_flags { */ struct dw_edma_chip { struct device *dev; - int id; int nr_irqs; const struct dw_edma_core_ops *ops; u32 flags; -- cgit v1.2.3 From 93c177fd6ff0655a5fa43ec945a57d7b0200ad80 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:21 -0800 Subject: kernel/range: Uplevel the cxl subsystem's range_contains() helper In support of the CXL subsystem's use of 'struct range' to track decode address ranges, add a common range_contains() implementation with identical semantics as resource_contains(); The existing 'range_contains()' in lib/stackinit_kunit.c is namespaced with a 'stackinit_' prefix. Cc: Kees Cook Reviewed-by: Vishal Verma Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Gregory Price Reviewed-by: Ira Weiny Tested-by: Fan Ni Link: https://lore.kernel.org/r/167601998163.1924368.6067392174077323935.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- include/linux/range.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/range.h b/include/linux/range.h index 274681cc3154..7efb6a9b069b 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -13,6 +13,11 @@ static inline u64 range_len(const struct range *range) return range->end - range->start + 1; } +static inline bool range_contains(struct range *r1, struct range *r2) +{ + return r1->start <= r2->start && r1->end >= r2->end; +} + int add_range(struct range *range, int az, int nr_range, u64 start, u64 end); -- cgit v1.2.3 From fe098574a93b4e2acb046b583e9857337d807f38 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:07:02 -0800 Subject: dax/hmem: Convey the dax range via memregion_info() In preparation for hmem platform devices to be unregistered, stop using platform_device_add_resources() to convey the address range. The platform_device_add_resources() API causes an existing "Soft Reserved" iomem resource to be re-parented under an inserted platform device resource. When that platform device is deleted it removes the platform device resource and all children. Instead, it is sufficient to convey just the address range and let request_mem_region() insert resources to indicate the devices active in the range. This allows the "Soft Reserved" resource to be re-enumerated upon the next probe event. 
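With the new field, the platform side only needs to fill in the range, roughly (variable names are illustrative):

    struct memregion_info info = {
            .target_node = target_nid,
            .range = {
                    .start = res->start,
                    .end   = res->end,
            },
    };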
Reviewed-by: Jonathan Cameron Tested-by: Fan Ni Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/167602002217.1924368.7036275892522551624.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- include/linux/memregion.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memregion.h b/include/linux/memregion.h index bf83363807ac..c01321467789 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -3,10 +3,12 @@ #define _MEMREGION_H_ #include #include +#include #include struct memregion_info { int target_node; + struct range range; }; #ifdef CONFIG_MEMREGION -- cgit v1.2.3 From 7dab174e2e27eeaf10273e597ffbef4f8ea032bb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:07:07 -0800 Subject: dax/hmem: Move hmem device registration to dax_hmem.ko In preparation for the CXL region driver to take over the responsibility of registering device-dax instances for CXL regions, move the registration of "hmem" devices to dax_hmem.ko. Previously the builtin component of this enabling (drivers/dax/hmem/device.o) would register platform devices for each address range and trigger the dax_hmem.ko module to load and attach device-dax instances to those devices. Now, the ranges are collected from the HMAT and EFI memory map walking, but the device creation is deferred. A new "hmem_platform" device is created which triggers dax_hmem.ko to load and register the platform devices. Tested-by: Fan Ni Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167602002771.1924368.5653558226424530127.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- include/linux/dax.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 2b5ecb591059..bf6258472e49 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -262,11 +262,14 @@ static inline bool dax_mapping(struct address_space *mapping) } #ifdef CONFIG_DEV_DAX_HMEM_DEVICES -void hmem_register_device(int target_nid, struct resource *r); +void hmem_register_resource(int target_nid, struct resource *r); #else -static inline void hmem_register_device(int target_nid, struct resource *r) +static inline void hmem_register_resource(int target_nid, struct resource *r) { } #endif +typedef int (*walk_hmem_fn)(struct device *dev, int target_nid, + const struct resource *res); +int walk_hmem_resources(struct device *dev, walk_hmem_fn fn); #endif -- cgit v1.2.3 From ffb1b4a41016295e298409c9dbcacd55680bd6d4 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 10 Feb 2023 14:42:01 -0800 Subject: x86/unwind/orc: Add 'signal' field to ORC metadata Add a 'signal' field which allows unwind hints to specify whether the instruction pointer should be taken literally (like for most interrupts and exceptions) rather than decremented (like for call stack return addresses) when used to find the next ORC entry. 
Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/d2c5ec4d83a45b513d8fd72fab59f1a8cfa46871.1676068346.git.jpoimboe@kernel.org --- include/linux/objtool.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 62c54ffbeeaa..9ac3df3fccf0 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -15,6 +15,7 @@ struct unwind_hint { s16 sp_offset; u8 sp_reg; u8 type; + u8 signal; u8 end; }; #endif @@ -49,7 +50,7 @@ struct unwind_hint { #ifndef __ASSEMBLY__ -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ +#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end) \ "987: \n\t" \ ".pushsection .discard.unwind_hints\n\t" \ /* struct unwind_hint */ \ @@ -57,6 +58,7 @@ struct unwind_hint { ".short " __stringify(sp_offset) "\n\t" \ ".byte " __stringify(sp_reg) "\n\t" \ ".byte " __stringify(type) "\n\t" \ + ".byte " __stringify(signal) "\n\t" \ ".byte " __stringify(end) "\n\t" \ ".balign 4 \n\t" \ ".popsection\n\t" @@ -129,7 +131,7 @@ struct unwind_hint { * the debuginfo as necessary. It will also warn if it sees any * inconsistencies. */ -.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0 .Lunwind_hint_ip_\@: .pushsection .discard.unwind_hints /* struct unwind_hint */ @@ -137,6 +139,7 @@ struct unwind_hint { .short \sp_offset .byte \sp_reg .byte \type + .byte \signal .byte \end .balign 4 .popsection @@ -174,7 +177,7 @@ struct unwind_hint { #ifndef __ASSEMBLY__ -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ +#define UNWIND_HINT(sp_reg, sp_offset, type, signal, end) \ "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) @@ -182,7 +185,7 @@ struct unwind_hint { #define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL -.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 +.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm -- cgit v1.2.3 From 05b8773ca33253ea562be145cf3145b05ef19f86 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Thu, 22 Dec 2022 19:33:31 +0000 Subject: mtd: ubi: block: wire-up device parent ubiblock devices were previously only identifyable by their name, but not connected to their parent UBI volume device e.g. in sysfs. Properly parent ubiblock device as descendant of a UBI volume device to reflect device model hierachy. Signed-off-by: Daniel Golle Signed-off-by: Richard Weinberger --- include/linux/mtd/ubi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index 7d48ea368c5e..a529347fd75b 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -110,6 +110,7 @@ struct ubi_volume_info { int name_len; const char *name; dev_t cdev; + struct device *dev; }; /** -- cgit v1.2.3 From d6dd5bafaabf98a99a76227ab8dc9a89e76a198f Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 24 Jan 2023 12:41:56 +0530 Subject: PCI: endpoint: Use a separate lock for protecting epc->pci_epf list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EPC controller maintains a list of EPF drivers added to it. For protecting this list against the concurrent accesses, the epc->lock (used for protecting epc_ops) has been used so far. 
Since there were no users trying to use epc_ops and modify the pci_epf list simultaneously, this was not an issue. But with the addition of callback mechanism for passing the events, this will be a problem. Because the pci_epf list needs to be iterated first for getting hold of the EPF driver and then the relevant event specific callback needs to be called for the driver. If the same epc->lock is used, then it will result in a deadlock scenario. For instance, ... mutex_lock(&epc->lock); list_for_each_entry(epf, &epc->pci_epf, list) { epf->event_ops->core_init(epf); | |-> pci_epc_set_bar(); | |-> mutex_lock(&epc->lock) # DEADLOCK ... So to fix this issue, use a separate lock called "list_lock" for protecting the pci_epf list against the concurrent accesses. This lock will also be used by the callback mechanism. Link: https://lore.kernel.org/linux-pci/20230124071158.5503-4-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Acked-by: Kishon Vijay Abraham I --- include/linux/pci-epc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index a48778e1a4ee..fe729dfe509b 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -122,6 +122,7 @@ struct pci_epc_mem { * struct pci_epc - represents the PCI EPC device * @dev: PCI EPC device * @pci_epf: list of endpoint functions present in this EPC device + * list_lock: Mutex for protecting pci_epf list * @ops: function pointers for performing endpoint operations * @windows: array of address space of the endpoint controller * @mem: first window of the endpoint controller, which corresponds to @@ -139,6 +140,7 @@ struct pci_epc_mem { struct pci_epc { struct device dev; struct list_head pci_epf; + struct mutex list_lock; const struct pci_epc_ops *ops; struct pci_epc_mem **windows; struct pci_epc_mem *mem; -- cgit v1.2.3 From 838125b07e7706b1a9069079a73507fd3df244f7 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 24 Jan 2023 12:41:57 +0530 Subject: PCI: endpoint: Use callback mechanism for passing events from EPC to EPF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of using the notifiers for passing the events from EPC to EPF, let's introduce a callback based mechanism where the EPF drivers can populate relevant callbacks for EPC events they want to subscribe. The use of notifiers in kernel is not recommended if there is a real link between the sender and receiver, like in this case. Also, the existing atomic notifier forces the notification functions to be in atomic context while the caller may be in non-atomic context. For instance, the two in-kernel users of the notifiers, pcie-qcom and pcie-tegra194, both are calling the notifier functions in non-atomic context (from threaded IRQ handlers). This creates a sleeping in atomic context issue with the existing EPF_TEST driver that calls the EPC APIs that may sleep. For all these reasons, let's get rid of the notifier chains and use the simple callback mechanism for signalling the events from EPC to EPF drivers. This preserves the context of the caller and avoids the latency of going through a separate interface for triggering the notifications. As a first step of the transition, the core_init() callback is introduced in this commit, that'll replace the existing CORE_INIT notifier used for signalling the init complete event from EPC. 
During the occurrence of the event, EPC will go over the list of EPF drivers attached to it and will call the core_init() callback if available. Link: https://lore.kernel.org/linux-pci/20230124071158.5503-5-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Acked-by: Kishon Vijay Abraham I --- include/linux/pci-epf.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 009a07147c61..fa629c191f00 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -18,7 +18,6 @@ struct pci_epf; enum pci_epc_interface_type; enum pci_notify_event { - CORE_INIT, LINK_UP, }; @@ -72,6 +71,14 @@ struct pci_epf_ops { struct config_group *group); }; +/** + * struct pci_epf_event_ops - Callbacks for capturing the EPC events + * @core_init: Callback for the EPC initialization complete event + */ +struct pci_epc_event_ops { + int (*core_init)(struct pci_epf *epf); +}; + /** * struct pci_epf_driver - represents the PCI EPF driver * @probe: ops to perform when a new EPF device has been bound to the EPF driver @@ -139,6 +146,7 @@ struct pci_epf_bar { * @is_vf: true - virtual function, false - physical function * @vfunction_num_map: bitmap to manage virtual function number * @pci_vepf: list of virtual endpoint functions associated with this function + * @event_ops: Callbacks for capturing the EPC events */ struct pci_epf { struct device dev; @@ -168,6 +176,7 @@ struct pci_epf { unsigned int is_vf; unsigned long vfunction_num_map; struct list_head pci_vepf; + const struct pci_epc_event_ops *event_ops; }; /** -- cgit v1.2.3 From f5edd8715e2ea672e6119c3764041743761fb178 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 24 Jan 2023 12:41:58 +0530 Subject: PCI: endpoint: Use link_up() callback in place of LINK_UP notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a part of the transition towards callback mechanism for signalling the events from EPC to EPF, let's use the link_up() callback in the place of the LINK_UP notifier. This also removes the notifier support completely from the PCI endpoint framework. 
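An EPF driver then populates the callback table instead of registering a notifier; a minimal sketch (the function and variable names are hypothetical):

    static int my_epf_link_up(struct pci_epf *epf)
    {
            /* start whatever work needs an established link */
            return 0;
    }

    static const struct pci_epc_event_ops my_epf_event_ops = {
            .link_up = my_epf_link_up,
    };

    /* in the EPF driver's probe path */
    epf->event_ops = &my_epf_event_ops;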
Link: https://lore.kernel.org/linux-pci/20230124071158.5503-6-manivannan.sadhasivam@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Acked-by: Kishon Vijay Abraham I --- include/linux/pci-epc.h | 8 -------- include/linux/pci-epf.h | 8 ++------ 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index fe729dfe509b..301bb0e53707 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -135,7 +135,6 @@ struct pci_epc_mem { * @group: configfs group representing the PCI EPC device * @lock: mutex to protect pci_epc ops * @function_num_map: bitmap to manage physical function number - * @notifier: used to notify EPF of any EPC events (like linkup) */ struct pci_epc { struct device dev; @@ -151,7 +150,6 @@ struct pci_epc { /* mutex to protect against concurrent access of EP controller */ struct mutex lock; unsigned long function_num_map; - struct atomic_notifier_head notifier; }; /** @@ -194,12 +192,6 @@ static inline void *epc_get_drvdata(struct pci_epc *epc) return dev_get_drvdata(&epc->dev); } -static inline int -pci_epc_register_notifier(struct pci_epc *epc, struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&epc->notifier, nb); -} - struct pci_epc * __devm_pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, struct module *owner); diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index fa629c191f00..a215dc8ce693 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -17,10 +17,6 @@ struct pci_epf; enum pci_epc_interface_type; -enum pci_notify_event { - LINK_UP, -}; - enum pci_barno { NO_BAR = -1, BAR_0, @@ -74,9 +70,11 @@ struct pci_epf_ops { /** * struct pci_epf_event_ops - Callbacks for capturing the EPC events * @core_init: Callback for the EPC initialization complete event + * @link_up: Callback for the EPC link up event */ struct pci_epc_event_ops { int (*core_init)(struct pci_epf *epf); + int (*link_up)(struct pci_epf *epf); }; /** @@ -134,7 +132,6 @@ struct pci_epf_bar { * @epf_pf: the physical EPF device to which this virtual EPF device is bound * @driver: the EPF driver to which this EPF device is bound * @list: to add pci_epf as a list of PCI endpoint functions to pci_epc - * @nb: notifier block to notify EPF of any EPC events (like linkup) * @lock: mutex to protect pci_epf_ops * @sec_epc: the secondary EPC device to which this EPF device is bound * @sec_epc_list: to add pci_epf as list of PCI endpoint functions to secondary @@ -162,7 +159,6 @@ struct pci_epf { struct pci_epf *epf_pf; struct pci_epf_driver *driver; struct list_head list; - struct notifier_block nb; /* mutex to protect against concurrent access of pci_epf_ops */ struct mutex lock; -- cgit v1.2.3 From 80df83c2c57e75cb482ccf0c639ce84703ab41a2 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 4 Feb 2023 00:53:35 +0100 Subject: mmc: core: add devm_mmc_alloc_host Add a device-managed version of mmc_alloc_host(). The argument order is reversed compared to mmc_alloc_host() because device-managed functions typically have the device argument first. 
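A minimal usage sketch in a host driver probe path (struct my_host is a placeholder for the driver's private data, and the NULL-on-failure check assumes the same convention as mmc_alloc_host()):

    struct mmc_host *mmc;
    struct my_host *host;

    mmc = devm_mmc_alloc_host(&pdev->dev, sizeof(*host));
    if (!mmc)
            return -ENOMEM;
    host = mmc_priv(mmc);

    /* ... set up ops, caps, resources ... */

    return mmc_add_host(mmc);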
Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/6d8f9fdc-7c9e-8e4f-e6ef-5470b971c74e@gmail.com Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 8fdd3cf971a3..812e6b583b25 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -527,6 +527,7 @@ struct mmc_host { struct device_node; struct mmc_host *mmc_alloc_host(int extra, struct device *); +struct mmc_host *devm_mmc_alloc_host(struct device *dev, int extra); int mmc_add_host(struct mmc_host *); void mmc_remove_host(struct mmc_host *); void mmc_free_host(struct mmc_host *); -- cgit v1.2.3 From 6aa3a920125e9f58891e2b5dc2efd4d0c1ff05a6 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 13 Jan 2023 16:30:50 -0600 Subject: mm/hugetlb: convert isolate_hugetlb to folios Patch series "continue hugetlb folio conversion", v3. This series continues the conversion of core hugetlb functions to use folios. This series converts many helper funtions in the hugetlb fault path. This is in preparation for another series to convert the hugetlb fault code paths to operate on folios. This patch (of 8): Convert isolate_hugetlb() to take in a folio and convert its callers to pass a folio. Use page_folio() to convert the callers to use a folio is safe as isolate_hugetlb() operates on a head page. Link: https://lkml.kernel.org/r/20230113223057.173292-1-sidhartha.kumar@oracle.com Link: https://lkml.kernel.org/r/20230113223057.173292-2-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: John Hubbard Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index a51e6daacac6..6e38a019f654 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -171,7 +171,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); -int isolate_hugetlb(struct page *page, struct list_head *list); +int isolate_hugetlb(struct folio *folio, struct list_head *list); int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); @@ -413,7 +413,7 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, return NULL; } -static inline int isolate_hugetlb(struct page *page, struct list_head *list) +static inline int isolate_hugetlb(struct folio *folio, struct list_head *list) { return -EBUSY; } -- cgit v1.2.3 From ff7d853b031302376a0d3640fa1c463d94079637 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 13 Jan 2023 16:30:54 -0600 Subject: mm/hugetlb: increase use of folios in alloc_huge_page() Change hugetlb_cgroup_commit_charge{,_rsvd}(), dequeue_huge_page_vma() and alloc_buddy_huge_page_with_mpol() to use folios so alloc_huge_page() is cleaned by operating on folios until its return. 
Link: https://lkml.kernel.org/r/20230113223057.173292-6-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb_cgroup.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index f706626a8063..3d82d91f49ac 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -141,10 +141,10 @@ extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr); extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, - struct page *page); + struct folio *folio); extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, - struct page *page); + struct folio *folio); extern void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages, struct folio *folio); extern void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages, @@ -230,14 +230,14 @@ static inline int hugetlb_cgroup_charge_cgroup_rsvd(int idx, static inline void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, - struct page *page) + struct folio *folio) { } static inline void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages, struct hugetlb_cgroup *h_cg, - struct page *page) + struct folio *folio) { } -- cgit v1.2.3 From e37d3e838d9078538f920957d1e89682b6764977 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Fri, 13 Jan 2023 16:30:55 -0600 Subject: mm/hugetlb: convert alloc_migrate_huge_page to folios Change alloc_huge_page_nodemask() to alloc_hugetlb_folio_nodemask() and alloc_migrate_huge_page() to alloc_migrate_hugetlb_folio(). Both functions now return a folio rather than a page. 
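Callers now receive a folio and only convert back where a struct page is still required, roughly:

    /* before */
    page = alloc_huge_page_nodemask(h, nid, nmask, gfp_mask);

    /* after */
    folio = alloc_hugetlb_folio_nodemask(h, nid, nmask, gfp_mask);
    if (folio)
            page = &folio->page;    /* only where a page pointer is still needed */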
Link: https://lkml.kernel.org/r/20230113223057.173292-7-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6e38a019f654..2375c62c61a4 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -719,7 +719,7 @@ struct huge_bootmem_page { int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); -struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, +struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); @@ -1040,8 +1040,8 @@ static inline struct page *alloc_huge_page(struct vm_area_struct *vma, return NULL; } -static inline struct page * -alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, +static inline struct folio * +alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask) { return NULL; -- cgit v1.2.3 From ea8e72f4116a995c2aba3fb738ac372c4115375a Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:32 -0800 Subject: mm/hugetlb: convert putback_active_hugepage to take in a folio Convert putback_active_hugepage() to folio_putback_active_hugetlb(), this removes one user of the Huge Page macros which take in a page. The callers in migrate.c are also cleaned up by being able to directly use the src and dst folio variables. Link: https://lkml.kernel.org/r/20230125170537.96973-4-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Mike Kravetz Cc: Gerald Schaefer Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 2375c62c61a4..067906c5778e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -175,7 +175,7 @@ int isolate_hugetlb(struct folio *folio, struct list_head *list); int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); -void putback_active_hugepage(struct page *page); +void folio_putback_active_hugetlb(struct folio *folio); void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); void free_huge_page(struct page *page); void hugetlb_fix_reserve_counts(struct inode *inode); @@ -429,7 +429,7 @@ static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags, return 0; } -static inline void putback_active_hugepage(struct page *page) +static inline void folio_putback_active_hugetlb(struct folio *folio) { } -- cgit v1.2.3 From d0ce0e47b323a8d7fb5dc3314ce56afa650ade2d Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:33 -0800 Subject: mm/hugetlb: convert hugetlb fault paths to use alloc_hugetlb_folio() Change alloc_huge_page() to alloc_hugetlb_folio() by changing all callers to handle the now folio return type of the function. 
In this conversion, alloc_huge_page_vma() is also changed to alloc_hugetlb_folio_vma() and hugepage_add_new_anon_rmap() is changed to take in a folio directly. Many additions of '&folio->page' are cleaned up in subsequent patches. hugetlbfs_fallocate() is also refactored to use the RCU + page_cache_next_miss() API. Link: https://lkml.kernel.org/r/20230125170537.96973-5-sidhartha.kumar@oracle.com Suggested-by: Mike Kravetz Reported-by: kernel test robot Signed-off-by: Sidhartha Kumar Cc: Gerald Schaefer Cc: John Hubbard Cc: Matthew Wilcox Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 8 ++++---- include/linux/rmap.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 067906c5778e..6408f85e5754 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -717,11 +717,11 @@ struct huge_bootmem_page { }; int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); -struct page *alloc_huge_page(struct vm_area_struct *vma, +struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); -struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, +struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); @@ -1033,7 +1033,7 @@ static inline int isolate_or_dissolve_huge_page(struct page *page, return -ENOMEM; } -static inline struct page *alloc_huge_page(struct vm_area_struct *vma, +static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve) { @@ -1047,7 +1047,7 @@ alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, return NULL; } -static inline struct page *alloc_huge_page_vma(struct hstate *h, +static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address) { diff --git a/include/linux/rmap.h b/include/linux/rmap.h index a6bd1f0a183d..a4570da03e58 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -203,7 +203,7 @@ void page_remove_rmap(struct page *, struct vm_area_struct *, void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long address, rmap_t flags); -void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *, +void hugepage_add_new_anon_rmap(struct folio *, struct vm_area_struct *, unsigned long address); static inline void __page_dup_rmap(struct page *page, bool compound) -- cgit v1.2.3 From d2d7bb44bfbd29200426ba17741550d36e081f91 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:34 -0800 Subject: mm/hugetlb: convert restore_reserve_on_error to take in a folio Every caller of restore_reserve_on_error() is now passing in &folio->page, change the function to take in a folio directly and clean up the call sites. 
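The call-site cleanup is mechanical:

    /* before */
    restore_reserve_on_error(h, vma, addr, &folio->page);

    /* after */
    restore_reserve_on_error(h, vma, addr, folio);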
Link: https://lkml.kernel.org/r/20230125170537.96973-6-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Cc: Gerald Schaefer Cc: John Hubbard Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6408f85e5754..20ceaaea1697 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -726,7 +726,7 @@ struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *v int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, - unsigned long address, struct page *page); + unsigned long address, struct folio *folio); /* arch callback */ int __init __alloc_bootmem_huge_page(struct hstate *h, int nid); -- cgit v1.2.3 From 9b91c0e277a3dbb165c2e4301be7a231dc2f76f7 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Wed, 25 Jan 2023 09:05:35 -0800 Subject: mm/hugetlb: convert hugetlb_add_to_page_cache to take in a folio Every caller of hugetlb_add_to_page_cache() is now passing in &folio->page, change the function to take in a folio directly and clean up the call sites. Link: https://lkml.kernel.org/r/20230125170537.96973-7-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Cc: Gerald Schaefer Cc: John Hubbard Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 20ceaaea1697..df6dd624ccfe 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -723,7 +723,7 @@ struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask, gfp_t gfp_mask); struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, unsigned long address); -int hugetlb_add_to_page_cache(struct page *page, struct address_space *mapping, +int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t idx); void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, unsigned long address, struct folio *folio); -- cgit v1.2.3 From fa4e3f5ffa5e6e22f751d289c9afa502dda30b8d Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Mon, 30 Jan 2023 12:18:28 -0800 Subject: mm: add folio_estimated_sharers() Patch series "Convert various mempolicy.c functions to use folios", v4. This patch series converts migrate_page_add() and queue_pages_required() to migrate_folio_add() and queue_page_required(). It also converts the callers of the functions to use folios as well, and introduces a helper function to estimate the number of sharers of a folio. This patch (of 6): folio_estimated_sharers() takes in a folio and returns the precise number of times the first subpage of the folio is mapped. This function aims to provide an estimate for the number of sharers of a folio. This is necessary for folio conversions where we care about the number of processes that share a folio, but don't necessarily want to check every single page within that folio. This is in contrast to folio_mapcount() which calculates the total number of the times a folio and all its subpages are mapped. 
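As a rough usage sketch (illustrative only, not part of this patch), a scan that wants to skip folios that appear to be shared could do:

        if (folio_estimated_sharers(folio) > 1)
                continue;       /* likely mapped by more than one process */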
Link: https://lkml.kernel.org/r/20230130201833.27042-1-vishal.moola@gmail.com Link: https://lkml.kernel.org/r/20230130201833.27042-2-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Reviewed-by: Yin Fengwei Acked-by: David Hildenbrand Cc: Jane Chu Signed-off-by: Andrew Morton --- include/linux/mm.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9454b7eb055b..89c118ad4a44 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1916,6 +1916,24 @@ static inline size_t folio_size(struct folio *folio) return PAGE_SIZE << folio_order(folio); } +/** + * folio_estimated_sharers - Estimate the number of sharers of a folio. + * @folio: The folio. + * + * folio_estimated_sharers() aims to serve as a function to efficiently + * estimate the number of processes sharing a folio. This is done by + * looking at the precise mapcount of the first subpage in the folio, and + * assuming the other subpages are the same. This may not be true for large + * folios. If you want exact mapcounts for exact calculations, look at + * page_mapcount() or folio_total_mapcount(). + * + * Return: The estimated number of processes sharing a folio. + */ +static inline int folio_estimated_sharers(struct folio *folio) +{ + return page_mapcount(folio_page(folio, 0)); +} + #ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE static inline int arch_make_page_accessible(struct page *page) { -- cgit v1.2.3 From 3c1ea2c729ef8ef07bcb80d01ab2ead45b3406dd Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Mon, 30 Jan 2023 13:43:49 -0800 Subject: mm: add folio_get_nontail_page() Patch series "Convert a couple migrate functions to use folios", v2. This patchset introduces folio_movable_ops() and converts 3 functions in mm/migrate.c to use folios. It also introduces folio_get_nontail_page() for folio conversions which may want to distinguish between head and tail pages. This patch (of 4): folio_get_nontail_page() returns the folio associated with a head page. This is necessary for folio conversions where the behavior of that function differs between head pages and tail pages. Link: https://lkml.kernel.org/r/20230130214352.40538-1-vishal.moola@gmail.com Link: https://lkml.kernel.org/r/20230130214352.40538-2-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 89c118ad4a44..2992a2d55aee 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -892,6 +892,13 @@ static inline bool get_page_unless_zero(struct page *page) return page_ref_add_unless(page, 1, 0); } +static inline struct folio *folio_get_nontail_page(struct page *page) +{ + if (unlikely(!get_page_unless_zero(page))) + return NULL; + return (struct folio *)page; +} + extern int page_is_ram(unsigned long pfn); enum { -- cgit v1.2.3 From da707a6d184a8a6ef0b756c3ba49888fec223793 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Mon, 30 Jan 2023 13:43:50 -0800 Subject: mm/migrate: add folio_movable_ops() folio_movable_ops() does the same as page_movable_ops() except uses folios instead of pages. This function will help make folio conversions in migrate.c more readable. 
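A migration-path sketch (illustrative only; dst is an assumed destination folio, and rc and mode come from the caller's context):

        if (__folio_test_movable(folio)) {
                const struct movable_operations *mops = folio_movable_ops(folio);

                rc = mops->migrate_page(&dst->page, &folio->page, mode);
        }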
Link: https://lkml.kernel.org/r/20230130214352.40538-3-vishal.moola@gmail.com Signed-off-by: Vishal Moola (Oracle) Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/migrate.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 3ef77f52a4f0..bdff950a8bb4 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -122,6 +122,15 @@ static inline bool folio_test_movable(struct folio *folio) return PageMovable(&folio->page); } +static inline +const struct movable_operations *folio_movable_ops(struct folio *folio) +{ + VM_BUG_ON(!__folio_test_movable(folio)); + + return (const struct movable_operations *) + ((unsigned long)folio->mapping - PAGE_MAPPING_MOVABLE); +} + static inline const struct movable_operations *page_movable_ops(struct page *page) { -- cgit v1.2.3 From a2b9b123ccac913e9f9b80337d687a2fe786a634 Mon Sep 17 00:00:00 2001 From: Mengyuan Lou Date: Tue, 7 Feb 2023 18:24:19 +0800 Subject: PCI: Add ACS quirk for Wangxun NICs Wangxun has verified there is no peer-to-peer between functions for the below selection of SFxxx, RP1000 and RP2000 NICS. They may be multi-function devices, but the hardware does not advertise ACS capability. Add an ACS quirk for these devices so the functions can be in independent IOMMU groups. Link: https://lore.kernel.org/r/20230207102419.44326-1-mengyuanlou@net-swift.com Signed-off-by: Mengyuan Lou Signed-off-by: Bjorn Helgaas --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b362d90eb9b0..bc8f484cdcf3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3012,6 +3012,8 @@ #define PCI_DEVICE_ID_INTEL_VMD_9A0B 0x9a0b #define PCI_DEVICE_ID_INTEL_S21152BB 0xb152 +#define PCI_VENDOR_ID_WANGXUN 0x8088 + #define PCI_VENDOR_ID_SCALEMP 0x8686 #define PCI_DEVICE_ID_SCALEMP_VSMP_CTL 0x1010 -- cgit v1.2.3 From f57aec443c24d2e8e1f3b5b4856aea12ddda4254 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 13 Feb 2023 17:01:05 -0800 Subject: cxl/pmem: Fix nvdimm registration races A loop of the form: while true; do modprobe cxl_pci; modprobe -r cxl_pci; done ...fails with the following crash signature: BUG: kernel NULL pointer dereference, address: 0000000000000040 [..] RIP: 0010:cxl_internal_send_cmd+0x5/0xb0 [cxl_core] [..] Call Trace: cxl_pmem_ctl+0x121/0x240 [cxl_pmem] nvdimm_get_config_data+0xd6/0x1a0 [libnvdimm] nd_label_data_init+0x135/0x7e0 [libnvdimm] nvdimm_probe+0xd6/0x1c0 [libnvdimm] nvdimm_bus_probe+0x7a/0x1e0 [libnvdimm] really_probe+0xde/0x380 __driver_probe_device+0x78/0x170 driver_probe_device+0x1f/0x90 __device_attach_driver+0x85/0x110 bus_for_each_drv+0x7d/0xc0 __device_attach+0xb4/0x1e0 bus_probe_device+0x9f/0xc0 device_add+0x445/0x9c0 nd_async_device_register+0xe/0x40 [libnvdimm] async_run_entry_fn+0x30/0x130 ...namely that the bottom half of async nvdimm device registration runs after the CXL has already torn down the context that cxl_pmem_ctl() needs. Unlike the ACPI NFIT case that benefits from launching multiple nvdimm device registrations in parallel from those listed in the table, CXL is already marked PROBE_PREFER_ASYNCHRONOUS. So provide for a synchronous registration path to preclude this scenario. 
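Provider-side sketch (illustrative only; the full __nvdimm_create() argument list is omitted):

        unsigned long flags = 0;

        set_bit(NDD_REGISTER_SYNC, &flags);
        /* pass @flags to __nvdimm_create() so the nvdimm registers
         * synchronously, before the provider's probe path returns */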
Fixes: 21083f51521f ("cxl/pmem: Register 'pmem' / cxl_nvdimm devices") Cc: Reported-by: Dave Jiang Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index af38252ad704..e772aae71843 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -41,6 +41,9 @@ enum { */ NDD_INCOHERENT = 7, + /* dimm provider wants synchronous registration by __nvdimm_create() */ + NDD_REGISTER_SYNC = 8, + /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, ND_CMD_MAX_ELEM = 5, -- cgit v1.2.3 From ab9fdd41d970c38ddc0fd59e5f8f37e8d966d454 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 13 Feb 2023 07:52:11 -0800 Subject: rpmsg: glink: smem: Wrap driver context The Glink SMEM driver allocates a struct device and hangs two devres-allocated pipe objects thereon. To facilitate the move of interrupt and mailbox handling to the driver, introduce a wrapper object capturing the device, glink reference and remote processor id. The type of the remoteproc reference is updated, as these are specifically targeting the SMEM implementation. Signed-off-by: Bjorn Andersson Reviewed-by: Chris Lew Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230213155215.1237059-3-quic_bjorande@quicinc.com --- include/linux/rpmsg/qcom_glink.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rpmsg/qcom_glink.h b/include/linux/rpmsg/qcom_glink.h index 22fc3a69b683..bfbd48f435fa 100644 --- a/include/linux/rpmsg/qcom_glink.h +++ b/include/linux/rpmsg/qcom_glink.h @@ -5,7 +5,7 @@ #include -struct qcom_glink; +struct qcom_glink_smem; #if IS_ENABLED(CONFIG_RPMSG_QCOM_GLINK) void qcom_glink_ssr_notify(const char *ssr_name); @@ -15,20 +15,20 @@ static inline void qcom_glink_ssr_notify(const char *ssr_name) {} #if IS_ENABLED(CONFIG_RPMSG_QCOM_GLINK_SMEM) -struct qcom_glink *qcom_glink_smem_register(struct device *parent, - struct device_node *node); -void qcom_glink_smem_unregister(struct qcom_glink *glink); +struct qcom_glink_smem *qcom_glink_smem_register(struct device *parent, + struct device_node *node); +void qcom_glink_smem_unregister(struct qcom_glink_smem *glink); #else -static inline struct qcom_glink * +static inline struct qcom_glink_smem * qcom_glink_smem_register(struct device *parent, struct device_node *node) { return NULL; } -static inline void qcom_glink_smem_unregister(struct qcom_glink *glink) {} +static inline void qcom_glink_smem_unregister(struct qcom_glink_smem *glink) {} #endif #endif -- cgit v1.2.3 From 35c5db0ec49f073e6a2d5236b5fcfb0a134a215a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:35 -0500 Subject: NFS: Add basic functionality for tracking folios in struct nfs_page Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index ba7e2e4b0926..d2ddc9a594c5 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -25,6 +25,7 @@ enum { PG_BUSY = 0, /* nfs_{un}lock_request */ PG_MAPPED, /* page private set for buffered io */ + PG_FOLIO, /* Tracking a folio (unset for O_DIRECT) */ PG_CLEAN, /* write succeeded */ PG_COMMIT_TO_DS, /* used by pnfs layouts */ PG_INODE_REF, 
/* extra ref held by inode when in writeback */ @@ -41,7 +42,10 @@ enum { struct nfs_inode; struct nfs_page { struct list_head wb_list; /* Defines state of page: */ - struct page *wb_page; /* page to read in/write out */ + union { + struct page *wb_page; /* page to read in/write out */ + struct folio *wb_folio; + }; struct nfs_lock_context *wb_lock_context; /* lock context info */ pgoff_t wb_index; /* Offset >> PAGE_SHIFT */ unsigned int wb_offset, /* Offset & ~PAGE_MASK */ @@ -153,6 +157,38 @@ extern int nfs_page_set_headlock(struct nfs_page *req); extern void nfs_page_clear_headlock(struct nfs_page *req); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); +/** + * nfs_page_to_folio - Retrieve a struct folio for the request + * @req: pointer to a struct nfs_page + * + * If a folio was assigned to @req, then return it, otherwise return NULL. + */ +static inline struct folio *nfs_page_to_folio(const struct nfs_page *req) +{ + if (test_bit(PG_FOLIO, &req->wb_flags)) + return req->wb_folio; + return NULL; +} + +/** + * nfs_page_to_page - Retrieve a struct page for the request + * @req: pointer to a struct nfs_page + * @pgbase: folio byte offset + * + * Return the page containing the byte that is at offset @pgbase relative + * to the start of the folio. + * Note: The request starts at offset @req->wb_pgbase. + */ +static inline struct page *nfs_page_to_page(const struct nfs_page *req, + size_t pgbase) +{ + struct folio *folio = nfs_page_to_folio(req); + + if (folio == NULL) + return req->wb_page; + return folio_page(folio, pgbase >> PAGE_SHIFT); +} + /* * Lock the page of an asynchronous request */ -- cgit v1.2.3 From 8e0bdc7021f713fdf3b985cda3ce715e41b06698 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:37 -0500 Subject: NFS: Fix nfs_coalesce_size() to work with folios Use the helper folio_size() where appropriate. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index d2ddc9a594c5..192071a6e5f6 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -189,6 +189,21 @@ static inline struct page *nfs_page_to_page(const struct nfs_page *req, return folio_page(folio, pgbase >> PAGE_SHIFT); } +/** + * nfs_page_max_length - Retrieve the maximum possible length for a request + * @req: pointer to a struct nfs_page + * + * Returns the maximum possible length of a request + */ +static inline size_t nfs_page_max_length(const struct nfs_page *req) +{ + struct folio *folio = nfs_page_to_folio(req); + + if (folio == NULL) + return PAGE_SIZE; + return folio_size(folio); +} + /* * Lock the page of an asynchronous request */ -- cgit v1.2.3 From 6dd85e83f3f182b56770f8bb6dbed1f0dafb9117 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:38 -0500 Subject: NFS: Add a helper to convert a struct nfs_page into an inode Replace all the open coded calls to page_file_mapping(req->wb_page)->host. 
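The shape of the cleanup at a typical call site (illustrative, not an actual hunk from this patch):

        /* before */
        struct inode *inode = page_file_mapping(req->wb_page)->host;

        /* after */
        struct inode *inode = nfs_page_to_inode(req);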
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 192071a6e5f6..b0b03ec4a209 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -189,6 +189,19 @@ static inline struct page *nfs_page_to_page(const struct nfs_page *req, return folio_page(folio, pgbase >> PAGE_SHIFT); } +/** + * nfs_page_to_inode - Retrieve an inode for the request + * @req: pointer to a struct nfs_page + */ +static inline struct inode *nfs_page_to_inode(const struct nfs_page *req) +{ + struct folio *folio = nfs_page_to_folio(req); + + if (folio == NULL) + return page_file_mapping(req->wb_page)->host; + return folio_file_mapping(folio)->host; +} + /** * nfs_page_max_length - Retrieve the maximum possible length for a request * @req: pointer to a struct nfs_page -- cgit v1.2.3 From 4b27232a6e064f3d779cfa76cd251d6023949d22 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:40 -0500 Subject: NFS: Add a helper nfs_wb_folio() ...and use it in nfs_launder_folio(). Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d92fdfd2444c..66b5de42f6b8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -578,6 +578,7 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned */ extern int nfs_sync_inode(struct inode *inode); extern int nfs_wb_all(struct inode *inode); +extern int nfs_wb_folio(struct inode *inode, struct folio *folio); extern int nfs_wb_page(struct inode *inode, struct page *page); int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio); extern int nfs_commit_inode(struct inode *, int); -- cgit v1.2.3 From ab75bff1140733f1b43e81f055acd7d27af7ac05 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:41 -0500 Subject: NFS: Convert buffered reads to use folios Perform a largely mechanical conversion of references to struct page and page-specific functions to use the folio equivalents. Note that the fscache functionality remains untouched. Instead we just pass in the folio page. This should be OK, as long as we use order 0 folios together with fscache. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index b0b03ec4a209..3c71493d5cc3 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -125,6 +125,10 @@ extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, struct page *page, unsigned int offset, unsigned int count); +extern struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx, + struct folio *folio, + unsigned int offset, + unsigned int count); extern void nfs_release_request(struct nfs_page *); -- cgit v1.2.3 From 0c493b5cf16e28d761b6e77c7c32aa0e7af70813 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:43 -0500 Subject: NFS: Convert buffered writes to use folios Mostly mechanical conversion of struct page and functions into struct folio equivalents. The lack of support for folios in write_cache_pages(), means we still only support order 0 folio allocations. 
However the rest of the writeback code should now be ready for order n > 0. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 66b5de42f6b8..a61bfd52d551 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -569,8 +569,9 @@ extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); extern int nfs_congestion_kb; extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); -extern int nfs_flush_incompatible(struct file *file, struct page *page); -extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); +extern int nfs_flush_incompatible(struct file *file, struct folio *folio); +extern int nfs_update_folio(struct file *file, struct folio *folio, + unsigned int offset, unsigned int count); /* * Try to write back everything synchronously (but check the -- cgit v1.2.3 From 4cbf76948c457f0beb9f184ebb21341c8235846a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:44 -0500 Subject: NFS: Remove unused function nfs_wb_page() Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a61bfd52d551..45c44211e50e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -580,7 +580,6 @@ extern int nfs_update_folio(struct file *file, struct folio *folio, extern int nfs_sync_inode(struct inode *inode); extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_folio(struct inode *inode, struct folio *folio); -extern int nfs_wb_page(struct inode *inode, struct page *page); int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); -- cgit v1.2.3 From 70e9db69f927bb378db9aaa807cc83ae550779a9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Jan 2023 16:33:47 -0500 Subject: NFS: Clean up O_DIRECT request allocation Rather than adjusting the index+offset after the call to nfs_create_request(), add a function nfs_page_create_from_page() that takes an offset. 
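Caller-side sketch (illustrative only; variable names and the error handling are assumed):

        req = nfs_page_create_from_page(ctx, page, pgbase, pos, bytes);
        if (IS_ERR(req))
                return PTR_ERR(req);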
Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 3c71493d5cc3..a2f1ca657623 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -121,10 +121,11 @@ struct nfs_pageio_descriptor { #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) -extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, - struct page *page, - unsigned int offset, - unsigned int count); +extern struct nfs_page *nfs_page_create_from_page(struct nfs_open_context *ctx, + struct page *page, + unsigned int pgbase, + loff_t offset, + unsigned int count); extern struct nfs_page *nfs_page_create_from_folio(struct nfs_open_context *ctx, struct folio *folio, unsigned int offset, -- cgit v1.2.3 From 3bd940030752a33ff665eefdd74a1cdb74a4f9b0 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 25 Jan 2023 21:00:44 +0100 Subject: dm: add missing SPDX-License-Indentifiers 'GPL-2.0-only' is used instead of 'GPL-2.0' because SPDX has deprecated its use. Suggested-by: John Wiele Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 1 + include/linux/dm-bufio.h | 1 + include/linux/dm-dirty-log.h | 1 + include/linux/dm-io.h | 1 + include/linux/dm-kcopyd.h | 1 + include/linux/dm-region-hash.h | 1 + 6 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 04c6acf7faaa..425fa9be648c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2001 Sistina Software (UK) Limited. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 15d9e15ca830..798379b8597a 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2009-2011 Red Hat, Inc. * diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h index 7084503c3405..c12d40e332c1 100644 --- a/include/linux/dm-dirty-log.h +++ b/include/linux/dm-dirty-log.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2003 Sistina Software * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 8e1c4ab5df04..6846019da2a5 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2003 Sistina Software * Copyright (C) 2004 - 2008 Red Hat, Inc. All rights reserved. diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index c1707ee5b540..6b4ece85a2a0 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2001 - 2003 Sistina Software * Copyright (C) 2004 - 2008 Red Hat, Inc. All rights reserved. diff --git a/include/linux/dm-region-hash.h b/include/linux/dm-region-hash.h index 9e2a7a401df5..095541c59123 100644 --- a/include/linux/dm-region-hash.h +++ b/include/linux/dm-region-hash.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2003 Sistina Software Limited. * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 
-- cgit v1.2.3 From 86a3238c7b9b759cb864f4f768ab2e24687dc0e6 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 25 Jan 2023 21:14:58 +0100 Subject: dm: change "unsigned" to "unsigned int" Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 38 +++++++++++++++++++------------------- include/linux/dm-bufio.h | 12 ++++++------ include/linux/dm-dirty-log.h | 6 +++--- include/linux/dm-io.h | 8 ++++---- include/linux/dm-kcopyd.h | 22 +++++++++++----------- include/linux/dm-region-hash.h | 2 +- 6 files changed, 44 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 425fa9be648c..9881e772b68c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -88,10 +88,10 @@ typedef int (*dm_preresume_fn) (struct dm_target *ti); typedef void (*dm_resume_fn) (struct dm_target *ti); typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, - unsigned status_flags, char *result, unsigned maxlen); + unsigned int status_flags, char *result, unsigned int maxlen); -typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv, - char *result, unsigned maxlen); +typedef int (*dm_message_fn) (struct dm_target *ti, unsigned int argc, char **argv, + char *result, unsigned int maxlen); typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev); @@ -188,7 +188,7 @@ struct target_type { uint64_t features; const char *name; struct module *module; - unsigned version[3]; + unsigned int version[3]; dm_ctr_fn ctr; dm_dtr_fn dtr; dm_map_fn map; @@ -314,31 +314,31 @@ struct dm_target { * It is a responsibility of the target driver to remap these bios * to the real underlying devices. */ - unsigned num_flush_bios; + unsigned int num_flush_bios; /* * The number of discard bios that will be submitted to the target. * The bio number can be accessed with dm_bio_get_target_bio_nr. */ - unsigned num_discard_bios; + unsigned int num_discard_bios; /* * The number of secure erase bios that will be submitted to the target. * The bio number can be accessed with dm_bio_get_target_bio_nr. */ - unsigned num_secure_erase_bios; + unsigned int num_secure_erase_bios; /* * The number of WRITE ZEROES bios that will be submitted to the target. * The bio number can be accessed with dm_bio_get_target_bio_nr. */ - unsigned num_write_zeroes_bios; + unsigned int num_write_zeroes_bios; /* * The minimum number of extra bytes allocated in each io for the * target to use. */ - unsigned per_io_data_size; + unsigned int per_io_data_size; /* target specific data */ void *private; @@ -384,7 +384,7 @@ struct dm_target { void *dm_per_bio_data(struct bio *bio, size_t data_size); struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size); -unsigned dm_bio_get_target_bio_nr(const struct bio *bio); +unsigned int dm_bio_get_target_bio_nr(const struct bio *bio); u64 dm_start_time_ns_from_clone(struct bio *bio); @@ -395,7 +395,7 @@ void dm_unregister_target(struct target_type *t); * Target argument parsing. */ struct dm_arg_set { - unsigned argc; + unsigned int argc; char **argv; }; @@ -404,8 +404,8 @@ struct dm_arg_set { * the error message to use if the number is found to be outside that range. */ struct dm_arg { - unsigned min; - unsigned max; + unsigned int min; + unsigned int max; char *error; }; @@ -414,7 +414,7 @@ struct dm_arg { * returning -EINVAL and setting *error. 
*/ int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *value, char **error); + unsigned int *value, char **error); /* * Process the next argument as the start of a group containing between @@ -422,7 +422,7 @@ int dm_read_arg(const struct dm_arg *arg, struct dm_arg_set *arg_set, * *num_args or, if invalid, return -EINVAL and set *error. */ int dm_read_arg_group(const struct dm_arg *arg, struct dm_arg_set *arg_set, - unsigned *num_args, char **error); + unsigned int *num_args, char **error); /* * Return the current argument and shift to the next. @@ -432,7 +432,7 @@ const char *dm_shift_arg(struct dm_arg_set *as); /* * Move through num_args arguments. */ -void dm_consume_args(struct dm_arg_set *as, unsigned num_args); +void dm_consume_args(struct dm_arg_set *as, unsigned int num_args); /*----------------------------------------------------------------- * Functions for creating and manipulating mapped devices. @@ -462,7 +462,7 @@ void *dm_get_mdptr(struct mapped_device *md); /* * A device can still be used while suspended, but I/O is deferred. */ -int dm_suspend(struct mapped_device *md, unsigned suspend_flags); +int dm_suspend(struct mapped_device *md, unsigned int suspend_flags); int dm_resume(struct mapped_device *md); /* @@ -482,7 +482,7 @@ struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct dm_target *ti); int dm_post_suspending(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); -void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); +void dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors); void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone); union map_info *dm_get_rq_mapinfo(struct request *rq); @@ -526,7 +526,7 @@ int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo); * First create an empty table. */ int dm_table_create(struct dm_table **result, fmode_t mode, - unsigned num_targets, struct mapped_device *md); + unsigned int num_targets, struct mapped_device *md); /* * Then call this once for each target. diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h index 798379b8597a..2056743aaaaa 100644 --- a/include/linux/dm-bufio.h +++ b/include/linux/dm-bufio.h @@ -27,8 +27,8 @@ struct dm_buffer; * Create a buffered IO cache on a given device */ struct dm_bufio_client * -dm_bufio_client_create(struct block_device *bdev, unsigned block_size, - unsigned reserved_buffers, unsigned aux_size, +dm_bufio_client_create(struct block_device *bdev, unsigned int block_size, + unsigned int reserved_buffers, unsigned int aux_size, void (*alloc_callback)(struct dm_buffer *), void (*write_callback)(struct dm_buffer *), unsigned int flags); @@ -82,7 +82,7 @@ void *dm_bufio_new(struct dm_bufio_client *c, sector_t block, * I/O to finish. */ void dm_bufio_prefetch(struct dm_bufio_client *c, - sector_t block, unsigned n_blocks); + sector_t block, unsigned int n_blocks); /* * Release a reference obtained with dm_bufio_{read,get,new}. The data @@ -107,7 +107,7 @@ void dm_bufio_mark_buffer_dirty(struct dm_buffer *b); * write the specified part of the buffer or it may write a larger superset. */ void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b, - unsigned start, unsigned end); + unsigned int start, unsigned int end); /* * Initiate writing of dirty buffers, without waiting for completion. @@ -153,9 +153,9 @@ void dm_bufio_forget_buffers(struct dm_bufio_client *c, sector_t block, sector_t /* * Set the minimum number of buffers before cleanup happens. 
*/ -void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n); +void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned int n); -unsigned dm_bufio_get_block_size(struct dm_bufio_client *c); +unsigned int dm_bufio_get_block_size(struct dm_bufio_client *c); sector_t dm_bufio_get_device_size(struct dm_bufio_client *c); struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c); sector_t dm_bufio_get_block_number(struct dm_buffer *b); diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h index c12d40e332c1..a3eb7490d205 100644 --- a/include/linux/dm-dirty-log.h +++ b/include/linux/dm-dirty-log.h @@ -34,7 +34,7 @@ struct dm_dirty_log_type { struct list_head list; int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti, - unsigned argc, char **argv); + unsigned int argc, char **argv); void (*dtr)(struct dm_dirty_log *log); /* @@ -117,7 +117,7 @@ struct dm_dirty_log_type { * Support function for mirror status requests. */ int (*status)(struct dm_dirty_log *log, status_type_t status_type, - char *result, unsigned maxlen); + char *result, unsigned int maxlen); /* * is_remote_recovering is necessary for cluster mirroring. It provides @@ -140,7 +140,7 @@ int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type); struct dm_dirty_log *dm_dirty_log_create(const char *type_name, struct dm_target *ti, int (*flush_callback_fn)(struct dm_target *ti), - unsigned argc, char **argv); + unsigned int argc, char **argv); void dm_dirty_log_destroy(struct dm_dirty_log *log); #endif /* __KERNEL__ */ diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 6846019da2a5..7595142f3fc5 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -27,7 +27,7 @@ struct page_list { struct page *page; }; -typedef void (*io_notify_fn)(unsigned long error, void *context); +typedef void (*io_notify_fn)(unsigned int long error, void *context); enum dm_io_mem_type { DM_IO_PAGE_LIST,/* Page list */ @@ -39,7 +39,7 @@ enum dm_io_mem_type { struct dm_io_memory { enum dm_io_mem_type type; - unsigned offset; + unsigned int offset; union { struct page_list *pl; @@ -79,8 +79,8 @@ void dm_io_client_destroy(struct dm_io_client *client); * Each bit in the optional 'sync_error_bits' bitset indicates whether an * error occurred doing io to the corresponding region. 
*/ -int dm_io(struct dm_io_request *io_req, unsigned num_regions, - struct dm_io_region *region, unsigned long *sync_error_bits); +int dm_io(struct dm_io_request *io_req, unsigned int num_regions, + struct dm_io_region *region, unsigned int long *sync_error_bits); #endif /* __KERNEL__ */ #endif /* _LINUX_DM_IO_H */ diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 6b4ece85a2a0..51fb1af0b63e 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -24,11 +24,11 @@ #define DM_KCOPYD_WRITE_SEQ 2 struct dm_kcopyd_throttle { - unsigned throttle; - unsigned num_io_jobs; - unsigned io_period; - unsigned total_period; - unsigned last_jiffies; + unsigned int throttle; + unsigned int num_io_jobs; + unsigned int io_period; + unsigned int total_period; + unsigned int last_jiffies; }; /* @@ -61,12 +61,12 @@ void dm_kcopyd_client_flush(struct dm_kcopyd_client *kc); * read_err is a boolean, * write_err is a bitset, with 1 bit for each destination region */ -typedef void (*dm_kcopyd_notify_fn)(int read_err, unsigned long write_err, +typedef void (*dm_kcopyd_notify_fn)(int read_err, unsigned int long write_err, void *context); void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, - unsigned num_dests, struct dm_io_region *dests, - unsigned flags, dm_kcopyd_notify_fn fn, void *context); + unsigned int num_dests, struct dm_io_region *dests, + unsigned int flags, dm_kcopyd_notify_fn fn, void *context); /* * Prepare a callback and submit it via the kcopyd thread. @@ -81,11 +81,11 @@ void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, */ void *dm_kcopyd_prepare_callback(struct dm_kcopyd_client *kc, dm_kcopyd_notify_fn fn, void *context); -void dm_kcopyd_do_callback(void *job, int read_err, unsigned long write_err); +void dm_kcopyd_do_callback(void *job, int read_err, unsigned int long write_err); void dm_kcopyd_zero(struct dm_kcopyd_client *kc, - unsigned num_dests, struct dm_io_region *dests, - unsigned flags, dm_kcopyd_notify_fn fn, void *context); + unsigned int num_dests, struct dm_io_region *dests, + unsigned int flags, dm_kcopyd_notify_fn fn, void *context); #endif /* __KERNEL__ */ #endif /* _LINUX_DM_KCOPYD_H */ diff --git a/include/linux/dm-region-hash.h b/include/linux/dm-region-hash.h index 095541c59123..4086098a0d8e 100644 --- a/include/linux/dm-region-hash.h +++ b/include/linux/dm-region-hash.h @@ -38,7 +38,7 @@ struct dm_region_hash *dm_region_hash_create( struct bio_list *bios), void (*wakeup_workers)(void *context), void (*wakeup_all_recovery_waiters)(void *context), - sector_t target_begin, unsigned max_recovery, + sector_t target_begin, unsigned int max_recovery, struct dm_dirty_log *log, uint32_t region_size, region_t nr_regions); void dm_region_hash_destroy(struct dm_region_hash *rh); -- cgit v1.2.3 From 44bc08ed63db7a852bd1ba16611b700ee666091c Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 1 Feb 2023 21:51:04 +0100 Subject: dm: enclose complex macros into parentheses where possible Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 9881e772b68c..aa68267bc36f 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -613,8 +613,7 @@ void dm_destroy_crypto_profile(struct blk_crypto_profile *profile); #define DMDEBUG(fmt, ...) 
pr_debug(DM_FMT(fmt), ##__VA_ARGS__) #define DMDEBUG_LIMIT(fmt, ...) pr_debug_ratelimited(DM_FMT(fmt), ##__VA_ARGS__) -#define DMEMIT(x...) sz += ((sz >= maxlen) ? \ - 0 : scnprintf(result + sz, maxlen - sz, x)) +#define DMEMIT(x...) (sz += ((sz >= maxlen) ? 0 : scnprintf(result + sz, maxlen - sz, x))) #define DMEMIT_TARGET_NAME_VERSION(y) \ DMEMIT("target_name=%s,target_version=%u.%u.%u", \ -- cgit v1.2.3 From a4a82ce3d24d4409143a7b7b980072ada6e20b2a Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Thu, 26 Jan 2023 15:48:30 +0100 Subject: dm: correct block comments format. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 18 ++++++++++++------ include/linux/dm-dirty-log.h | 2 +- include/linux/dm-region-hash.h | 6 ++++-- 3 files changed, 17 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index aa68267bc36f..7975483816e4 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -434,10 +434,12 @@ const char *dm_shift_arg(struct dm_arg_set *as); */ void dm_consume_args(struct dm_arg_set *as, unsigned int num_args); -/*----------------------------------------------------------------- +/* + *---------------------------------------------------------------- * Functions for creating and manipulating mapped devices. * Drop the reference with dm_put when you finish with the object. - *---------------------------------------------------------------*/ + *---------------------------------------------------------------- + */ /* * DM_ANY_MINOR chooses the next available minor number. @@ -518,9 +520,11 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md); int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo); int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo); -/*----------------------------------------------------------------- +/* + *--------------------------------------------------------------- * Functions for manipulating device-mapper tables. - *---------------------------------------------------------------*/ + *--------------------------------------------------------------- + */ /* * First create an empty table. @@ -594,9 +598,11 @@ struct dm_table *dm_swap_table(struct mapped_device *md, */ void dm_destroy_crypto_profile(struct blk_crypto_profile *profile); -/*----------------------------------------------------------------- +/* + *--------------------------------------------------------------- * Macros. - *---------------------------------------------------------------*/ + *--------------------------------------------------------------- + */ #define DM_NAME "device-mapper" #define DM_FMT(fmt) DM_NAME ": " DM_MSG_PREFIX ": " fmt "\n" diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h index a3eb7490d205..0b10faedb26a 100644 --- a/include/linux/dm-dirty-log.h +++ b/include/linux/dm-dirty-log.h @@ -97,7 +97,7 @@ struct dm_dirty_log_type { * Do not confuse this function with 'in_sync()', one * tells you if an area is synchronised, the other * assigns recovery work. 
- */ + */ int (*get_resync_work)(struct dm_dirty_log *log, region_t *region); /* diff --git a/include/linux/dm-region-hash.h b/include/linux/dm-region-hash.h index 4086098a0d8e..3079ed93dd2d 100644 --- a/include/linux/dm-region-hash.h +++ b/include/linux/dm-region-hash.h @@ -13,9 +13,11 @@ #include -/*----------------------------------------------------------------- +/* + *---------------------------------------------------------------- * Region hash - *----------------------------------------------------------------*/ + *---------------------------------------------------------------- + */ struct dm_region_hash; struct dm_region; -- cgit v1.2.3 From ade1229caed9433921b69e20bd6fadf1bba9558e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 14 Feb 2023 15:01:21 +0100 Subject: dma-mapping: no need to pass a bus_type into get_arch_dma_ops() The get_arch_dma_ops() arch-specific function never does anything with the struct bus_type that is passed into it, so remove it entirely as it is not needed. Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Thomas Bogendoerfer Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: x86@kernel.org Cc: "H. Peter Anvin" Cc: Marek Szyprowski Cc: Robin Murphy Cc: Konrad Rzeszutek Wilk Cc: linux-alpha@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: linux-parisc@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: iommu@lists.linux.dev Cc: linux-arch@vger.kernel.org Acked-by: Christoph Hellwig Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230214140121.131859-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/dma-map-ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index d678afeb8a13..41bf4bdb117a 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -90,7 +90,7 @@ static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { if (dev->dma_ops) return dev->dma_ops; - return get_arch_dma_ops(dev->bus); + return get_arch_dma_ops(); } static inline void set_dma_ops(struct device *dev, -- cgit v1.2.3 From d16c0cd27331179daa86a3a489f50ce409121c80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Mon, 10 Oct 2022 08:43:59 +0200 Subject: docs: driver-api: virtio: virtio on Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Basic doc about Virtio on Linux and a short tutorial on Virtio drivers. includes the following fixup: virtio: fix virtio_config_ops kerneldocs Fixes two warning messages when building htmldocs: warning: duplicate section name 'Note' warning: expecting prototype for virtio_config_ops(). Prototype was for vq_callback_t() instead Message-Id: <20221010064359.1324353-2-ricardo.canuelo@collabora.com> Signed-off-by: Ricardo Cañuelo Reviewed-by: Cornelia Huck Message-Id: <20221220100035.2712449-1-ricardo.canuelo@collabora.com> Reported-by: Stephen Rothwell Reviewed-by: Bagas Sanjaya Signed-off-by: Michael S. 
Tsirkin --- include/linux/virtio_config.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 4b517649cfe8..2b3438de2c4d 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -16,8 +16,10 @@ struct virtio_shm_region { u64 len; }; +typedef void vq_callback_t(struct virtqueue *); + /** - * virtio_config_ops - operations for configuring a virtio device + * struct virtio_config_ops - operations for configuring a virtio device * Note: Do not assume that a transport implements all of the operations * getting/setting a value as a simple read/write! Generally speaking, * any of @get/@set, @get_status/@set_status, or @get_features/ @@ -69,7 +71,8 @@ struct virtio_shm_region { * vdev: the virtio_device * This sends the driver feature bits to the device: it can change * the dev->feature bits if it wants. - * Note: despite the name this can be called any number of times. + * Note that despite the name this can be called any number of + * times. * Returns 0 on success or error status * @bus_name: return the bus name associated with the device (optional) * vdev: the virtio_device @@ -91,7 +94,6 @@ struct virtio_shm_region { * If disable_vq_and_reset is set, then enable_vq_after_reset must also be * set. */ -typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len); -- cgit v1.2.3 From 88f94c782b0ee2297685764f942e7c176d6b89aa Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 14 Feb 2023 22:41:19 +0100 Subject: mmc: core: support setting card detect interrupt from drivers On certain platforms like Amlogic Meson gpiod_to_irq() isn't supported due to the design of gpio / interrupt controller. Therefore provide an option for drivers to pass the card detect interrupt number (retrieved e.g. from device tree) to mmc core. Suggested-by refers to the mechanism to pass and store the interrupt. 
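Host-driver sketch (illustrative only; the interrupt lookup below is a placeholder for however the driver retrieves the card-detect interrupt, e.g. from the device tree):

        int irq = platform_get_irq_optional(pdev, 1);   /* hypothetical index */

        if (irq > 0)
                mmc_gpio_set_cd_irq(mmc, irq);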
Suggested-by: Ulf Hansson Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/5777f38b-465f-ce48-a87f-5eb8b3c57b0a@gmail.com Signed-off-by: Ulf Hansson --- include/linux/mmc/slot-gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index 4ae2f2908f99..5d3d15e97868 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -15,6 +15,7 @@ struct mmc_host; int mmc_gpio_get_ro(struct mmc_host *host); int mmc_gpio_get_cd(struct mmc_host *host); +void mmc_gpio_set_cd_irq(struct mmc_host *host, int irq); int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, unsigned int idx, bool override_active_level, unsigned int debounce); -- cgit v1.2.3 From 007ed7900aae514986770f6e14069ebd99c4a4c6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 12 Feb 2023 23:03:24 -0800 Subject: i3c: fix device.h kernel-doc warnings Fix all kernel-doc warnings in : include/linux/i3c/device.h:27: warning: contents before sections include/linux/i3c/device.h:196: warning: Excess function parameter 'dev' description in 'dev_to_i3cdev' Fixes: fa838c8ce537 ("i3c: move dev_to_i3cdev() to use container_of_const()") Fixes: 3a379bbcea0a ("i3c: Add core I3C infrastructure") Signed-off-by: Randy Dunlap Cc: Boris Brezillon Cc: Greg Kroah-Hartman Cc: Alexandre Belloni Cc: linux-i3c@lists.infradead.org Link: https://lore.kernel.org/r/20230213070324.1564-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/linux/i3c/device.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index ce115ef08fec..90fa83464f00 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -18,17 +18,18 @@ /** * enum i3c_error_code - I3C error codes * + * @I3C_ERROR_UNKNOWN: unknown error, usually means the error is not I3C + * related + * @I3C_ERROR_M0: M0 error + * @I3C_ERROR_M1: M1 error + * @I3C_ERROR_M2: M2 error + * * These are the standard error codes as defined by the I3C specification. * When -EIO is returned by the i3c_device_do_priv_xfers() or * i3c_device_send_hdr_cmds() one can check the error code in * &struct_i3c_priv_xfer.err or &struct i3c_hdr_cmd.err to get a better idea of * what went wrong. * - * @I3C_ERROR_UNKNOWN: unknown error, usually means the error is not I3C - * related - * @I3C_ERROR_M0: M0 error - * @I3C_ERROR_M1: M1 error - * @I3C_ERROR_M2: M2 error */ enum i3c_error_code { I3C_ERROR_UNKNOWN = 0, @@ -189,7 +190,7 @@ struct device *i3cdev_to_dev(struct i3c_device *i3cdev); /** * dev_to_i3cdev() - Returns the I3C device containing @dev - * @dev: device object + * @__dev: device object * * Return: a pointer to an I3C device object. */ -- cgit v1.2.3 From 5e7b9a6ae8c352819a2d998a065910b536de0e8c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Feb 2023 07:24:19 +0100 Subject: swiotlb: remove swiotlb_max_segment swiotlb_max_segment has always been a bogus API, so remove it now that the remaining callers are gone. 
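Code that still wants an upper bound on DMA mapping sizes can query the remaining interfaces instead, for example (illustrative):

        size_t limit = dma_max_mapping_size(dev);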
Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- include/linux/swiotlb.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 35bc4e281c21..bcef10e20ea4 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -121,7 +121,6 @@ static inline bool is_swiotlb_force_bounce(struct device *dev) void swiotlb_init(bool addressing_limited, unsigned int flags); void __init swiotlb_exit(void); -unsigned int swiotlb_max_segment(void); size_t swiotlb_max_mapping_size(struct device *dev); bool is_swiotlb_active(struct device *dev); void __init swiotlb_adjust_size(unsigned long size); @@ -140,10 +139,6 @@ static inline bool is_swiotlb_force_bounce(struct device *dev) static inline void swiotlb_exit(void) { } -static inline unsigned int swiotlb_max_segment(void) -{ - return 0; -} static inline size_t swiotlb_max_mapping_size(struct device *dev) { return SIZE_MAX; -- cgit v1.2.3 From 5720a18baa4686d56d0a235e6ecbcc55f8d716d7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 16 Feb 2023 11:34:19 -0800 Subject: hwmon: Deprecate [devm_]hwmon_device_register_with_groups Even though the hardware monitoring documentation already stated that new drivers should use [devm_]devm_hwmon_device_register_with_info() to register with the hardware monitoring subsystem, we still get submissions for new drivers using the older APIs. There is no benefit to use those APIs. On the contrary, using the older APIs results in substantially larger code size. Explicitly deprecate [devm_]hwmon_device_register_with_groups() to ensure that all new drivers use the latest API. Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 14325f93c6b2..c1b62384b6ee 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -436,6 +436,10 @@ struct hwmon_chip_info { /* hwmon_device_register() is deprecated */ struct device *hwmon_device_register(struct device *dev); +/* + * hwmon_device_register_with_groups() and + * devm_hwmon_device_register_with_groups() are deprecated. + */ struct device * hwmon_device_register_with_groups(struct device *dev, const char *name, void *drvdata, -- cgit v1.2.3 From 15ef6a982f40a2b53b057dad24f00c3fb43e7e70 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:49 +0100 Subject: lib/stackdepot: put functions in logical order Patch series "lib/stackdepot: fixes and clean-ups", v2. A set of fixes, comments, and clean-ups I came up with while reading the stack depot code. This patch (of 18): Put stack depot functions' declarations and definitions in a more logical order: 1. Functions that save stack traces into stack depot. 2. Functions that fetch and print stack traces. 3. stack_depot_get_extra_bits that operates on stack depot handles and does not interact with the stack depot storage. No functional changes. 
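For reference, the typical round trip through the API in the order used above (illustrative; buffer size chosen arbitrarily):

        unsigned long entries[16];
        unsigned int nr_entries;
        depot_stack_handle_t handle;

        nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
        handle = stack_depot_save(entries, nr_entries, GFP_KERNEL);
        stack_depot_print(handle);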
Link: https://lkml.kernel.org/r/cover.1676063693.git.andreyknvl@google.com Link: https://lkml.kernel.org/r/daca1319b665d826b94c596b992a8d8117846147.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Cc: Evgenii Stepanov Cc: Marco Elver Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 9ca7798d7a31..1296a6eeaec0 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -14,17 +14,13 @@ #include typedef u32 depot_stack_handle_t; + /* * Number of bits in the handle that stack depot doesn't use. Users may store * information in them. */ #define STACK_DEPOT_EXTRA_BITS 5 -depot_stack_handle_t __stack_depot_save(unsigned long *entries, - unsigned int nr_entries, - unsigned int extra_bits, - gfp_t gfp_flags, bool can_alloc); - /* * Every user of stack depot has to call stack_depot_init() during its own init * when it's decided that it will be calling stack_depot_save() later. This is @@ -59,17 +55,22 @@ static inline void stack_depot_want_early_init(void) { } static inline int stack_depot_early_init(void) { return 0; } #endif +depot_stack_handle_t __stack_depot_save(unsigned long *entries, + unsigned int nr_entries, + unsigned int extra_bits, + gfp_t gfp_flags, bool can_alloc); + depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); -unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); +void stack_depot_print(depot_stack_handle_t stack); int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); -void stack_depot_print(depot_stack_handle_t stack); +unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); #endif -- cgit v1.2.3 From 1c0310add78e7e47e3357c24369b61453a5a72eb Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:15:51 +0100 Subject: lib/stackdepot, mm: rename stack_depot_want_early_init Rename stack_depot_want_early_init to stack_depot_request_early_init. The old name is confusing, as it hints at returning some kind of intention of stack depot. The new name reflects that this function requests an action from stack depot instead. No functional changes. [akpm@linux-foundation.org: update mm/kmemleak.c] Link: https://lkml.kernel.org/r/359f31bf67429a06e630b4395816a967214ef753.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 1296a6eeaec0..c4e3abc16b16 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -31,26 +31,26 @@ typedef u32 depot_stack_handle_t; * enabled as part of mm_init(), for subsystems where it's known at compile time * that stack depot will be used. * - * Another alternative is to call stack_depot_want_early_init(), when the + * Another alternative is to call stack_depot_request_early_init(), when the * decision to use stack depot is taken e.g. when evaluating kernel boot * parameters, which precedes the enablement point in mm_init(). 
* - * stack_depot_init() and stack_depot_want_early_init() can be called regardless - * of CONFIG_STACKDEPOT and are no-op when disabled. The actual save/fetch/print - * functions should only be called from code that makes sure CONFIG_STACKDEPOT - * is enabled. + * stack_depot_init() and stack_depot_request_early_init() can be called + * regardless of CONFIG_STACKDEPOT and are no-op when disabled. The actual + * save/fetch/print functions should only be called from code that makes sure + * CONFIG_STACKDEPOT is enabled. */ #ifdef CONFIG_STACKDEPOT int stack_depot_init(void); -void __init stack_depot_want_early_init(void); +void __init stack_depot_request_early_init(void); /* This is supposed to be called only from mm_init() */ int __init stack_depot_early_init(void); #else static inline int stack_depot_init(void) { return 0; } -static inline void stack_depot_want_early_init(void) { } +static inline void stack_depot_request_early_init(void) { } static inline int stack_depot_early_init(void) { return 0; } #endif -- cgit v1.2.3 From 36aa1e6779c3c6f8e0d4552544214f5cffe3c287 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:16:03 +0100 Subject: lib/stacktrace, kasan, kmsan: rework extra_bits interface The current implementation of the extra_bits interface is confusing: passing extra_bits to __stack_depot_save makes it seem that the extra bits are somehow stored in stack depot. In reality, they are only embedded into a stack depot handle and are not used within stack depot. Drop the extra_bits argument from __stack_depot_save and instead provide a new stack_depot_set_extra_bits function (similar to the existing stack_depot_get_extra_bits) that saves extra bits into a stack depot handle. Update the callers of __stack_depot_save to use the new interface. This change also fixes a minor issue in the old code: __stack_depot_save does not return NULL if saving stack trace fails and extra_bits is used. Link: https://lkml.kernel.org/r/317123b5c05e2f82854fc55d8b285e0869d3cb77.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index c4e3abc16b16..267f4b2634ee 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -57,7 +57,6 @@ static inline int stack_depot_early_init(void) { return 0; } depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, - unsigned int extra_bits, gfp_t gfp_flags, bool can_alloc); depot_stack_handle_t stack_depot_save(unsigned long *entries, @@ -71,6 +70,9 @@ void stack_depot_print(depot_stack_handle_t stack); int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); +depot_stack_handle_t __must_check stack_depot_set_extra_bits( + depot_stack_handle_t handle, unsigned int extra_bits); + unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); #endif -- cgit v1.2.3 From b232b9995a6dbaafe19d07d81acc039bc84bd569 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:16:05 +0100 Subject: lib/stackdepot: various comments clean-ups Clean up comments in include/linux/stackdepot.h and lib/stackdepot.c: 1. Rework the initialization comment in stackdepot.h. 2. Rework the header comment in stackdepot.c. 3. Various clean-ups for other comments.
Also adjust whitespaces for find_stack and depot_alloc_stack call sites. No functional changes. Link: https://lkml.kernel.org/r/5836231b7954355e2311fc9b5870f697ea8e1f7d.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 267f4b2634ee..afdf8ee7b597 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * A generic stack depot implementation + * Stack depot - a stack trace storage that avoids duplication. * * Author: Alexander Potapenko * Copyright (C) 2016 Google, Inc. * - * Based on code by Dmitry Chernenkov. + * Based on the code by Dmitry Chernenkov. */ #ifndef _LINUX_STACKDEPOT_H @@ -17,35 +17,37 @@ typedef u32 depot_stack_handle_t; /* * Number of bits in the handle that stack depot doesn't use. Users may store - * information in them. + * information in them via stack_depot_set/get_extra_bits. */ #define STACK_DEPOT_EXTRA_BITS 5 /* - * Every user of stack depot has to call stack_depot_init() during its own init - * when it's decided that it will be calling stack_depot_save() later. This is - * recommended for e.g. modules initialized later in the boot process, when - * slab_is_available() is true. + * Using stack depot requires its initialization, which can be done in 3 ways: * - * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot - * enabled as part of mm_init(), for subsystems where it's known at compile time - * that stack depot will be used. + * 1. Selecting CONFIG_STACKDEPOT_ALWAYS_INIT. This option is suitable in + * scenarios where it's known at compile time that stack depot will be used. + * Enabling this config makes the kernel initialize stack depot in mm_init(). * - * Another alternative is to call stack_depot_request_early_init(), when the - * decision to use stack depot is taken e.g. when evaluating kernel boot - * parameters, which precedes the enablement point in mm_init(). + * 2. Calling stack_depot_request_early_init() during early boot, before + * stack_depot_early_init() in mm_init() completes. For example, this can + * be done when evaluating kernel boot parameters. + * + * 3. Calling stack_depot_init(). Possible after boot is complete. This option + * is recommended for modules initialized later in the boot process, after + * mm_init() completes. * * stack_depot_init() and stack_depot_request_early_init() can be called - * regardless of CONFIG_STACKDEPOT and are no-op when disabled. The actual - * save/fetch/print functions should only be called from code that makes sure - * CONFIG_STACKDEPOT is enabled. + * regardless of whether CONFIG_STACKDEPOT is enabled and are no-op when this + * config is disabled. The save/fetch/print stack depot functions can only be + * called from the code that makes sure CONFIG_STACKDEPOT is enabled _and_ + * initializes stack depot via one of the ways listed above. */ #ifdef CONFIG_STACKDEPOT int stack_depot_init(void); void __init stack_depot_request_early_init(void); -/* This is supposed to be called only from mm_init() */ +/* Must be only called from mm_init(). 
*/ int __init stack_depot_early_init(void); #else static inline int stack_depot_init(void) { return 0; } -- cgit v1.2.3 From 0621d160f1003a8aedd3628133568ecffdd724f7 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Fri, 10 Feb 2023 22:16:06 +0100 Subject: lib/stackdepot: move documentation comments to stackdepot.h Move all interface- and usage-related documentation comments to include/linux/stackdepot.h. It makes sense to have them in the header where they are available to the interface users. [akpm@linux-foundation.org: grammar fix, per Alexander] Link: https://lkml.kernel.org/r/fbfee41495b306dd8881f9b1c1b80999c885e82f.1676063693.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Reviewed-by: Alexander Potapenko Signed-off-by: Andrew Morton --- include/linux/stackdepot.h | 87 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index afdf8ee7b597..e58306783d8e 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -2,6 +2,17 @@ /* * Stack depot - a stack trace storage that avoids duplication. * + * Stack depot is intended to be used by subsystems that need to store and + * later retrieve many potentially duplicated stack traces without wasting + * memory. + * + * For example, KASAN needs to save allocation and free stack traces for each + * object. Storing two stack traces per object requires a lot of memory (e.g. + * SLUB_DEBUG needs 256 bytes per object for that). Since allocation and free + * stack traces often repeat, using stack depot allows to save about 100x space. + * + * Stack traces are never removed from the stack depot. + * * Author: Alexander Potapenko * Copyright (C) 2016 Google, Inc. * @@ -57,24 +68,100 @@ static inline void stack_depot_request_early_init(void) { } static inline int stack_depot_early_init(void) { return 0; } #endif +/** + * __stack_depot_save - Save a stack trace to stack depot + * + * @entries: Pointer to the stack trace + * @nr_entries: Number of frames in the stack + * @alloc_flags: Allocation GFP flags + * @can_alloc: Allocate stack pools (increased chance of failure if false) + * + * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is + * %true, stack depot can replenish the stack pools in case no space is left + * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids + * any allocations and fails if no space is left to store the stack trace. + * + * If the provided stack trace comes from the interrupt context, only the part + * up to the interrupt entry is saved. + * + * Context: Any context, but setting @can_alloc to %false is required if + * alloc_pages() cannot be used from the current context. Currently + * this is the case for contexts where neither %GFP_ATOMIC nor + * %GFP_NOWAIT can be used (NMI, raw_spin_lock). + * + * Return: Handle of the stack struct stored in depot, 0 on failure + */ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags, bool can_alloc); +/** + * stack_depot_save - Save a stack trace to stack depot + * + * @entries: Pointer to the stack trace + * @nr_entries: Number of frames in the stack + * @alloc_flags: Allocation GFP flags + * + * Context: Contexts where allocations via alloc_pages() are allowed. + * See __stack_depot_save() for more details. 
+ * + * Return: Handle of the stack trace stored in depot, 0 on failure + */ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); +/** + * stack_depot_fetch - Fetch a stack trace from stack depot + * + * @handle: Stack depot handle returned from stack_depot_save() + * @entries: Pointer to store the address of the stack trace + * + * Return: Number of frames for the fetched stack + */ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); +/** + * stack_depot_print - Print a stack trace from stack depot + * + * @stack: Stack depot handle returned from stack_depot_save() + */ void stack_depot_print(depot_stack_handle_t stack); +/** + * stack_depot_snprint - Print a stack trace from stack depot into a buffer + * + * @handle: Stack depot handle returned from stack_depot_save() + * @buf: Pointer to the print buffer + * @size: Size of the print buffer + * @spaces: Number of leading spaces to print + * + * Return: Number of bytes printed + */ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, int spaces); +/** + * stack_depot_set_extra_bits - Set extra bits in a stack depot handle + * + * @handle: Stack depot handle returned from stack_depot_save() + * @extra_bits: Value to set the extra bits + * + * Return: Stack depot handle with extra bits set + * + * Stack depot handles have a few unused bits, which can be used for storing + * user-specific information. These bits are transparent to the stack depot. + */ depot_stack_handle_t __must_check stack_depot_set_extra_bits( depot_stack_handle_t handle, unsigned int extra_bits); +/** + * stack_depot_get_extra_bits - Retrieve extra bits from a stack depot handle + * + * @handle: Stack depot handle with extra bits saved + * + * Return: Extra bits retrieved from the stack depot handle + */ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle); #endif -- cgit v1.2.3 From 64c8902ed4418317cd416c566f896bd4a92b2efc Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Mon, 13 Feb 2023 20:34:39 +0800 Subject: migrate_pages: split unmap_and_move() to _unmap() and _move() This is a preparation patch to batch the folio unmapping and moving. In this patch, unmap_and_move() is split to migrate_folio_unmap() and migrate_folio_move(). So, we can batch _unmap() and _move() in different loops later. To pass some information between unmap and move, the original unused dst->mapping and dst->private are used. 
Link: https://lkml.kernel.org/r/20230213123444.155149-5-ying.huang@intel.com Signed-off-by: "Huang, Ying" Reviewed-by: Baolin Wang Reviewed-by: Xin Hao Cc: Zi Yan Cc: Yang Shi Cc: Oscar Salvador Cc: Matthew Wilcox Cc: Bharata B Rao Cc: Alistair Popple Cc: Minchan Kim Cc: Mike Kravetz Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Signed-off-by: Andrew Morton --- include/linux/migrate.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index bdff950a8bb4..c88b96b48be7 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -18,6 +18,7 @@ struct migration_target_control; * - zero on page migration success; */ #define MIGRATEPAGE_SUCCESS 0 +#define MIGRATEPAGE_UNMAP 1 /** * struct movable_operations - Driver page migration -- cgit v1.2.3 From f7a449f779608efe1941a0e0c4bd7b5f57000be7 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 13 Feb 2023 11:29:22 -0800 Subject: mm: memcontrol: rename memcg_kmem_enabled() Currently there are two kmem-related helper functions with confusing semantics: memcg_kmem_enabled() and mem_cgroup_kmem_disabled(). The problem is that the obvious expectation, memcg_kmem_enabled() == !mem_cgroup_kmem_disabled(), can be false. mem_cgroup_kmem_disabled() is similar to mem_cgroup_disabled(): it returns true only if CONFIG_MEMCG_KMEM is not set or the kmem accounting is disabled using a boot time kernel option "cgroup.memory=nokmem". It never changes the value dynamically. memcg_kmem_enabled() is different: it always returns false until the first non-root memory cgroup comes online (assuming the kernel memory accounting is enabled). Its goal is to improve performance on systems without the cgroupfs mounted/memory controller enabled or on systems with only the root memory cgroup. To make things more obvious and avoid potential bugs, let's rename memcg_kmem_enabled() to memcg_kmem_online().
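For illustration only, a minimal sketch of the intended calling pattern; the wrapper name my_driver_charge_page() is invented for this example, and the in-tree memcg_kmem_charge_page() helper shown in the hunk below follows the same shape:

	/*
	 * Illustrative sketch only: skip the kmem accounting slow path
	 * while no non-root memory cgroup is online. The static branch
	 * behind memcg_kmem_online() keeps this test nearly free on
	 * systems that never create a child cgroup.
	 */
	static int my_driver_charge_page(struct page *page, gfp_t gfp, int order)
	{
		if (!memcg_kmem_online())
			return 0;	/* nothing to account yet */
		return __memcg_kmem_charge_page(page, gfp, order);
	}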
Link: https://lkml.kernel.org/r/20230213192922.1146370-1-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Muchun Song Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Shakeel Butt Cc: Dennis Zhou Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 35478695cabf..5567319027d1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1776,24 +1776,24 @@ struct obj_cgroup *get_obj_cgroup_from_page(struct page *page); int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size); void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size); -extern struct static_key_false memcg_kmem_enabled_key; +extern struct static_key_false memcg_kmem_online_key; -static inline bool memcg_kmem_enabled(void) +static inline bool memcg_kmem_online(void) { - return static_branch_likely(&memcg_kmem_enabled_key); + return static_branch_likely(&memcg_kmem_online_key); } static inline int memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) { - if (memcg_kmem_enabled()) + if (memcg_kmem_online()) return __memcg_kmem_charge_page(page, gfp, order); return 0; } static inline void memcg_kmem_uncharge_page(struct page *page, int order) { - if (memcg_kmem_enabled()) + if (memcg_kmem_online()) __memcg_kmem_uncharge_page(page, order); } @@ -1814,7 +1814,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, { struct mem_cgroup *memcg; - if (!memcg_kmem_enabled()) + if (!memcg_kmem_online()) return; rcu_read_lock(); @@ -1854,7 +1854,7 @@ static inline struct obj_cgroup *get_obj_cgroup_from_page(struct page *page) return NULL; } -static inline bool memcg_kmem_enabled(void) +static inline bool memcg_kmem_online(void) { return false; } -- cgit v1.2.3 From 4b7296aa6c6618d6a6840fbe857e4990f626bd90 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 17 Jan 2023 15:14:49 +0200 Subject: net/mlx5: Expose bits for querying special mkeys Add needed HW bits to query the values of all special mkeys. 
Link: https://lore.kernel.org/r/080ebb563a9717c15b1ea75d669aede676df386b.1673960981.git.leon@kernel.org Signed-off-by: Or Har-Toov Reviewed-by: Michael Guralnik Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/mlx5_ifc.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c3d3a2eef7d4..67cfac8fbe46 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1485,7 +1485,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 relaxed_ordering_write[0x1]; u8 relaxed_ordering_read[0x1]; u8 log_max_mkey[0x6]; - u8 reserved_at_f0[0x8]; + u8 reserved_at_f0[0x6]; + u8 terminate_scatter_list_mkey[0x1]; + u8 repeated_mkey[0x1]; u8 dump_fill_mkey[0x1]; u8 reserved_at_f9[0x2]; u8 fast_teardown[0x1]; @@ -5210,7 +5212,11 @@ struct mlx5_ifc_query_special_contexts_out_bits { u8 null_mkey[0x20]; - u8 reserved_at_a0[0x60]; + u8 terminate_scatter_list_mkey[0x20]; + + u8 repeated_mkey[0x20]; + + u8 reserved_at_a0[0x20]; }; struct mlx5_ifc_query_special_contexts_in_bits { -- cgit v1.2.3 From a419bfb7632095410adc3aecb1e863568f049add Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Tue, 17 Jan 2023 15:14:50 +0200 Subject: net/mlx5: Change define name for 0x100 lkey value Change define of 0x100 lkey value from MLX5_INVALID_LKEY to be MLX5_TERMINATE_SCATTER_LIST_LKEY as 0x100 is the value of terminate_scatter_list_mkey. Link: https://lore.kernel.org/r/3a116dc3fbae4cb6b76a63d27d418830b06ade0c.1673960981.git.leon@kernel.org Signed-off-by: Or Har-Toov Reviewed-by: Michael Guralnik Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/qp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 4657d5c54abe..df55fbb65717 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -36,7 +36,7 @@ #include #include -#define MLX5_INVALID_LKEY 0x100 +#define MLX5_TERMINATE_SCATTER_LIST_LKEY cpu_to_be32(0x100) /* UMR (3 WQE_BB's) + SIG (3 WQE_BB's) + PSV (mem) + PSV (wire) */ #define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 8) #define MLX5_DIF_SIZE 8 -- cgit v1.2.3 From d88cbbb39b4db057feb1552de31f22c02a21b36f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 17 Feb 2023 10:29:10 +0100 Subject: blk-mq: Reorder fields in 'struct blk_mq_tag_set' Group some variables based on their sizes to reduce hole and avoid padding. On x86_64, this shrinks the size of 'struct blk_mq_tag_set' from 304 to 296 bytes. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/6f249f9b02a3490283ef0278096556de41aa0cf0.1676626130.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 779fba613bd0..dd5ce1137f04 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -473,6 +473,7 @@ enum hctx_type { /** * struct blk_mq_tag_set - tag set that can be shared between request queues + * @ops: Pointers to functions that implement block driver behavior. * @map: One or more ctx -> hctx mappings. One map exists for each * hardware queue type (enum hctx_type) that the driver wishes * to support. There are no restrictions on maps being of the @@ -480,7 +481,6 @@ enum hctx_type { * types. * @nr_maps: Number of elements in the @map array. 
A number in the range * [1, HCTX_MAX_TYPES]. - * @ops: Pointers to functions that implement block driver behavior. * @nr_hw_queues: Number of hardware queues supported by the block driver that * owns this data structure. * @queue_depth: Number of tags per hardware queue, reserved tags included. @@ -505,9 +505,9 @@ enum hctx_type { * (BLK_MQ_F_BLOCKING). */ struct blk_mq_tag_set { + const struct blk_mq_ops *ops; struct blk_mq_queue_map map[HCTX_MAX_TYPES]; unsigned int nr_maps; - const struct blk_mq_ops *ops; unsigned int nr_hw_queues; unsigned int queue_depth; unsigned int reserved_tags; -- cgit v1.2.3 From 9cbad37ce8122de32a1529e394b468bc101c9e7f Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 9 Feb 2023 15:35:01 -0600 Subject: of: Add of_property_present() helper Add an of_property_present() function similar to fwnode_property_present(). of_property_read_bool() could be used directly, but it is cleaner to not use it on non-boolean properties. Reviewed-by: Frank Rowand Tested-by: Frank Rowand Link: https://lore.kernel.org/all/20230215215547.691573-1-robh@kernel.org/ Signed-off-by: Rob Herring --- include/linux/of.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 8bb348666709..37afd04f36eb 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1155,7 +1155,8 @@ static inline int of_property_read_string_index(const struct device_node *np, * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * - * Search for a property in a device node. + * Search for a boolean property in a device node. Usage on non-boolean + * property types is deprecated. * * Return: true if the property exists false otherwise. */ @@ -1167,6 +1168,20 @@ static inline bool of_property_read_bool(const struct device_node *np, return prop ? true : false; } +/** + * of_property_present - Test if a property is present in a node + * @np: device node to search for the property. + * @propname: name of the property to be searched. + * + * Test for a property present in a device node. + * + * Return: true if the property exists false otherwise. + */ +static inline bool of_property_present(const struct device_node *np, const char *propname) +{ + return of_property_read_bool(np, propname); +} + /** * of_property_read_u8_array - Find and read an array of u8 from a property. * -- cgit v1.2.3 From 2455f0e124d317dd08d337a7550a78a224d4ba41 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 15 Feb 2023 15:33:45 -0700 Subject: tracing: Always use canonical ftrace path The canonical location for the tracefs filesystem is at /sys/kernel/tracing. But, from Documentation/trace/ftrace.rst: Before 4.1, all ftrace tracing control files were within the debugfs file system, which is typically located at /sys/kernel/debug/tracing. For backward compatibility, when mounting the debugfs file system, the tracefs file system will be automatically mounted at: /sys/kernel/debug/tracing Many comments and Kconfig help messages in the tracing code still refer to this older debugfs path, so let's update them to avoid confusion. 
Link: https://lore.kernel.org/linux-trace-kernel/20230215223350.2658616-2-zwisler@google.com Acked-by: Masami Hiramatsu (Google) Reviewed-by: Mukesh Ojha Signed-off-by: Ross Zwisler Signed-off-by: Steven Rostedt (Google) --- include/linux/kernel.h | 2 +- include/linux/tracepoint.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fe6efb24d151..40bce7495af8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -297,7 +297,7 @@ bool mac_pton(const char *s, u8 *mac); * * Use tracing_on/tracing_off when you want to quickly turn on or off * tracing. It simply enables or disables the recording of the trace events. - * This also corresponds to the user space /sys/kernel/debug/tracing/tracing_on + * This also corresponds to the user space /sys/kernel/tracing/tracing_on * file, which gives a means for the kernel and userspace to interact. * Place a tracing_off() in the kernel where you want tracing to end. * From user space, examine the trace, and then echo 1 > tracing_on diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 4b33b95eb8be..fa1004fcf810 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -471,7 +471,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * * This is how the trace record is structured and will * * be saved into the ring buffer. These are the fields * * that will be exposed to user-space in - * * /sys/kernel/debug/tracing/events/<*>/format. + * * /sys/kernel/tracing/events/<*>/format. * * * * The declared 'local variable' is called '__entry' * * @@ -531,7 +531,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * tracepoint callback (this is used by programmatic plugins and * can also by used by generic instrumentation like SystemTap), and * it is also used to expose a structured trace record in - * /sys/kernel/debug/tracing/events/. + * /sys/kernel/tracing/events/. * * A set of (un)registration functions can be passed to the variant * TRACE_EVENT_FN to perform any (un)registration work. -- cgit v1.2.3 From 66fb1d5df6ace316a4a6e2c31e13fc123ea2b644 Mon Sep 17 00:00:00 2001 From: Edward Srouji Date: Thu, 16 Feb 2023 11:13:45 +0200 Subject: IB/mlx5: Extend debug control for CC parameters This patch adds rtt_resp_dscp to the current debug controllability of congestion control (CC) parameters. rtt_resp_dscp can be read or written through debugfs. If set, its value overwrites the DSCP of the generated RTT response. 
Signed-off-by: Edward Srouji Reviewed-by: Maor Gottlieb Link: https://lore.kernel.org/r/1dcc3440ee53c688f19f579a051ded81a2aaa70a.1676538714.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 67cfac8fbe46..c63b92aa4c96 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2159,6 +2159,17 @@ struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits { u8 reserved_at_360[0x4a0]; }; +struct mlx5_ifc_cong_control_r_roce_general_bits { + u8 reserved_at_0[0x80]; + + u8 reserved_at_80[0x10]; + u8 rtt_resp_dscp_valid[0x1]; + u8 reserved_at_91[0x9]; + u8 rtt_resp_dscp[0x6]; + + u8 reserved_at_a0[0x760]; +}; + struct mlx5_ifc_cong_control_802_1qau_rp_bits { u8 reserved_at_0[0x80]; @@ -4304,6 +4315,7 @@ union mlx5_ifc_cong_control_roce_ecn_auto_bits { struct mlx5_ifc_cong_control_802_1qau_rp_bits cong_control_802_1qau_rp; struct mlx5_ifc_cong_control_r_roce_ecn_rp_bits cong_control_r_roce_ecn_rp; struct mlx5_ifc_cong_control_r_roce_ecn_np_bits cong_control_r_roce_ecn_np; + struct mlx5_ifc_cong_control_r_roce_general_bits cong_control_r_roce_general; u8 reserved_at_0[0x800]; }; -- cgit v1.2.3 From e1f19857f94be09f9526f180e64f20138bd4e394 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 7 Dec 2022 09:43:08 +0100 Subject: fs: namei: Allow follow_down() to uncover auto mounts This function is only used by NFSD to cross mount points. If a mount point is of type auto mount, follow_down() will not uncover it. Add LOOKUP_AUTOMOUNT to the lookup flags to have ->d_automount() called when NFSD walks down the mount tree. Signed-off-by: Richard Weinberger Reviewed-by: Ian Kent Reviewed-by: Jeff Layton Acked-by: Al Viro Signed-off-by: Chuck Lever --- include/linux/namei.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index 00fee52df842..c82e2ac65846 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -77,7 +77,7 @@ struct dentry *lookup_one_positive_unlocked(struct user_namespace *mnt_userns, struct dentry *base, int len); extern int follow_down_one(struct path *); -extern int follow_down(struct path *); +extern int follow_down(struct path *path, unsigned int flags); extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); -- cgit v1.2.3 From 846b5756d7632523b5bfce78c163aa883aa9d587 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Jan 2023 12:05:43 -0500 Subject: SUNRPC: Add an XDR decoding helper for struct opaque_auth RFC 5531 defines the body of an RPC Call message like this: struct call_body { unsigned int rpcvers; unsigned int prog; unsigned int vers; unsigned int proc; opaque_auth cred; opaque_auth verf; /* procedure-specific parameters start here */ }; In the current server code, decoding a struct opaque_auth type is open-coded in several places, and is thus difficult to harden everywhere. Introduce a helper for decoding an opaque_auth within the context of an xdr_stream. This helper can be shared with all authentication flavor implementations, even on the client-side. Done as part of hardening the server-side RPC header decoding paths.
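For illustration only, a sketch of how a caller might consume the cred and verf fields with the new helper declared below; the function name is invented for this example and error handling is simplified:

	/* Illustrative sketch, not part of this patch. */
	static int example_decode_call_auth(struct xdr_stream *xdr)
	{
		u32 flavor;
		void *body;
		unsigned int body_len;

		/* cred */
		if (xdr_stream_decode_opaque_auth(xdr, &flavor, &body, &body_len) < 0)
			return -EINVAL;
		/* verf */
		if (xdr_stream_decode_opaque_auth(xdr, &flavor, &body, &body_len) < 0)
			return -EINVAL;
		return 0;
	}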
Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f84e2a1358e1..8b5c9d0cdcb5 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -346,6 +346,9 @@ ssize_t xdr_stream_decode_string(struct xdr_stream *xdr, char *str, size_t size); ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, size_t maxlen, gfp_t gfp_flags); +ssize_t xdr_stream_decode_opaque_auth(struct xdr_stream *xdr, u32 *flavor, + void **body, unsigned int *body_len); + /** * xdr_align_size - Calculate padded size of an object * @n: Size of an object being XDR encoded (in bytes) -- cgit v1.2.3 From 6181b0c6432bf0807512e85e0c5863f7aca8e515 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Jan 2023 12:05:56 -0500 Subject: SUNRPC: Convert svcauth_unix_accept() to use xdr_stream Done as part of hardening the server-side RPC header decoding path. Since the server-side of the Linux kernel SunRPC implementation ignores the contents of the Call's machinename field, there's no need for its RPC_AUTH_UNIX authenticator to reject names that are larger than UNX_MAXNODENAME. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/msg_prot.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 02117ed0fa2e..c4b0eb2b2f04 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -34,6 +34,11 @@ enum rpc_auth_flavors { RPC_AUTH_GSS_SPKMP = 390011, }; +/* Maximum size (in octets) of the machinename in an AUTH_UNIX + * credential (per RFC 5531 Appendix A) + */ +#define RPC_MAX_MACHINENAME (255) + /* Maximum size (in bytes) of an rpc credential or verifier */ #define RPC_MAX_AUTH_SIZE (400) -- cgit v1.2.3 From b68e4c5c32275e23d35badb7287faaa310c15ba0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Jan 2023 12:06:54 -0500 Subject: SUNRPC: Convert unwrap_integ_data() to use xdr_stream Done as part of hardening the server-side RPC header decoding path. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 8b5c9d0cdcb5..accfe8d6e283 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -247,6 +247,7 @@ extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, size_t nbytes); extern void __xdr_commit_encode(struct xdr_stream *xdr); extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); +extern void xdr_truncate_decode(struct xdr_stream *xdr, size_t len); extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); -- cgit v1.2.3 From 42140718ea26c47fb06c679451a6aa6703545136 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 2 Jan 2023 12:07:07 -0500 Subject: SUNRPC: Convert unwrap_priv_data() to use xdr_stream Done as part of hardening the server-side RPC header decoding path. 
Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index accfe8d6e283..884df67009f4 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -188,7 +188,6 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) /* * XDR buffer helper functions */ -extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(const struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(const struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); extern void xdr_buf_trim(struct xdr_buf *, unsigned int); -- cgit v1.2.3 From df24ac7a2e3a9d0bc68f1756a880e50bfe4b4522 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Sun, 18 Dec 2022 16:55:53 -0800 Subject: NFSD: enhance inter-server copy cleanup Currently nfsd4_setup_inter_ssc returns the vfsmount of the source server's export when the mount completes. After the copy is done nfsd4_cleanup_inter_ssc is called with the vfsmount of the source server and it searches nfsd_ssc_mount_list for a matching entry to do the clean up. The problems with this approach are (1) the need to search the nfsd_ssc_mount_list and (2) the code has to handle the case where the matching entry is not found which looks ugly. The enhancement is instead of nfsd4_setup_inter_ssc returning the vfsmount, it returns the nfsd4_ssc_umount_item which has the vfsmount embedded in it. When nfsd4_cleanup_inter_ssc is called it's passed with the nfsd4_ssc_umount_item directly to do the clean up so no searching is needed and there is no need to handle the 'not found' case. Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever [ cel: adjusted whitespace and variable/function names ] Reviewed-by: Olga Kornievskaia --- include/linux/nfs_ssc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h index 75843c00f326..22265b1ff080 100644 --- a/include/linux/nfs_ssc.h +++ b/include/linux/nfs_ssc.h @@ -53,6 +53,7 @@ static inline void nfs42_ssc_close(struct file *filep) if (nfs_ssc_client_tbl.ssc_nfs4_ops) (*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep); } +#endif struct nfsd4_ssc_umount_item { struct list_head nsui_list; @@ -66,7 +67,6 @@ struct nfsd4_ssc_umount_item { struct vfsmount *nsui_vfsmount; char nsui_ipaddr[RPC_MAX_ADDRBUFLEN + 1]; }; -#endif /* * NFS_FS -- cgit v1.2.3 From 7b402c8db66414abb4001d0c2676553baa619a2b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 8 Jan 2023 11:29:38 -0500 Subject: SUNRPC: Add XDR encoding helper for opaque_auth RFC 5531 defines an MSG_ACCEPTED Reply message like this: struct accepted_reply { opaque_auth verf; union switch (accept_stat stat) { case SUCCESS: ... In the current server code, struct opaque_auth encoding is open- coded. Introduce a helper that encodes an opaque_auth data item within the context of a xdr_stream. Done as part of hardening the server-side RPC header decoding and encoding paths. 
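For illustration, a sketch of emitting an AUTH_NONE verifier with the new helper declared below; the function name is invented for this example, and a zero-length opaque body is assumed to be acceptable for this flavor:

	/* Illustrative sketch, not part of this patch. */
	static bool example_encode_null_verf(struct xdr_stream *xdr)
	{
		/* flavor RPC_AUTH_NULL, empty opaque body */
		return xdr_stream_encode_opaque_auth(xdr, RPC_AUTH_NULL, NULL, 0) > 0;
	}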
Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 884df67009f4..f3b6eb9accd7 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -348,6 +348,8 @@ ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str, size_t maxlen, gfp_t gfp_flags); ssize_t xdr_stream_decode_opaque_auth(struct xdr_stream *xdr, u32 *flavor, void **body, unsigned int *body_len); +ssize_t xdr_stream_encode_opaque_auth(struct xdr_stream *xdr, u32 flavor, + void *body, unsigned int body_len); /** * xdr_align_size - Calculate padded size of an object -- cgit v1.2.3 From 8dd41d70f331c342842e8d349d7a1f73b0ba7ccd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 8 Jan 2023 11:29:44 -0500 Subject: SUNRPC: Push svcxdr_init_encode() into svc_process_common() Now that all vs_dispatch functions invoke svcxdr_init_encode(), it is common code and can be pushed down into the generic RPC server. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f3b6eb9accd7..72014c9216fc 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -474,6 +474,27 @@ xdr_stream_encode_u32(struct xdr_stream *xdr, __u32 n) return len; } +/** + * xdr_stream_encode_be32 - Encode a big-endian 32-bit integer + * @xdr: pointer to xdr_stream + * @n: integer to encode + * + * Return values: + * On success, returns length in bytes of XDR buffer consumed + * %-EMSGSIZE on XDR buffer overflow + */ +static inline ssize_t +xdr_stream_encode_be32(struct xdr_stream *xdr, __be32 n) +{ + const size_t len = sizeof(n); + __be32 *p = xdr_reserve_space(xdr, len); + + if (unlikely(!p)) + return -EMSGSIZE; + *p = n; + return len; +} + /** * xdr_stream_encode_u64 - Encode a 64-bit integer * @xdr: pointer to xdr_stream -- cgit v1.2.3 From 7bb0dfb2234725ba085cacfd35f34c187def92b2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 8 Jan 2023 11:30:15 -0500 Subject: SUNRPC: Convert unwrap data paths to use xdr_stream for replies We're now moving svcxdr_init_encode() to /before/ the flavor's ->accept method has set rq_auth_slack. Add a helper that can set rq_auth_slack /after/ svcxdr_init_encode() has been called. 
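As a sketch of the intended call site (the function name is invented, and RPC_MAX_AUTH_SIZE is used only as an example upper bound), a security flavor's ->accept method would reserve its wrap space after the generic code has initialized the encode stream:

	/* Illustrative sketch, not part of this patch. */
	static int example_accept(struct svc_rqst *rqstp)
	{
		/* reserve room for this flavor to wrap the Reply later */
		svcxdr_set_auth_slack(rqstp, RPC_MAX_AUTH_SIZE);

		/* ... flavor-specific credential and verifier handling ... */
		return SVC_OK;
	}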
Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index ed722dd33b46..dd9f68acd9f1 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -577,12 +577,34 @@ static inline void svcxdr_init_encode(struct svc_rqst *rqstp) xdr->buf = buf; xdr->iov = resv; xdr->p = resv->iov_base + resv->iov_len; - xdr->end = resv->iov_base + PAGE_SIZE - rqstp->rq_auth_slack; + xdr->end = resv->iov_base + PAGE_SIZE; buf->len = resv->iov_len; xdr->page_ptr = buf->pages - 1; buf->buflen = PAGE_SIZE * (rqstp->rq_page_end - buf->pages); - buf->buflen -= rqstp->rq_auth_slack; xdr->rqst = NULL; } +/** + * svcxdr_set_auth_slack - + * @rqstp: RPC transaction + * @slack: buffer space to reserve for the transaction's security flavor + * + * Set the request's slack space requirement, and set aside that much + * space in the rqstp's rq_res.head for use when the auth wraps the Reply. + */ +static inline void svcxdr_set_auth_slack(struct svc_rqst *rqstp, int slack) +{ + struct xdr_stream *xdr = &rqstp->rq_res_stream; + struct xdr_buf *buf = &rqstp->rq_res; + struct kvec *resv = buf->head; + + rqstp->rq_auth_slack = slack; + + xdr->end -= XDR_QUADLEN(slack); + buf->buflen -= rqstp->rq_auth_slack; + + WARN_ON(xdr->iov != resv); + WARN_ON(xdr->p > xdr->end); +} + #endif /* SUNRPC_SVC_H */ -- cgit v1.2.3 From 5df25676de2e13b2cb67140fd43bcbfdd60ef0df Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 8 Jan 2023 11:30:52 -0500 Subject: SUNRPC: Remove no-longer-used helper functions The svc_get/put helpers are no longer used. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 66 +--------------------------------------------- 1 file changed, 1 insertion(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dd9f68acd9f1..32eb98e621c3 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -193,40 +193,6 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); #define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \ + 2 + 1) -static inline u32 svc_getnl(struct kvec *iov) -{ - __be32 val, *vp; - vp = iov->iov_base; - val = *vp++; - iov->iov_base = (void*)vp; - iov->iov_len -= sizeof(__be32); - return ntohl(val); -} - -static inline void svc_putnl(struct kvec *iov, u32 val) -{ - __be32 *vp = iov->iov_base + iov->iov_len; - *vp = htonl(val); - iov->iov_len += sizeof(__be32); -} - -static inline __be32 svc_getu32(struct kvec *iov) -{ - __be32 val, *vp; - vp = iov->iov_base; - val = *vp++; - iov->iov_base = (void*)vp; - iov->iov_len -= sizeof(__be32); - return val; -} - -static inline void svc_putu32(struct kvec *iov, __be32 val) -{ - __be32 *vp = iov->iov_base + iov->iov_len; - *vp = val; - iov->iov_len += sizeof(__be32); -} - /* * The context of a single thread, including the request currently being * processed. 
@@ -345,29 +311,6 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst) return (struct sockaddr *) &rqst->rq_daddr; } -/* - * Check buffer bounds after decoding arguments - */ -static inline int -xdr_argsize_check(struct svc_rqst *rqstp, __be32 *p) -{ - char *cp = (char *)p; - struct kvec *vec = &rqstp->rq_arg.head[0]; - return cp >= (char*)vec->iov_base - && cp <= (char*)vec->iov_base + vec->iov_len; -} - -static inline int -xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p) -{ - struct kvec *vec = &rqstp->rq_res.head[0]; - char *cp = (char*)p; - - vec->iov_len = cp - (char*)vec->iov_base; - - return vec->iov_len <= PAGE_SIZE; -} - static inline void svc_free_res_pages(struct svc_rqst *rqstp) { while (rqstp->rq_next_page != rqstp->rq_respages) { @@ -540,9 +483,6 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) * svcxdr_init_decode - Prepare an xdr_stream for Call decoding * @rqstp: controlling server RPC transaction context * - * This function currently assumes the RPC header in rq_arg has - * already been decoded. Upon return, xdr->p points to the - * location of the upper layer header. */ static inline void svcxdr_init_decode(struct svc_rqst *rqstp) { @@ -550,11 +490,7 @@ static inline void svcxdr_init_decode(struct svc_rqst *rqstp) struct xdr_buf *buf = &rqstp->rq_arg; struct kvec *argv = buf->head; - /* - * svc_getnl() and friends do not keep the xdr_buf's ::len - * field up to date. Refresh that field before initializing - * the argument decoding stream. - */ + WARN_ON(buf->len != buf->head->iov_len + buf->page_len + buf->tail->iov_len); buf->len = buf->head->iov_len + buf->page_len + buf->tail->iov_len; xdr_init_decode(xdr, buf, argv->iov_base, NULL); -- cgit v1.2.3 From cee4db19452467eef8ab93c6eb6a3a84d11d25d7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 8 Jan 2023 11:30:59 -0500 Subject: SUNRPC: Refactor RPC server dispatch method Currently, svcauth_gss_accept() pre-reserves response buffer space for the RPC payload length and GSS sequence number before returning to the dispatcher, which then adds the header's accept_stat field. The problem is the accept_stat field is supposed to go before the length and seq_num fields. So svcauth_gss_release() has to relocate the accept_stat value (see svcauth_gss_prepare_to_wrap()). To enable these fields to be added to the response buffer in the correct (final) order, the pointer to the accept_stat has to be made available to svcauth_gss_accept() so that it can set it before reserving space for the length and seq_num fields. As a first step, move the pointer to the location of the accept_stat field into struct svc_rqst. 
Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 32eb98e621c3..f40a90ca5de6 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -251,6 +251,7 @@ struct svc_rqst { void * rq_argp; /* decoded arguments */ void * rq_resp; /* xdr'd results */ + __be32 *rq_accept_statp; void * rq_auth_data; /* flavor-specific data */ __be32 rq_auth_stat; /* authentication status */ int rq_auth_slack; /* extra space xdr code @@ -337,7 +338,7 @@ struct svc_deferred_req { struct svc_process_info { union { - int (*dispatch)(struct svc_rqst *, __be32 *); + int (*dispatch)(struct svc_rqst *rqstp); struct { unsigned int lovers; unsigned int hivers; @@ -389,7 +390,7 @@ struct svc_version { bool vs_need_cong_ctrl; /* Dispatch function */ - int (*vs_dispatch)(struct svc_rqst *, __be32 *); + int (*vs_dispatch)(struct svc_rqst *rqstp); }; /* -- cgit v1.2.3 From 4bcf0343e8a69eb22f7e83bfa7cfce32a28c9d95 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 8 Jan 2023 11:31:05 -0500 Subject: SUNRPC: Set rq_accept_statp inside ->accept methods To navigate around the space that svcauth_gss_accept() reserves for the RPC payload body length and sequence number fields, svcauth_gss_release() does a little dance with the reply's accept_stat, moving the accept_stat value in the response buffer down by two words. Instead, let's have the ->accept() methods each set the proper final location of the accept_stat to avoid having to move things. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index f40a90ca5de6..392d2d2620fa 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -544,4 +544,23 @@ static inline void svcxdr_set_auth_slack(struct svc_rqst *rqstp, int slack) WARN_ON(xdr->p > xdr->end); } +/** + * svcxdr_set_accept_stat - Reserve space for the accept_stat field + * @rqstp: RPC transaction context + * + * Return values: + * %true: Success + * %false: No response buffer space was available + */ +static inline bool svcxdr_set_accept_stat(struct svc_rqst *rqstp) +{ + struct xdr_stream *xdr = &rqstp->rq_res_stream; + + rqstp->rq_accept_statp = xdr_reserve_space(xdr, XDR_UNIT); + if (unlikely(!rqstp->rq_accept_statp)) + return false; + *rqstp->rq_accept_statp = rpc_success; + return true; +} + #endif /* SUNRPC_SVC_H */ -- cgit v1.2.3 From 65ba3d2425bf51165b6e88509c632bd15d12883d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 10 Jan 2023 10:31:54 -0500 Subject: SUNRPC: Use per-CPU counters to tally server RPC counts - Improves counting accuracy - Reduces cross-CPU memory traffic Signed-off-by: Chuck Lever --- include/linux/lockd/lockd.h | 4 ++-- include/linux/sunrpc/svc.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 70ce419e2709..84de6b7c9948 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -196,9 +196,9 @@ struct nlm_block { * Global variables */ extern const struct rpc_program nlm_program; -extern const struct svc_procedure nlmsvc_procedures[]; +extern const struct svc_procedure nlmsvc_procedures[24]; #ifdef CONFIG_LOCKD_V4 -extern const struct svc_procedure 
nlmsvc_procedures4[]; +extern const struct svc_procedure nlmsvc_procedures4[24]; #endif extern int nlmsvc_grace_period; extern unsigned long nlmsvc_timeout; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 392d2d2620fa..1b078c9a2d60 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -377,7 +377,7 @@ struct svc_version { u32 vs_vers; /* version number */ u32 vs_nproc; /* number of procedures */ const struct svc_procedure *vs_proc; /* per-procedure info */ - unsigned int *vs_count; /* call counts */ + unsigned long __percpu *vs_count; /* call counts */ u32 vs_xdrsize; /* xdrsize needed for this version */ /* Don't register with rpcbind */ -- cgit v1.2.3 From ccf08bed6e7a80519569456edd2ea21b7b1701c6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 10 Jan 2023 10:32:00 -0500 Subject: SUNRPC: Replace pool stats with per-CPU variables Eliminate the use of bus-locked operations in svc_xprt_enqueue(), which is a hot path. Replace them with per-cpu variables to reduce cross-CPU memory bus traffic. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 1b078c9a2d60..877891536c2f 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -21,14 +21,6 @@ #include #include -/* statistics for svc_pool structures */ -struct svc_pool_stats { - atomic_long_t packets; - unsigned long sockets_queued; - atomic_long_t threads_woken; - atomic_long_t threads_timedout; -}; - /* * * RPC service thread pool. @@ -45,7 +37,12 @@ struct svc_pool { struct list_head sp_sockets; /* pending sockets */ unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ - struct svc_pool_stats sp_stats; /* statistics on pool operation */ + + /* statistics on pool operation */ + struct percpu_counter sp_sockets_queued; + struct percpu_counter sp_threads_woken; + struct percpu_counter sp_threads_timedout; + #define SP_TASK_PENDING (0) /* still work to do even if no * xprt is queued. */ #define SP_CONGESTED (1) -- cgit v1.2.3 From 97648b94bd9dbb987fe7e4067deecdb47d4fd7e7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:20:22 -0500 Subject: SUNRPC: Add header ifdefs to linux/sunrpc/gss_krb5.h Standard convention: Ensure the contents of the header are included only once per source file. 
Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 91f43d86879d..0135139ddf20 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -1,6 +1,4 @@ /* - * linux/include/linux/sunrpc/gss_krb5_types.h - * * Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h, * lib/gssapi/krb5/gssapiP_krb5.h, and others * @@ -36,6 +34,9 @@ * */ +#ifndef _LINUX_SUNRPC_GSS_KRB5_H +#define _LINUX_SUNRPC_GSS_KRB5_H + #include #include #include @@ -316,3 +317,5 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, void gss_krb5_make_confounder(char *p, u32 conflen); + +#endif /* _LINUX_SUNRPC_GSS_KRB5_H */ -- cgit v1.2.3 From f03640a1a9782f4bf7c1db63e2e6a9598c6d2c6e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:20:28 -0500 Subject: SUNRPC: Remove .blocksize field from struct gss_krb5_enctype It is not clear from documenting comments, specifications, or code usage what value the gss_krb5_enctype.blocksize field is supposed to store. The "encryption blocksize" depends only on the cipher being used, so that value can be derived where it's needed instead of stored as a constant. RFC 3961 Section 5.2 says: > cipher block size, c > This is the block size of the block cipher underlying the > encryption and decryption functions indicated above, used for key > derivation and for the size of the message confounder and initial > vector. (If a block cipher is not in use, some comparable > parameter should be determined.) It must be at least 5 octets. > > This is not actually an independent parameter; rather, it is a > property of the functions E and D. It is listed here to clarify > the distinction between it and the message block size, m. In the Linux kernel's implementation of the SunRPC RPCSEC GSS Kerberos 5 mechanism, the cipher block size, which is dependent on the encryption and decryption transforms, is used only in krb5_derive_key(), so it is straightforward to replace it.
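For illustration, one way the value can be derived where it is needed, assuming a transform allocated from the enctype's encrypt_name; the function name is invented for this example and error handling is trimmed:

	/* Illustrative sketch, not part of this patch. */
	static unsigned int example_enctype_blocksize(const struct gss_krb5_enctype *gk5e)
	{
		struct crypto_sync_skcipher *tfm;
		unsigned int blocksize;

		tfm = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
		if (IS_ERR(tfm))
			return 0;
		blocksize = crypto_sync_skcipher_blocksize(tfm);
		crypto_free_sync_skcipher(tfm);
		return blocksize;
	}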
Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 9a833825b55b..51860e3a0216 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -64,9 +64,6 @@ struct gss_krb5_enctype { const char *cksum_name; /* crypto checksum name */ const u16 signalg; /* signing algorithm */ const u16 sealalg; /* sealing algorithm */ - const u32 conflen; /* confounder length - (normally the same as - the blocksize) */ const u32 cksumlength; /* checksum length */ const u32 keyed_cksum; /* is it a keyed cksum? */ const u32 keybytes; /* raw key len, in bytes */ -- cgit v1.2.3 From 7f675ca7757bfeb70e19d187dc3be44deb836da8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:20:41 -0500 Subject: SUNRPC: Improve Kerberos confounder generation Other common Kerberos implementations use a fully random confounder for encryption. The reason for this is explained in the new comment added by this patch. The current get_random_bytes() implementation does not exhaust system entropy. Since confounder generation is part of Kerberos itself rather than the GSS-API Kerberos mechanism, the function is renamed and moved. Note that light top-down analysis shows that the SHA-1 transform is by far the most CPU-intensive part of encryption. Thus we do not expect this change to result in a significant performance impact. However, eventually it might be necessary to generate an independent stream of confounders for each Kerberos context to help improve I/O parallelism. Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 51860e3a0216..f0fac1e69731 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -311,7 +311,4 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, struct xdr_buf *buf, u32 *plainoffset, u32 *plainlen); -void -gss_krb5_make_confounder(char *p, u32 conflen); - #endif /* _LINUX_SUNRPC_GSS_KRB5_H */ -- cgit v1.2.3 From 7989a4f4ab5437ec82b0b59984594f848f12b36a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:20:54 -0500 Subject: SUNRPC: Refactor set-up for aux_cipher Hoist the name of the aux_cipher into struct gss_krb5_enctype to prepare for obscuring the encryption keys just after they are derived. 
Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index f0fac1e69731..34d54714c6a3 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -61,6 +61,7 @@ struct gss_krb5_enctype { const u32 ctype; /* checksum type */ const char *name; /* "friendly" name */ const char *encrypt_name; /* crypto encrypt name */ + const char *aux_cipher; /* aux encrypt cipher name */ const char *cksum_name; /* crypto checksum name */ const u16 signalg; /* signing algorithm */ const u16 sealalg; /* sealing algorithm */ -- cgit v1.2.3 From 9f0b49f933ab1ec5e7140a43eec72b0c5181cabf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:21:01 -0500 Subject: SUNRPC: Obscure Kerberos encryption keys The encryption subkeys are not used after the cipher transforms have been allocated and keyed. There is no need to retain them in struct krb5_ctx. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 34d54714c6a3..46eaa2ee9c21 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -110,8 +110,6 @@ struct krb5_ctx { struct xdr_netobj mech_used; u8 initiator_sign[GSS_KRB5_MAX_KEYLEN]; u8 acceptor_sign[GSS_KRB5_MAX_KEYLEN]; - u8 initiator_seal[GSS_KRB5_MAX_KEYLEN]; - u8 acceptor_seal[GSS_KRB5_MAX_KEYLEN]; u8 initiator_integ[GSS_KRB5_MAX_KEYLEN]; u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN]; }; -- cgit v1.2.3 From 2dbe0cac3cd6d747579b0b347145326eddfd4e5c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:21:07 -0500 Subject: SUNRPC: Obscure Kerberos signing keys There's no need to keep the signing keys around if we instead allocate and key an ahash and keep that. This not only enables the subkeys to be destroyed immediately after deriving them, but it makes the Kerberos signing code path more efficient. 
Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 46eaa2ee9c21..9d897f1ac85a 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -102,14 +102,14 @@ struct krb5_ctx { struct crypto_sync_skcipher *initiator_enc; struct crypto_sync_skcipher *acceptor_enc_aux; struct crypto_sync_skcipher *initiator_enc_aux; + struct crypto_ahash *acceptor_sign; + struct crypto_ahash *initiator_sign; u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ u8 cksum[GSS_KRB5_MAX_KEYLEN]; atomic_t seq_send; atomic64_t seq_send64; time64_t endtime; struct xdr_netobj mech_used; - u8 initiator_sign[GSS_KRB5_MAX_KEYLEN]; - u8 acceptor_sign[GSS_KRB5_MAX_KEYLEN]; u8 initiator_integ[GSS_KRB5_MAX_KEYLEN]; u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN]; }; @@ -252,7 +252,6 @@ u32 gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, int len, struct xdr_buf *buf); - u32 krb5_encrypt(struct crypto_sync_skcipher *key, void *iv, void *in, void *out, int length); -- cgit v1.2.3 From 8270dbfcebea5b68037a84ad1710e2cfa499b82f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:21:13 -0500 Subject: SUNRPC: Obscure Kerberos integrity keys There's no need to keep the integrity keys around if we instead allocate and key a pair of ahashes and keep those. This not only enables the subkeys to be destroyed immediately after deriving them, but it makes the Kerberos integrity code path more efficient. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 9d897f1ac85a..85e65232bb61 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -104,14 +104,14 @@ struct krb5_ctx { struct crypto_sync_skcipher *initiator_enc_aux; struct crypto_ahash *acceptor_sign; struct crypto_ahash *initiator_sign; + struct crypto_ahash *initiator_integ; + struct crypto_ahash *acceptor_integ; u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ u8 cksum[GSS_KRB5_MAX_KEYLEN]; atomic_t seq_send; atomic64_t seq_send64; time64_t endtime; struct xdr_netobj mech_used; - u8 initiator_integ[GSS_KRB5_MAX_KEYLEN]; - u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN]; }; /* The length of the Kerberos GSS token header */ @@ -233,11 +233,6 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, struct xdr_buf *body, int body_offset, u8 *cksumkey, unsigned int usage, struct xdr_netobj *cksumout); -u32 -make_checksum_v2(struct krb5_ctx *, char *header, int hdrlen, - struct xdr_buf *body, int body_offset, u8 *key, - unsigned int usage, struct xdr_netobj *cksum); - u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *, struct xdr_netobj *); -- cgit v1.2.3 From e01b2c79f4af1298b961116aba3e64367fe73286 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:21:20 -0500 Subject: SUNRPC: Refactor the GSS-API Per Message calls in the Kerberos mechanism Replace a number of switches on encryption type so that all of them don't have to be modified when adding or removing support for an enctype. 
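The per-message entry points then become thin dispatchers; roughly like the sketch below (simplified, ignoring details such as context-expiry checks):

	static u32 gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
				     struct xdr_buf *buf, struct page **pages)
	{
		struct krb5_ctx *kctx = gctx->internal_ctx_id;

		/* dispatch via the enctype instead of switching on kctx->enctype */
		return kctx->gk5e->wrap(kctx, offset, buf, pages);
	}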
Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 85e65232bb61..f1201478fdd5 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -84,6 +84,15 @@ struct gss_krb5_enctype { u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, u32 len, struct xdr_buf *buf, u32 *headskip, u32 *tailskip); /* v2 decryption function */ + u32 (*get_mic)(struct krb5_ctx *kctx, struct xdr_buf *text, + struct xdr_netobj *token); + u32 (*verify_mic)(struct krb5_ctx *kctx, struct xdr_buf *message_buffer, + struct xdr_netobj *read_token); + u32 (*wrap)(struct krb5_ctx *kctx, int offset, + struct xdr_buf *buf, struct page **pages); + u32 (*unwrap)(struct krb5_ctx *kctx, int offset, int len, + struct xdr_buf *buf, unsigned int *slack, + unsigned int *align); }; /* krb5_ctx flags definitions */ @@ -233,20 +242,6 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, struct xdr_buf *body, int body_offset, u8 *cksumkey, unsigned int usage, struct xdr_netobj *cksumout); -u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *, - struct xdr_netobj *); - -u32 gss_verify_mic_kerberos(struct gss_ctx *, struct xdr_buf *, - struct xdr_netobj *); - -u32 -gss_wrap_kerberos(struct gss_ctx *ctx_id, int offset, - struct xdr_buf *outbuf, struct page **pages); - -u32 -gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset, int len, - struct xdr_buf *buf); - u32 krb5_encrypt(struct crypto_sync_skcipher *key, void *iv, void *in, void *out, int length); -- cgit v1.2.3 From 279a67cdd491a53028eb0b52508383098c6d992b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:21:26 -0500 Subject: SUNRPC: Remove another switch on ctx->enctype Replace another switch on encryption type so that it does not have to be modified when adding or removing support for an enctype. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index f1201478fdd5..68ae0c3d4cf7 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -75,6 +75,7 @@ struct gss_krb5_enctype { u32 (*decrypt) (struct crypto_sync_skcipher *tfm, void *iv, void *in, void *out, int length); /* decryption function */ + int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask); u32 (*mk_key) (const struct gss_krb5_enctype *gk5e, struct xdr_netobj *in, struct xdr_netobj *out); /* complete key generation */ -- cgit v1.2.3 From 17781b2ce41a8915163d7cdada021f809ccd49f0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:21:45 -0500 Subject: SUNRPC: Replace KRB5_SUPPORTED_ENCTYPES macro Now that all consumers of the KRB5_SUPPORTED_ENCTYPES macro are within the SunRPC layer, the macro can be replaced with something private and more flexible. 
Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5_enctypes.h | 41 -------------------------------- 1 file changed, 41 deletions(-) delete mode 100644 include/linux/sunrpc/gss_krb5_enctypes.h (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h b/include/linux/sunrpc/gss_krb5_enctypes.h deleted file mode 100644 index 87eea679d750..000000000000 --- a/include/linux/sunrpc/gss_krb5_enctypes.h +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Define the string that exports the set of kernel-supported - * Kerberos enctypes. This list is sent via upcall to gssd, and - * is also exposed via the nfsd /proc API. The consumers generally - * treat this as an ordered list, where the first item in the list - * is the most preferred. - */ - -#ifndef _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H -#define _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H - -#ifdef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES - -/* - * NB: This list includes DES3_CBC_SHA1, which was deprecated by RFC 8429. - * - * ENCTYPE_AES256_CTS_HMAC_SHA1_96 - * ENCTYPE_AES128_CTS_HMAC_SHA1_96 - * ENCTYPE_DES3_CBC_SHA1 - */ -#define KRB5_SUPPORTED_ENCTYPES "18,17,16" - -#else /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */ - -/* - * NB: This list includes encryption types that were deprecated - * by RFC 8429 and RFC 6649. - * - * ENCTYPE_AES256_CTS_HMAC_SHA1_96 - * ENCTYPE_AES128_CTS_HMAC_SHA1_96 - * ENCTYPE_DES3_CBC_SHA1 - * ENCTYPE_DES_CBC_MD5 - * ENCTYPE_DES_CBC_CRC - * ENCTYPE_DES_CBC_MD4 - */ -#define KRB5_SUPPORTED_ENCTYPES "18,17,16,3,1,2" - -#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */ - -#endif /* _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H */ -- cgit v1.2.3 From d50b8152c992ac88c5f1f0cc8ade6ee0aa0a3704 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:21:58 -0500 Subject: SUNRPC: Remove ->encrypt and ->decrypt methods from struct gss_krb5_enctype Clean up: ->encrypt is set to only one value. Replace the two remaining call sites with direct calls to krb5_encrypt(). There have never been any call sites for the ->decrypt() method. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 68ae0c3d4cf7..a0646df12beb 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -69,12 +69,6 @@ struct gss_krb5_enctype { const u32 keyed_cksum; /* is it a keyed cksum? 
*/ const u32 keybytes; /* raw key len, in bytes */ const u32 keylength; /* final key len, in bytes */ - u32 (*encrypt) (struct crypto_sync_skcipher *tfm, - void *iv, void *in, void *out, - int length); /* encryption function */ - u32 (*decrypt) (struct crypto_sync_skcipher *tfm, - void *iv, void *in, void *out, - int length); /* decryption function */ int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask); u32 (*mk_key) (const struct gss_krb5_enctype *gk5e, struct xdr_netobj *in, @@ -243,14 +237,6 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, struct xdr_buf *body, int body_offset, u8 *cksumkey, unsigned int usage, struct xdr_netobj *cksumout); -u32 -krb5_encrypt(struct crypto_sync_skcipher *key, - void *iv, void *in, void *out, int length); - -u32 -krb5_decrypt(struct crypto_sync_skcipher *key, - void *iv, void *in, void *out, int length); - int gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *outbuf, int offset, struct page **pages); -- cgit v1.2.3 From ae6ad5d0b7901b301234143e93624417ac9fd9ef Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:22:04 -0500 Subject: SUNRPC: Rename .encrypt_v2 and .decrypt_v2 methods Clean up: there is now only one encrypt and only one decrypt method, thus there is no longer a need for the v2-suffixed method names. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index a0646df12beb..85fce36c242d 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -73,12 +73,10 @@ struct gss_krb5_enctype { u32 (*mk_key) (const struct gss_krb5_enctype *gk5e, struct xdr_netobj *in, struct xdr_netobj *out); /* complete key generation */ - u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, - struct page **pages); /* v2 encryption function */ - u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, u32 len, - struct xdr_buf *buf, u32 *headskip, - u32 *tailskip); /* v2 decryption function */ + u32 (*encrypt)(struct krb5_ctx *kctx, u32 offset, + struct xdr_buf *buf, struct page **pages); + u32 (*decrypt)(struct krb5_ctx *kctx, u32 offset, u32 len, + struct xdr_buf *buf, u32 *headskip, u32 *tailskip); u32 (*get_mic)(struct krb5_ctx *kctx, struct xdr_buf *text, struct xdr_netobj *token); u32 (*verify_mic)(struct krb5_ctx *kctx, struct xdr_buf *message_buffer, @@ -276,14 +274,4 @@ gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e, struct xdr_netobj *randombits, struct xdr_netobj *key); -u32 -gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, - struct page **pages); - -u32 -gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, - struct xdr_buf *buf, u32 *plainoffset, - u32 *plainlen); - #endif /* _LINUX_SUNRPC_GSS_KRB5_H */ -- cgit v1.2.3 From 2691a27d9b3e6a48adeb87a9dcf4e8a0ca84a26e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:22:11 -0500 Subject: SUNRPC: Hoist KDF into struct gss_krb5_enctype Each Kerberos enctype can have a different KDF. Refactor the key derivation path to support different KDFs for the enctypes introduced in subsequent patches. In particular, expose the key derivation function in struct gss_krb5_enctype instead of the enctype's preferred random-to-key function. 
The latter is usually the identity function and is only ever called during key derivation, so have each KDF call it directly. A couple of extra clean-ups: - Deduplicate the set_cdata() helper - Have ->derive_key return negative errnos, in accordance with usual kernel coding conventions This patch is a little bigger than I'd like, but these are all mechanical changes and they are all to the same areas of code. No behavior change is intended. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 85fce36c242d..04addef79177 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -70,9 +70,11 @@ struct gss_krb5_enctype { const u32 keybytes; /* raw key len, in bytes */ const u32 keylength; /* final key len, in bytes */ int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask); - u32 (*mk_key) (const struct gss_krb5_enctype *gk5e, - struct xdr_netobj *in, - struct xdr_netobj *out); /* complete key generation */ + int (*derive_key)(const struct gss_krb5_enctype *gk5e, + const struct xdr_netobj *in, + struct xdr_netobj *out, + const struct xdr_netobj *label, + gfp_t gfp_mask); u32 (*encrypt)(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, struct page **pages); u32 (*decrypt)(struct krb5_ctx *kctx, u32 offset, u32 len, @@ -257,21 +259,4 @@ krb5_get_seq_num(struct krb5_ctx *kctx, int xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen); -u32 -krb5_derive_key(const struct gss_krb5_enctype *gk5e, - const struct xdr_netobj *inkey, - struct xdr_netobj *outkey, - const struct xdr_netobj *in_constant, - gfp_t gfp_mask); - -u32 -gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e, - struct xdr_netobj *randombits, - struct xdr_netobj *key); - -u32 -gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e, - struct xdr_netobj *randombits, - struct xdr_netobj *key); - #endif /* _LINUX_SUNRPC_GSS_KRB5_H */ -- cgit v1.2.3 From af664fc9023e69d1a90800c0f815c296bf727e18 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:22:30 -0500 Subject: SUNRPC: Add new subkey length fields The aes256-cts-hmac-sha384-192 enctype specifies the length of its checksum and integrity subkeys as 192 bits, but the length of its encryption subkey (Ke) as 256 bits. Add new fields to struct gss_krb5_enctype that specify the key lengths individually, and where needed, use the correct new field instead of ->keylength. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 04addef79177..3e97d2a7c87d 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -42,10 +42,16 @@ #include #include +/* + * The RFCs often specify payload lengths in bits. This helper + * converts a specified bit-length to the number of octets/bytes. 
+ */ +#define BITS2OCTETS(x) ((x) / 8) + /* Length of constant used in key derivation */ #define GSS_KRB5_K5CLENGTH (5) -/* Maximum key length (in bytes) for the supported crypto algorithms*/ +/* Maximum key length (in bytes) for the supported crypto algorithms */ #define GSS_KRB5_MAX_KEYLEN (32) /* Maximum checksum function output for the supported crypto algorithms */ @@ -68,7 +74,11 @@ struct gss_krb5_enctype { const u32 cksumlength; /* checksum length */ const u32 keyed_cksum; /* is it a keyed cksum? */ const u32 keybytes; /* raw key len, in bytes */ - const u32 keylength; /* final key len, in bytes */ + const u32 keylength; /* protocol key length, in octets */ + const u32 Kc_length; /* checksum subkey length, in octets */ + const u32 Ke_length; /* encryption subkey length, in octets */ + const u32 Ki_length; /* integrity subkey length, in octets */ + int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask); int (*derive_key)(const struct gss_krb5_enctype *gk5e, const struct xdr_netobj *in, -- cgit v1.2.3 From a40cf7530d3104793f9361e69e84ada7960724f2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:22:43 -0500 Subject: SUNRPC: Add gk5e definitions for RFC 8009 encryption types Fill in entries in the supported_gss_krb5_enctypes array for the encryption types defined in RFC 8009. These new enctypes use the SHA-256 and SHA-384 message digest algorithms (as defined in FIPS-180) instead of the deprecated SHA-1 algorithm, and are thus more secure. Note that NIST has scheduled SHA-1 for deprecation: https://www.nist.gov/news-events/news/2022/12/nist-retires-sha-1-cryptographic-algorithm Thus these new encryption types are placed under a separate CONFIG option to enable distributors to separately introduce support for the AES-SHA2 enctypes and deprecate support for the current set of AES-SHA1 encryption types as their user space allows. As this implementation is still a "beta", the default is to not build it automatically. 
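For orientation, the RFC 8009 parameters for aes256-cts-hmac-sha384-192 map onto the struct gss_krb5_enctype fields roughly as below (an illustrative entry, not the exact table initializer from this patch):

	{
		.etype		= ENCTYPE_AES256_CTS_HMAC_SHA384_192,
		.ctype		= CKSUMTYPE_HMAC_SHA384_192_AES256,
		.name		= "aes256-cts-hmac-sha384-192",
		.encrypt_name	= "cts(cbc(aes))",
		.aux_cipher	= "cbc(aes)",
		.cksum_name	= "hmac(sha384)",
		.cksumlength	= BITS2OCTETS(192),	/* truncated HMAC-SHA-384 */
		.keyed_cksum	= 1,
		.keylength	= BITS2OCTETS(256),	/* protocol key */
		.Kc_length	= BITS2OCTETS(192),
		.Ke_length	= BITS2OCTETS(256),
		.Ki_length	= BITS2OCTETS(192),
	},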
Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 3e97d2a7c87d..8ff397b5c04b 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -54,8 +54,8 @@ /* Maximum key length (in bytes) for the supported crypto algorithms */ #define GSS_KRB5_MAX_KEYLEN (32) -/* Maximum checksum function output for the supported crypto algorithms */ -#define GSS_KRB5_MAX_CKSUM_LEN (20) +/* Maximum checksum function output for the supported enctypes */ +#define GSS_KRB5_MAX_CKSUM_LEN (24) /* Maximum blocksize for the supported crypto algorithms */ #define GSS_KRB5_MAX_BLOCKSIZE (16) @@ -160,6 +160,12 @@ enum seal_alg { SEAL_ALG_DES3KD = 0x0002 }; +/* + * These values are assigned by IANA and published via the + * subregistry at the link below: + * + * https://www.iana.org/assignments/kerberos-parameters/kerberos-parameters.xhtml#kerberos-parameters-2 + */ #define CKSUMTYPE_CRC32 0x0001 #define CKSUMTYPE_RSA_MD4 0x0002 #define CKSUMTYPE_RSA_MD4_DES 0x0003 @@ -170,6 +176,8 @@ enum seal_alg { #define CKSUMTYPE_HMAC_SHA1_DES3 0x000c #define CKSUMTYPE_HMAC_SHA1_96_AES128 0x000f #define CKSUMTYPE_HMAC_SHA1_96_AES256 0x0010 +#define CKSUMTYPE_HMAC_SHA256_128_AES128 0x0013 +#define CKSUMTYPE_HMAC_SHA384_192_AES256 0x0014 #define CKSUMTYPE_HMAC_MD5_ARCFOUR -138 /* Microsoft md5 hmac cksumtype */ /* from gssapi_err_krb5.h */ @@ -190,6 +198,11 @@ enum seal_alg { /* per Kerberos v5 protocol spec crypto types from the wire. * these get mapped to linux kernel crypto routines. + * + * These values are assigned by IANA and published via the + * subregistry at the link below: + * + * https://www.iana.org/assignments/kerberos-parameters/kerberos-parameters.xhtml#kerberos-parameters-1 */ #define ENCTYPE_NULL 0x0000 #define ENCTYPE_DES_CBC_CRC 0x0001 /* DES cbc mode with CRC-32 */ @@ -203,6 +216,8 @@ enum seal_alg { #define ENCTYPE_DES3_CBC_SHA1 0x0010 #define ENCTYPE_AES128_CTS_HMAC_SHA1_96 0x0011 #define ENCTYPE_AES256_CTS_HMAC_SHA1_96 0x0012 +#define ENCTYPE_AES128_CTS_HMAC_SHA256_128 0x0013 +#define ENCTYPE_AES256_CTS_HMAC_SHA384_192 0x0014 #define ENCTYPE_ARCFOUR_HMAC 0x0017 #define ENCTYPE_ARCFOUR_HMAC_EXP 0x0018 #define ENCTYPE_UNKNOWN 0x01ff -- cgit v1.2.3 From 3394682fba3b9010c6147e94f37633f044876e5e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:23:08 -0500 Subject: SUNRPC: Support the Camellia enctypes RFC 6803 defines two encryption types that use Camellia ciphers (RFC 3713) and CMAC digests. Implement support for those in SunRPC's GSS Kerberos 5 mechanism. There has not been an explicit request to support these enctypes. However, this new set of enctypes provides a good alternative to the AES-SHA1 enctypes that are to be deprecated at some point. As this implementation is still a "beta", the default is to not build it automatically. 
Tested-by: Scott Mayhew Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 8ff397b5c04b..cbb6c8192890 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -176,6 +176,8 @@ enum seal_alg { #define CKSUMTYPE_HMAC_SHA1_DES3 0x000c #define CKSUMTYPE_HMAC_SHA1_96_AES128 0x000f #define CKSUMTYPE_HMAC_SHA1_96_AES256 0x0010 +#define CKSUMTYPE_CMAC_CAMELLIA128 0x0011 +#define CKSUMTYPE_CMAC_CAMELLIA256 0x0012 #define CKSUMTYPE_HMAC_SHA256_128_AES128 0x0013 #define CKSUMTYPE_HMAC_SHA384_192_AES256 0x0014 #define CKSUMTYPE_HMAC_MD5_ARCFOUR -138 /* Microsoft md5 hmac cksumtype */ @@ -220,6 +222,8 @@ enum seal_alg { #define ENCTYPE_AES256_CTS_HMAC_SHA384_192 0x0014 #define ENCTYPE_ARCFOUR_HMAC 0x0017 #define ENCTYPE_ARCFOUR_HMAC_EXP 0x0018 +#define ENCTYPE_CAMELLIA128_CTS_CMAC 0x0019 +#define ENCTYPE_CAMELLIA256_CTS_CMAC 0x001A #define ENCTYPE_UNKNOWN 0x01ff /* -- cgit v1.2.3 From 6e460c230d2dfb0e5a02b6e0995546bb4b9d208e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 15 Jan 2023 12:23:27 -0500 Subject: SUNRPC: Move remaining internal definitions to gss_krb5_internal.h The goal is to leave only protocol-defined items in gss_krb5.h so that it can be easily replaced by a generic header. Implementation specific items are moved to the new internal header. Tested-by: Scott Mayhew Reviewed-by: Simo Sorce Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_krb5.h | 117 ---------------------------------------- 1 file changed, 117 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index cbb6c8192890..78a80bf3fdcb 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -42,12 +42,6 @@ #include #include -/* - * The RFCs often specify payload lengths in bits. This helper - * converts a specified bit-length to the number of octets/bytes. - */ -#define BITS2OCTETS(x) ((x) / 8) - /* Length of constant used in key derivation */ #define GSS_KRB5_K5CLENGTH (5) @@ -60,74 +54,6 @@ /* Maximum blocksize for the supported crypto algorithms */ #define GSS_KRB5_MAX_BLOCKSIZE (16) -struct krb5_ctx; - -struct gss_krb5_enctype { - const u32 etype; /* encryption (key) type */ - const u32 ctype; /* checksum type */ - const char *name; /* "friendly" name */ - const char *encrypt_name; /* crypto encrypt name */ - const char *aux_cipher; /* aux encrypt cipher name */ - const char *cksum_name; /* crypto checksum name */ - const u16 signalg; /* signing algorithm */ - const u16 sealalg; /* sealing algorithm */ - const u32 cksumlength; /* checksum length */ - const u32 keyed_cksum; /* is it a keyed cksum? 
*/ - const u32 keybytes; /* raw key len, in bytes */ - const u32 keylength; /* protocol key length, in octets */ - const u32 Kc_length; /* checksum subkey length, in octets */ - const u32 Ke_length; /* encryption subkey length, in octets */ - const u32 Ki_length; /* integrity subkey length, in octets */ - - int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask); - int (*derive_key)(const struct gss_krb5_enctype *gk5e, - const struct xdr_netobj *in, - struct xdr_netobj *out, - const struct xdr_netobj *label, - gfp_t gfp_mask); - u32 (*encrypt)(struct krb5_ctx *kctx, u32 offset, - struct xdr_buf *buf, struct page **pages); - u32 (*decrypt)(struct krb5_ctx *kctx, u32 offset, u32 len, - struct xdr_buf *buf, u32 *headskip, u32 *tailskip); - u32 (*get_mic)(struct krb5_ctx *kctx, struct xdr_buf *text, - struct xdr_netobj *token); - u32 (*verify_mic)(struct krb5_ctx *kctx, struct xdr_buf *message_buffer, - struct xdr_netobj *read_token); - u32 (*wrap)(struct krb5_ctx *kctx, int offset, - struct xdr_buf *buf, struct page **pages); - u32 (*unwrap)(struct krb5_ctx *kctx, int offset, int len, - struct xdr_buf *buf, unsigned int *slack, - unsigned int *align); -}; - -/* krb5_ctx flags definitions */ -#define KRB5_CTX_FLAG_INITIATOR 0x00000001 -#define KRB5_CTX_FLAG_CFX 0x00000002 -#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY 0x00000004 - -struct krb5_ctx { - int initiate; /* 1 = initiating, 0 = accepting */ - u32 enctype; - u32 flags; - const struct gss_krb5_enctype *gk5e; /* enctype-specific info */ - struct crypto_sync_skcipher *enc; - struct crypto_sync_skcipher *seq; - struct crypto_sync_skcipher *acceptor_enc; - struct crypto_sync_skcipher *initiator_enc; - struct crypto_sync_skcipher *acceptor_enc_aux; - struct crypto_sync_skcipher *initiator_enc_aux; - struct crypto_ahash *acceptor_sign; - struct crypto_ahash *initiator_sign; - struct crypto_ahash *initiator_integ; - struct crypto_ahash *acceptor_integ; - u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */ - u8 cksum[GSS_KRB5_MAX_KEYLEN]; - atomic_t seq_send; - atomic64_t seq_send64; - time64_t endtime; - struct xdr_netobj mech_used; -}; - /* The length of the Kerberos GSS token header */ #define GSS_KRB5_TOK_HDR_LEN (16) @@ -245,47 +171,4 @@ enum seal_alg { #define KG_USAGE_INITIATOR_SEAL (24) #define KG_USAGE_INITIATOR_SIGN (25) -/* - * This compile-time check verifies that we will not exceed the - * slack space allotted by the client and server auth_gss code - * before they call gss_wrap(). 
- */ -#define GSS_KRB5_MAX_SLACK_NEEDED \ - (GSS_KRB5_TOK_HDR_LEN /* gss token header */ \ - + GSS_KRB5_MAX_CKSUM_LEN /* gss token checksum */ \ - + GSS_KRB5_MAX_BLOCKSIZE /* confounder */ \ - + GSS_KRB5_MAX_BLOCKSIZE /* possible padding */ \ - + GSS_KRB5_TOK_HDR_LEN /* encrypted hdr in v2 token */\ - + GSS_KRB5_MAX_CKSUM_LEN /* encryption hmac */ \ - + 4 + 4 /* RPC verifier */ \ - + GSS_KRB5_TOK_HDR_LEN \ - + GSS_KRB5_MAX_CKSUM_LEN) - -u32 -make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, - struct xdr_buf *body, int body_offset, u8 *cksumkey, - unsigned int usage, struct xdr_netobj *cksumout); - -int -gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *outbuf, - int offset, struct page **pages); - -int -gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *inbuf, - int offset); - -s32 -krb5_make_seq_num(struct krb5_ctx *kctx, - struct crypto_sync_skcipher *key, - int direction, - u32 seqnum, unsigned char *cksum, unsigned char *buf); - -s32 -krb5_get_seq_num(struct krb5_ctx *kctx, - unsigned char *cksum, - unsigned char *buf, int *direction, u32 *seqnum); - -int -xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen); - #endif /* _LINUX_SUNRPC_GSS_KRB5_H */ -- cgit v1.2.3 From 319951eba0fc412a78a8fe3d2ee5e143cc318c14 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 24 Jan 2023 15:40:22 -0500 Subject: SUNRPC: Remove ->xpo_secure_port() There's no need for the cost of this extra virtual function call during every RPC transaction: the RQ_SECURE bit can be set properly in ->xpo_recvfrom() instead. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index d42a75b3be10..775368802762 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -26,7 +26,6 @@ struct svc_xprt_ops { void (*xpo_release_rqst)(struct svc_rqst *); void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); - void (*xpo_secure_port)(struct svc_rqst *rqstp); void (*xpo_kill_temp_xprt)(struct svc_xprt *); void (*xpo_start_tls)(struct svc_xprt *); }; -- cgit v1.2.3 From 225a05043cd87a37455bfbff8c35d4e276519387 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 8 Feb 2023 14:56:11 +0000 Subject: filemap: Remove lock_page_killable() There are no more callers; remove this function before any more appear. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Howells Signed-off-by: Steve French --- include/linux/pagemap.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 29e1f9e76eb6..2f5b36f446cc 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -977,16 +977,6 @@ static inline int folio_lock_killable(struct folio *folio) return 0; } -/* - * lock_page_killable is like lock_page but can be interrupted by fatal - * signals. It returns 0 if it locked the page and -EINTR if it was - * killed while waiting. - */ -static inline int lock_page_killable(struct page *page) -{ - return folio_lock_killable(page_folio(page)); -} - /* * folio_lock_or_retry - Lock the folio, unless this would block and the * caller indicated that it can handle a retry. 
-- cgit v1.2.3 From 9747b9e92418b61c2281561e0651803f1fad0159 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 15 Feb 2023 18:39:36 +0800 Subject: mm: hugetlb: change to return bool for isolate_hugetlb() Now isolate_hugetlb() only returns 0 or -EBUSY, and most callers do not care about the negative value, so convert it to return a boolean value; this makes the code clearer when checking the hugetlb isolation state. Also convert the 2 callers that do consider the negative value returned by isolate_hugetlb(). No functional changes intended. [akpm@linux-foundation.org: shorten locked section, per SeongJae Park] Link: https://lkml.kernel.org/r/12a287c5bebc13df304387087bbecc6421510849.1676424378.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Acked-by: David Hildenbrand Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Mike Kravetz Acked-by: Linus Torvalds Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index df6dd624ccfe..5f5e4177b2e0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -171,7 +171,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); -int isolate_hugetlb(struct folio *folio, struct list_head *list); +bool isolate_hugetlb(struct folio *folio, struct list_head *list); int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); @@ -413,9 +413,9 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, return NULL; } -static inline int isolate_hugetlb(struct folio *folio, struct list_head *list) +static inline bool isolate_hugetlb(struct folio *folio, struct list_head *list) { - return -EBUSY; + return false; } static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison) -- cgit v1.2.3 From cd7755800eb54e8522f5e51f4e71e6494c1f1572 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 15 Feb 2023 18:39:37 +0800 Subject: mm: change to return bool for isolate_movable_page() Now isolate_movable_page() can only return 0 or -EBUSY, and no callers care about the negative return value, so convert it to return a boolean value; this makes the code clearer when checking the movable page isolation state. No functional changes intended.
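A caller-side sketch of the new convention (illustrative only; "nr_isolated" is just a stand-in counter):

	LIST_HEAD(pagelist);

	if (isolate_hugetlb(folio, &pagelist)) {
		/* success: the folio now sits on @pagelist */
		nr_isolated++;
	} else {
		/* the folio was busy; this used to be reported as -EBUSY */
	}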
[akpm@linux-foundation.org: remove unneeded comment, per Matthew] Link: https://lkml.kernel.org/r/cb877f73f4fff8d309611082ec740a7065b1ade0.1676424378.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Acked-by: David Hildenbrand Reviewed-by: Matthew Wilcox (Oracle) Acked-by: Linus Torvalds Reviewed-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/migrate.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index c88b96b48be7..6b252f519c86 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -71,7 +71,7 @@ extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason, unsigned int *ret_succeeded); extern struct page *alloc_migration_target(struct page *page, unsigned long private); -extern int isolate_movable_page(struct page *page, isolate_mode_t mode); +extern bool isolate_movable_page(struct page *page, isolate_mode_t mode); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); @@ -92,8 +92,8 @@ static inline int migrate_pages(struct list_head *l, new_page_t new, static inline struct page *alloc_migration_target(struct page *page, unsigned long private) { return NULL; } -static inline int isolate_movable_page(struct page *page, isolate_mode_t mode) - { return -EBUSY; } +static inline bool isolate_movable_page(struct page *page, isolate_mode_t mode) + { return false; } static inline int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src) -- cgit v1.2.3 From f9366f4c2a29d14f5992b195e268240c2deb116e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 16 Feb 2023 14:44:24 -0800 Subject: include/linux/migrate.h: remove unneeded externs As suggested by Matthew. 
Suggested-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/migrate.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 6b252f519c86..6241a1596a75 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -62,16 +62,16 @@ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION -extern void putback_movable_pages(struct list_head *l); +void putback_movable_pages(struct list_head *l); int migrate_folio_extra(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode, int extra_count); int migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode); -extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, - unsigned long private, enum migrate_mode mode, int reason, - unsigned int *ret_succeeded); -extern struct page *alloc_migration_target(struct page *page, unsigned long private); -extern bool isolate_movable_page(struct page *page, isolate_mode_t mode); +int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, + unsigned long private, enum migrate_mode mode, int reason, + unsigned int *ret_succeeded); +struct page *alloc_migration_target(struct page *page, unsigned long private); +bool isolate_movable_page(struct page *page, isolate_mode_t mode); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); @@ -142,8 +142,8 @@ const struct movable_operations *page_movable_ops(struct page *page) } #ifdef CONFIG_NUMA_BALANCING -extern int migrate_misplaced_page(struct page *page, - struct vm_area_struct *vma, int node); +int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, + int node); #else static inline int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, int node) -- cgit v1.2.3 From d9194e009efef9613f48022f3c885191c48120f9 Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Mon, 13 Feb 2023 12:57:02 -0600 Subject: of: dynamic: add lifecycle docbook info to node creation functions The existing docbook comments for the functions related to creating a devicetree node do not explain the reference count of a newly created node, how decrementing the reference count to zero will free the associated memory, and the caller's responsibility to call of_node_put() on the node. Explain what happens when the reference count is decremented to zero. Signed-off-by: Frank Rowand Link: https://lore.kernel.org/r/20230213185702.395776-8-frowand.list@gmail.com Signed-off-by: Rob Herring --- include/linux/of.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 37afd04f36eb..1cb659e682f5 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -100,6 +100,17 @@ struct of_reconfig_data { struct property *old_prop; }; +/** + * of_node_init - initialize a devicetree node + * @node: Pointer to device node that has been created by kzalloc() + * @phandle_name: Name of property holding a phandle value + * + * On return the device_node refcount is set to one. Use of_node_put() + * on @node when done to free the memory allocated for it. If the node + * is NOT a dynamic node the memory will not be freed. The decision of + * whether to free the memory will be done by node->release(), which is + * of_node_release(). 
+ */ /* initialize a node */ extern const struct kobj_type of_node_ktype; extern const struct fwnode_operations of_fwnode_ops; -- cgit v1.2.3 From 07073eb01c5f630344bc1c3e56b0e0d94aedf919 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Feb 2023 15:01:42 +0000 Subject: splice: Add a func to do a splice from a buffered file without ITER_PIPE Provide a function to do splice read from a buffered file, pulling the folios out of the pagecache directly by calling filemap_get_pages() to do any required reading and then pasting the returned folios into the pipe. A helper function is provided to do the actual folio pasting and will handle multipage folios by splicing as many of the relevant subpages as will fit into the pipe. The code is loosely based on filemap_read() and might belong in mm/filemap.c with that as it needs to use filemap_get_pages(). Signed-off-by: David Howells Reviewed-by: Jens Axboe cc: Christoph Hellwig cc: Al Viro cc: David Hildenbrand cc: John Hubbard cc: linux-mm@kvack.org cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- include/linux/fs.h | 3 +++ include/linux/pipe_fs_i.h | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c1769a2c5d70..28743e38df91 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3163,6 +3163,9 @@ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, struct iov_iter *iter); /* fs/splice.c */ +ssize_t filemap_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags); extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t iter_file_splice_write(struct pipe_inode_info *, diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 6cb65df3e3ba..d2c3f16cf6b1 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -156,6 +156,26 @@ static inline bool pipe_full(unsigned int head, unsigned int tail, return pipe_occupancy(head, tail) >= limit; } +/** + * pipe_buf - Return the pipe buffer for the specified slot in the pipe ring + * @pipe: The pipe to access + * @slot: The slot of interest + */ +static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe, + unsigned int slot) +{ + return &pipe->bufs[slot & (pipe->ring_size - 1)]; +} + +/** + * pipe_head_buf - Return the pipe buffer at the head of the pipe ring + * @pipe: The pipe to access + */ +static inline struct pipe_buffer *pipe_head_buf(const struct pipe_inode_info *pipe) +{ + return pipe_buf(pipe, pipe->head); +} + /** * pipe_buf_get - get a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to -- cgit v1.2.3 From 33b3b041543e8b3abf9a692d0f8c2ab0e07c50cd Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 Feb 2023 10:45:40 +0000 Subject: splice: Add a func to do a splice from an O_DIRECT file without ITER_PIPE Implement a function, direct_file_splice(), that deals with this by using an ITER_BVEC iterator instead of an ITER_PIPE iterator as the former won't free its buffers when reverted. The function bulk allocates all the buffers it thinks it is going to use in advance, does the read synchronously and only then trims the buffer down. The pages we did use get pushed into the pipe. 
This fixes a problem with the upcoming iov_iter_extract_pages() function, whereby pages extracted from a non-user-backed iterator such as ITER_PIPE aren't pinned. __iomap_dio_rw(), however, calls iov_iter_revert() to shorten the iterator to just the bufferage it is going to use - which has the side-effect of freeing the excess pipe buffers, even though they're attached to a bio and may get written to by DMA (thanks to Hillf Danton for spotting this[1]). This then causes memory corruption that is particularly noticeable when the syzbot test[2] is run. The test boils down to: out = creat(argv[1], 0666); ftruncate(out, 0x800); lseek(out, 0x200, SEEK_SET); in = open(argv[1], O_RDONLY | O_DIRECT | O_NOFOLLOW); sendfile(out, in, NULL, 0x1dd00); run repeatedly in parallel. What I think is happening is that ftruncate() occasionally shortens the DIO read that's about to be made by sendfile's splice core by reducing i_size. This should be more efficient for DIO read by virtue of doing a bulk page allocation, but slightly less efficient by ignoring any partial page in the pipe. Reported-by: syzbot+a440341a59e3b7142895@syzkaller.appspotmail.com Signed-off-by: David Howells Reviewed-by: Jens Axboe cc: Christoph Hellwig cc: Al Viro cc: David Hildenbrand cc: John Hubbard cc: linux-mm@kvack.org cc: linux-block@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20230207094731.1390-1-hdanton@sina.com/ [1] Link: https://lore.kernel.org/r/000000000000b0b3c005f3a09383@google.com/ [2] Signed-off-by: Steve French --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 28743e38df91..551c9403f9b3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3166,6 +3166,9 @@ ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, ssize_t filemap_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); +ssize_t direct_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags); extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t iter_file_splice_write(struct pipe_inode_info *, -- cgit v1.2.3 From f62e52d1276b6cd329fe72d36bdf912b2ce4caaf Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 19 Jan 2023 12:47:23 +0000 Subject: iov_iter: Define flags to qualify page extraction. Define flags to qualify page extraction to pass into iov_iter_*_pages*() rather than passing in FOLL_* flags. For now only a flag to allow peer-to-peer DMA is supported. 
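A caller that wants to permit P2PDMA pages passes the new flag through the flag-taking variant, along the lines of the sketch below (illustrative; most callers simply pass 0):

	struct page *pages[16];
	size_t offset;
	ssize_t bytes;

	bytes = iov_iter_get_pages(iter, pages, ARRAY_SIZE(pages) * PAGE_SIZE,
				   ARRAY_SIZE(pages), &offset,
				   ITER_ALLOW_P2PDMA);
	if (bytes < 0)
		return bytes;
	/* bytes of data are now described by pages[], starting at offset */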
Signed-off-by: David Howells Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Jens Axboe cc: Al Viro cc: Logan Gunthorpe cc: linux-fsdevel@vger.kernel.org cc: linux-block@vger.kernel.org Signed-off-by: Steve French --- include/linux/uio.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 9f158238edba..eec6ed8a627a 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -13,6 +13,8 @@ struct page; struct pipe_inode_info; +typedef unsigned int __bitwise iov_iter_extraction_t; + struct kvec { void *iov_base; /* and that should *never* hold a userland pointer */ size_t iov_len; @@ -252,12 +254,12 @@ void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray * loff_t start, size_t count); ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start, - unsigned gup_flags); + iov_iter_extraction_t extraction_flags); ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start, - unsigned gup_flags); + iov_iter_extraction_t extraction_flags); ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); @@ -360,4 +362,8 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, }; } +/* Flags for iov_iter_get/extract_pages*() */ +/* Allow P2PDMA on the extracted pages */ +#define ITER_ALLOW_P2PDMA ((__force iov_iter_extraction_t)0x01) + #endif -- cgit v1.2.3 From 7d58fe731028128f3a7e20b9c492be48aae133ee Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 28 Oct 2022 21:50:30 +0100 Subject: iov_iter: Add a function to extract a page list from an iterator Add a function, iov_iter_extract_pages(), to extract a list of pages from an iterator. The pages may be returned with a pin added or nothing, depending on the type of iterator. Add a second function, iov_iter_extract_will_pin(), to determine how the cleanup should be done. There are two cases: (1) ITER_IOVEC or ITER_UBUF iterator. Extracted pages will have pins (FOLL_PIN) obtained on them so that a concurrent fork() will forcibly copy the page so that DMA is done to/from the parent's buffer and is unavailable to/unaffected by the child process. iov_iter_extract_will_pin() will return true for this case. The caller should use something like unpin_user_page() to dispose of the page. (2) Any other sort of iterator. No refs or pins are obtained on the page, the assumption is made that the caller will manage page retention. iov_iter_extract_will_pin() will return false. The pages don't need additional disposal. 
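Put together, a DMA user of the new calls would look roughly like this (a sketch that assumes the caller preallocates the page array; error handling trimmed):

	struct page *page_array[8], **pages = page_array;
	size_t offset;
	ssize_t len;
	unsigned int i, npages;

	len = iov_iter_extract_pages(iter, &pages, 8 * PAGE_SIZE, 8, 0, &offset);
	if (len < 0)
		return len;
	npages = DIV_ROUND_UP(offset + len, PAGE_SIZE);

	/* ... program DMA to/from the extracted pages ... */

	if (iov_iter_extract_will_pin(iter)) {
		/* user-backed iterator: drop the FOLL_PIN pins we took */
		for (i = 0; i < npages; i++)
			unpin_user_page(pages[i]);
	}
	/* otherwise page lifetime is managed by whoever built the iterator */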
Signed-off-by: David Howells Reviewed-by: Christoph Hellwig Reviewed-by: Jens Axboe cc: Al Viro cc: John Hubbard cc: David Hildenbrand cc: Matthew Wilcox cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Steve French --- include/linux/uio.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index eec6ed8a627a..514e3b7b06b8 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -361,9 +361,34 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction, .count = count }; } - /* Flags for iov_iter_get/extract_pages*() */ /* Allow P2PDMA on the extracted pages */ #define ITER_ALLOW_P2PDMA ((__force iov_iter_extraction_t)0x01) +ssize_t iov_iter_extract_pages(struct iov_iter *i, struct page ***pages, + size_t maxsize, unsigned int maxpages, + iov_iter_extraction_t extraction_flags, + size_t *offset0); + +/** + * iov_iter_extract_will_pin - Indicate how pages from the iterator will be retained + * @iter: The iterator + * + * Examine the iterator and indicate by returning true or false as to how, if + * at all, pages extracted from the iterator will be retained by the extraction + * function. + * + * %true indicates that the pages will have a pin placed in them that the + * caller must unpin. This is must be done for DMA/async DIO to force fork() + * to forcibly copy a page for the child (the parent must retain the original + * page). + * + * %false indicates that no measures are taken and that it's up to the caller + * to retain the pages. + */ +static inline bool iov_iter_extract_will_pin(const struct iov_iter *iter) +{ + return user_backed_iter(iter); +} + #endif -- cgit v1.2.3 From 85dd2c8ff368b1446be9febde84afe1d7aec4261 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 20 Oct 2022 16:16:32 +0100 Subject: netfs: Add a function to extract a UBUF or IOVEC into a BVEC iterator Add a function to extract the pages from a user-space supplied iterator (UBUF- or IOVEC-type) into a BVEC-type iterator, retaining the pages by getting a pin on them (as FOLL_PIN) as we go. This is useful in three situations: (1) A userspace thread may have a sibling that unmaps or remaps the process's VM during the operation, changing the assignment of the pages and potentially causing an error. Retaining the pages keeps some pages around, even if this occurs; futher, we find out at the point of extraction if EFAULT is going to be incurred. (2) Pages might get swapped out/discarded if not retained, so we want to retain them to avoid the reload causing a deadlock due to a DIO from/to an mmapped region on the same file. (3) The iterator may get passed to sendmsg() by the filesystem. If a fault occurs, we may get a short write to a TCP stream that's then tricky to recover from. We don't deal with other types of iterator here, leaving it to other mechanisms to retain the pages (eg. PG_locked, PG_writeback and the pipe lock). 
Signed-off-by: David Howells cc: Jeff Layton cc: Steve French cc: Shyam Prasad N cc: Rohith Surabattula cc: linux-cachefs@redhat.com cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- include/linux/netfs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 4c76ddfb6a67..b11a84f6c32b 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -17,6 +17,7 @@ #include #include #include +#include enum netfs_sreq_ref_trace; @@ -296,6 +297,9 @@ void netfs_get_subrequest(struct netfs_io_subrequest *subreq, void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async, enum netfs_sreq_ref_trace what); void netfs_stats_show(struct seq_file *); +ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, + struct iov_iter *new, + iov_iter_extraction_t extraction_flags); /** * netfs_inode - Get the netfs inode context from the inode -- cgit v1.2.3 From 0185846975339a5c348373aa450a977f5242366b Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 27 Oct 2022 16:19:44 +0100 Subject: netfs: Add a function to extract an iterator into a scatterlist Provide a function for filling in a scatterlist from the list of pages contained in an iterator. If the iterator is UBUF- or IOBUF-type, the pages have a pin taken on them (as FOLL_PIN). If the iterator is BVEC-, KVEC- or XARRAY-type, no pin is taken on the pages and it is left to the caller to manage their lifetime. It cannot be assumed that a ref can be validly taken, particularly in the case of a KVEC iterator. Signed-off-by: David Howells cc: Jeff Layton cc: Steve French cc: Shyam Prasad N cc: Rohith Surabattula cc: linux-cachefs@redhat.com cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- include/linux/netfs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index b11a84f6c32b..a1f3522daa69 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -300,6 +300,10 @@ void netfs_stats_show(struct seq_file *); ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, struct iov_iter *new, iov_iter_extraction_t extraction_flags); +struct sg_table; +ssize_t netfs_extract_iter_to_sg(struct iov_iter *iter, size_t len, + struct sg_table *sgtable, unsigned int sg_max, + iov_iter_extraction_t extraction_flags); /** * netfs_inode - Get the netfs inode context from the inode -- cgit v1.2.3 From 868a6fc0ca2407622d2833adefe1c4d284766c4c Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 21 Feb 2023 08:49:16 +0900 Subject: x86/kprobes: Fix __recover_optprobed_insn check optimizing logic Since the following commit: commit f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code") modified the update timing of the KPROBE_FLAG_OPTIMIZED, a optimized_kprobe may be in the optimizing or unoptimizing state when op.kp->flags has KPROBE_FLAG_OPTIMIZED and op->list is not empty. The __recover_optprobed_insn check logic is incorrect, a kprobe in the unoptimizing state may be incorrectly determined as unoptimizing. As a result, incorrect instructions are copied. The optprobe_queued_unopt function needs to be exported for invoking in arch directory. 
Link: https://lore.kernel.org/all/20230216034247.32348-2-yangjihong1@huawei.com/ Fixes: f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code") Cc: stable@vger.kernel.org Signed-off-by: Yang Jihong Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- include/linux/kprobes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index a0b92be98984..ab39285f71a6 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -378,6 +378,7 @@ extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs); DEFINE_INSN_CACHE_OPS(optinsn); extern void wait_for_kprobe_optimizer(void); +bool optprobe_queued_unopt(struct optimized_kprobe *op); #else /* !CONFIG_OPTPROBES */ static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ -- cgit v1.2.3 From f1c97a1b4ef709e3f066f82e3ba3108c3b133ae6 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 21 Feb 2023 08:49:16 +0900 Subject: x86/kprobes: Fix arch_check_optimized_kprobe check within optimized_kprobe range When arch_prepare_optimized_kprobe calculating jump destination address, it copies original instructions from jmp-optimized kprobe (see __recover_optprobed_insn), and calculated based on length of original instruction. arch_check_optimized_kprobe does not check KPROBE_FLAG_OPTIMATED when checking whether jmp-optimized kprobe exists. As a result, setup_detour_execution may jump to a range that has been overwritten by jump destination address, resulting in an inval opcode error. For example, assume that register two kprobes whose addresses are and in "func" function. The original code of "func" function is as follows: 0xffffffff816cb5e9 <+9>: push %r12 0xffffffff816cb5eb <+11>: xor %r12d,%r12d 0xffffffff816cb5ee <+14>: test %rdi,%rdi 0xffffffff816cb5f1 <+17>: setne %r12b 0xffffffff816cb5f5 <+21>: push %rbp 1.Register the kprobe for , assume that is kp1, corresponding optimized_kprobe is op1. After the optimization, "func" code changes to: 0xffffffff816cc079 <+9>: push %r12 0xffffffff816cc07b <+11>: jmp 0xffffffffa0210000 0xffffffff816cc080 <+16>: incl 0xf(%rcx) 0xffffffff816cc083 <+19>: xchg %eax,%ebp 0xffffffff816cc084 <+20>: (bad) 0xffffffff816cc085 <+21>: push %rbp Now op1->flags == KPROBE_FLAG_OPTIMATED; 2. Register the kprobe for , assume that is kp2, corresponding optimized_kprobe is op2. register_kprobe(kp2) register_aggr_kprobe alloc_aggr_kprobe __prepare_optimized_kprobe arch_prepare_optimized_kprobe __recover_optprobed_insn // copy original bytes from kp1->optinsn.copied_insn, // jump address = 3. disable kp1: disable_kprobe(kp1) __disable_kprobe ... if (p == orig_p || aggr_kprobe_disabled(orig_p)) { ret = disarm_kprobe(orig_p, true) // add op1 in unoptimizing_list, not unoptimized orig_p->flags |= KPROBE_FLAG_DISABLED; // op1->flags == KPROBE_FLAG_OPTIMATED | KPROBE_FLAG_DISABLED ... 4. unregister kp2 __unregister_kprobe_top ... if (!kprobe_disabled(ap) && !kprobes_all_disarmed) { optimize_kprobe(op) ... if (arch_check_optimized_kprobe(op) < 0) // because op1 has KPROBE_FLAG_DISABLED, here not return return; p->kp.flags |= KPROBE_FLAG_OPTIMIZED; // now op2 has KPROBE_FLAG_OPTIMIZED } "func" code now is: 0xffffffff816cc079 <+9>: int3 0xffffffff816cc07a <+10>: push %rsp 0xffffffff816cc07b <+11>: jmp 0xffffffffa0210000 0xffffffff816cc080 <+16>: incl 0xf(%rcx) 0xffffffff816cc083 <+19>: xchg %eax,%ebp 0xffffffff816cc084 <+20>: (bad) 0xffffffff816cc085 <+21>: push %rbp 5. 
if call "func", int3 handler call setup_detour_execution: if (p->flags & KPROBE_FLAG_OPTIMIZED) { ... regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; ... } The code for the destination address is 0xffffffffa021072c: push %r12 0xffffffffa021072e: xor %r12d,%r12d 0xffffffffa0210731: jmp 0xffffffff816cb5ee However, is not a valid start instruction address. As a result, an error occurs. Link: https://lore.kernel.org/all/20230216034247.32348-3-yangjihong1@huawei.com/ Fixes: f66c0447cca1 ("kprobes: Set unoptimized flag after unoptimizing code") Signed-off-by: Yang Jihong Cc: stable@vger.kernel.org Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- include/linux/kprobes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index ab39285f71a6..85a64cb95d75 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -379,6 +379,7 @@ DEFINE_INSN_CACHE_OPS(optinsn); extern void wait_for_kprobe_optimizer(void); bool optprobe_queued_unopt(struct optimized_kprobe *op); +bool kprobe_disarmed(struct kprobe *p); #else /* !CONFIG_OPTPROBES */ static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ -- cgit v1.2.3 From db6c4dee4c104f50ed163af71c53bfdb878a8318 Mon Sep 17 00:00:00 2001 From: Alvaro Karsz Date: Tue, 10 Jan 2023 18:56:36 +0200 Subject: PCI: Add SolidRun vendor ID Add SolidRun vendor ID to pci_ids.h The vendor ID is used in 2 different source files, the SNET vDPA driver and PCI quirks. Signed-off-by: Alvaro Karsz Acked-by: Bjorn Helgaas Message-Id: <20230110165638.123745-2-alvaro.karsz@solid-run.com> Signed-off-by: Michael S. Tsirkin --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index b362d90eb9b0..6716e6371a18 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3092,6 +3092,8 @@ #define PCI_VENDOR_ID_3COM_2 0xa727 +#define PCI_VENDOR_ID_SOLIDRUN 0xd063 + #define PCI_VENDOR_ID_DIGIUM 0xd161 #define PCI_DEVICE_ID_DIGIUM_HFC4S 0xb410 -- cgit v1.2.3 From 1538a8a49ecbe6d3302cd7f347632338e56857f8 Mon Sep 17 00:00:00 2001 From: Sebastien Boeuf Date: Tue, 3 Jan 2023 11:51:05 +0100 Subject: vdpa: Add resume operation Add a new operation to allow a vDPA device to be resumed after it has been suspended. Trying to resume a device that wasn't suspended will result in a no-op. This operation is optional. If it's not implemented, the associated backend feature bit will not be exposed. And if the feature bit is not exposed, invoking this operation will return an error. Acked-by: Jason Wang Signed-off-by: Sebastien Boeuf Message-Id: <6e05c4b31b47f3e29cb2bd7ebd56c81f84b8f48a.1672742878.git.sebastien.boeuf@intel.com> Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Stefano Garzarella --- include/linux/vdpa.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 6d0f5e4e82c2..96d308cbf97b 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -219,7 +219,10 @@ struct vdpa_map_file { * @reset: Reset device * @vdev: vdpa device * Returns integer: success (0) or error (< 0) - * @suspend: Suspend or resume the device (optional) + * @suspend: Suspend the device (optional) + * @vdev: vdpa device + * Returns integer: success (0) or error (< 0) + * @resume: Resume the device (optional) * @vdev: vdpa device * Returns integer: success (0) or error (< 0) * @get_config_size: Get the size of the configuration space includes @@ -324,6 +327,7 @@ struct vdpa_config_ops { void (*set_status)(struct vdpa_device *vdev, u8 status); int (*reset)(struct vdpa_device *vdev); int (*suspend)(struct vdpa_device *vdev); + int (*resume)(struct vdpa_device *vdev); size_t (*get_config_size)(struct vdpa_device *vdev); void (*get_config)(struct vdpa_device *vdev, unsigned int offset, void *buf, unsigned int len); -- cgit v1.2.3 From 2713ea3c7d930aa1ff5ef7d4a57a1e4fcc3b9985 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 19 Jan 2023 14:15:21 +0800 Subject: virtio_ring: per virtqueue dma device This patch introduces a per virtqueue dma device. This will be used for virtio devices whose virtqueue are backed by different underlayer devices. One example is the vDPA that where the control virtqueue could be implemented through software mediation. Some of the work are actually done before since the helper like vring_dma_device(). This work left are: - Let vring_dma_device() return the per virtqueue dma device instead of the vdev's parent. - Allow passing a dma_device when creating the virtqueue through a new helper, old vring creation helper will keep using vdev's parent. Reviewed-by: Eli Cohen Tested-by: Eli Cohen Signed-off-by: Jason Wang Message-Id: <20230119061525.75068-2-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_ring.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 8b8af1a38991..8b95b69ef694 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -76,6 +76,22 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, void (*callback)(struct virtqueue *vq), const char *name); +/* + * Creates a virtqueue and allocates the descriptor ring with per + * virtqueue DMA device. + */ +struct virtqueue *vring_create_virtqueue_dma(unsigned int index, + unsigned int num, + unsigned int vring_align, + struct virtio_device *vdev, + bool weak_barriers, + bool may_reduce_num, + bool ctx, + bool (*notify)(struct virtqueue *vq), + void (*callback)(struct virtqueue *vq), + const char *name, + struct device *dma_dev); + /* * Creates a virtqueue with a standard layout but a caller-allocated * ring. -- cgit v1.2.3 From 25da258fa61b1a902c781406a4e24a8284fc3d8a Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 19 Jan 2023 14:15:22 +0800 Subject: vdpa: introduce get_vq_dma_device() This patch introduces a new method to query the dma device that is use for a specific virtqueue. Reviewed-by: Eli Cohen Tested-by: Eli Cohen Signed-off-by: Jason Wang Message-Id: <20230119061525.75068-3-jasowang@redhat.com> Signed-off-by: Michael S. 
Tsirkin --- include/linux/vdpa.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 96d308cbf97b..43f59ef10cc9 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -285,6 +285,11 @@ struct vdpa_map_file { * @iova: iova to be unmapped * @size: size of the area * Returns integer: success (0) or error (< 0) + * @get_vq_dma_dev: Get the dma device for a specific + * virtqueue (optional) + * @vdev: vdpa device + * @idx: virtqueue index + * Returns pointer to structure device or error (NULL) * @free: Free resources that belongs to vDPA (optional) * @vdev: vdpa device */ @@ -345,6 +350,7 @@ struct vdpa_config_ops { u64 iova, u64 size); int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, unsigned int asid); + struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx); /* Free device resources */ void (*free)(struct vdpa_device *vdev); -- cgit v1.2.3 From 2e44ca3f1f0ba2022f949003f02cc091144e54a3 Mon Sep 17 00:00:00 2001 From: Shunsuke Mie Date: Thu, 2 Feb 2023 19:42:48 +0900 Subject: vringh: fix a typo in comments for vringh_kiov Probably it is a simple copy error from struct vring_iov. Signed-off-by: Shunsuke Mie Message-Id: <20230202104248.2040652-1-mie@igel.co.jp> Signed-off-by: Michael S. Tsirkin --- include/linux/vringh.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 212892cf9822..1991a02c6431 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -92,7 +92,7 @@ struct vringh_iov { }; /** - * struct vringh_iov - kvec mangler. + * struct vringh_kiov - kvec mangler. * * Mangles kvec in place, and restores it. * Remaining data is iov + i, of used - i elements. -- cgit v1.2.3 From 9e0c7efa5ea231d85c0d41693a5115b3b971717c Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Fri, 3 Feb 2023 11:40:29 +0900 Subject: block: remove more NULL checks after bdev_get_queue() bdev_get_queue() never returns NULL. Several commits [1][2] have been made before to remove such superfluous checks, but some still remained. For places where bdev_get_queue() is called solely for NULL checks, it is removed entirely. 
[1] commit ec9fd2a13d74 ("blk-lib: don't check bdev_get_queue() NULL check") [2] commit fea127b36c93 ("block: remove superfluous check for request queue in bdev_is_zoned()") Signed-off-by: Juhyung Park Reviewed-by: Pankaj Raghav Link: https://lore.kernel.org/r/20230203024029.48260-1-qkrwngud825@gmail.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b9637d63e6f0..89dd9b02b45b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1276,12 +1276,7 @@ static inline bool bdev_nowait(struct block_device *bdev) static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) { - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return blk_queue_zoned_model(q); - - return BLK_ZONED_NONE; + return blk_queue_zoned_model(bdev_get_queue(bdev)); } static inline bool bdev_is_zoned(struct block_device *bdev) -- cgit v1.2.3 From 6c40624930c58529185a257380442547580ed837 Mon Sep 17 00:00:00 2001 From: Souradeep Chowdhury Date: Wed, 22 Feb 2023 08:27:49 +0900 Subject: bootconfig: Increase max nodes of bootconfig from 1024 to 8192 for DCC support The Data Capture and Compare(DCC) is a debugging tool that uses the bootconfig for configuring the register values during boot-time. Increase the max nodes supported by bootconfig to cater to the requirements of the Data Capture and Compare Driver. Link: https://lore.kernel.org/all/1674536682-18404-1-git-send-email-quic_schowdhu@quicinc.com/ Signed-off-by: Souradeep Chowdhury Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) --- include/linux/bootconfig.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 1611f9db878e..ca73940e26df 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -59,7 +59,7 @@ struct xbc_node { /* Maximum size of boot config is 32KB - 1 */ #define XBC_DATA_MAX (XBC_VALUE - 1) -#define XBC_NODE_MAX 1024 +#define XBC_NODE_MAX 8192 #define XBC_KEYLEN_MAX 256 #define XBC_DEPTH_MAX 16 -- cgit v1.2.3 From fdf6491193e411087ae77bcbc6468e3e1cff99ed Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Feb 2023 17:24:00 +0100 Subject: netfilter: ctnetlink: make event listener tracking global pernet tracking doesn't work correctly because other netns might have set NETLINK_LISTEN_ALL_NSID on its event socket. In this case its expected that events originating in other net namespaces are also received. Making pernet-tracking work while also honoring NETLINK_LISTEN_ALL_NSID requires much more intrusive changes both in netlink and nfnetlink, f.e. adding a 'setsockopt' callback that lets nfnetlink know that the event socket entered (or left) ALL_NSID mode. Move to global tracking instead: if there is an event socket anywhere on the system, all net namespaces which have conntrack enabled and use autobind mode will allocate the ecache extension. netlink_has_listeners() returns false only if the given group has no subscribers in any net namespace, the 'net' argument passed to nfnetlink_has_listeners is only used to derive the protocol (nfnetlink), it has no other effect. For proper NETLINK_LISTEN_ALL_NSID-aware pernet tracking of event listeners a new netlink_has_net_listeners() is also needed. 
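For illustration only, a minimal sketch of how the conntrack ecache autobind path could consult the new global bitmask instead of pernet listener state; the helper name and its placement are assumptions, only the extern nf_ctnetlink_has_listener declaration comes from this patch:

#include <linux/netfilter.h>

/*
 * Sketch, not the actual nf_conntrack change: the nfnetlink bind/unbind
 * callbacks are assumed to keep nf_ctnetlink_has_listener up to date, and
 * the ecache autobind path only needs to know whether any subscriber
 * exists anywhere on the system.
 */
static bool ct_ecache_has_listener(void)
{
	/* single byte, so a plain READ_ONCE() is enough */
	return READ_ONCE(nf_ctnetlink_has_listener) != 0;
}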
Fixes: 90d1daa45849 ("netfilter: conntrack: add nf_conntrack_events autodetect mode") Reported-by: Bryce Kahle Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index d8817d381c14..bef8db9d6c08 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -488,4 +488,9 @@ extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook; */ DECLARE_PER_CPU(bool, nf_skb_duplicated); +/** + * Contains bitmask of ctnetlink event subscribers, if any. + * Can't be pernet due to NETLINK_LISTEN_ALL_NSID setsockopt flag. + */ +extern u8 nf_ctnetlink_has_listener; #endif /*__LINUX_NETFILTER_H*/ -- cgit v1.2.3 From 8781ba7f45695af3ab8e8d1b55a31f527c9201a3 Mon Sep 17 00:00:00 2001 From: Aren Moynihan Date: Thu, 8 Dec 2022 17:02:26 -0500 Subject: mfd: axp20x: Fix order of pek rise and fall events The power button can get "stuck" if the rising edge and falling edge irq are read in the same pass. This can often be triggered when resuming from suspend if the power button is released before the kernel handles the interrupt. Swapping the order of the rise and fall events makes sure that the press event is handled first, which prevents this situation. Signed-off-by: Aren Moynihan Reviewed-by: Samuel Holland Tested-by: Samuel Holland Acked-by: Chen-Yu Tsai Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20221208220225.635414-1-aren@peacevolution.org --- include/linux/mfd/axp20x.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 9ab0e2fca7ea..2058194807bd 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -432,8 +432,9 @@ enum { AXP152_IRQ_PEK_SHORT, AXP152_IRQ_PEK_LONG, AXP152_IRQ_TIMER, - AXP152_IRQ_PEK_RIS_EDGE, + /* out of bit order to make sure the press event is handled first */ AXP152_IRQ_PEK_FAL_EDGE, + AXP152_IRQ_PEK_RIS_EDGE, AXP152_IRQ_GPIO3_INPUT, AXP152_IRQ_GPIO2_INPUT, AXP152_IRQ_GPIO1_INPUT, @@ -472,8 +473,9 @@ enum { AXP20X_IRQ_LOW_PWR_LVL1, AXP20X_IRQ_LOW_PWR_LVL2, AXP20X_IRQ_TIMER, - AXP20X_IRQ_PEK_RIS_EDGE, + /* out of bit order to make sure the press event is handled first */ AXP20X_IRQ_PEK_FAL_EDGE, + AXP20X_IRQ_PEK_RIS_EDGE, AXP20X_IRQ_GPIO3_INPUT, AXP20X_IRQ_GPIO2_INPUT, AXP20X_IRQ_GPIO1_INPUT, @@ -502,8 +504,9 @@ enum axp22x_irqs { AXP22X_IRQ_LOW_PWR_LVL1, AXP22X_IRQ_LOW_PWR_LVL2, AXP22X_IRQ_TIMER, - AXP22X_IRQ_PEK_RIS_EDGE, + /* out of bit order to make sure the press event is handled first */ AXP22X_IRQ_PEK_FAL_EDGE, + AXP22X_IRQ_PEK_RIS_EDGE, AXP22X_IRQ_GPIO1_INPUT, AXP22X_IRQ_GPIO0_INPUT, }; @@ -571,8 +574,9 @@ enum axp803_irqs { AXP803_IRQ_LOW_PWR_LVL1, AXP803_IRQ_LOW_PWR_LVL2, AXP803_IRQ_TIMER, - AXP803_IRQ_PEK_RIS_EDGE, + /* out of bit order to make sure the press event is handled first */ AXP803_IRQ_PEK_FAL_EDGE, + AXP803_IRQ_PEK_RIS_EDGE, AXP803_IRQ_PEK_SHORT, AXP803_IRQ_PEK_LONG, AXP803_IRQ_PEK_OVER_OFF, @@ -623,8 +627,9 @@ enum axp809_irqs { AXP809_IRQ_LOW_PWR_LVL1, AXP809_IRQ_LOW_PWR_LVL2, AXP809_IRQ_TIMER, - AXP809_IRQ_PEK_RIS_EDGE, + /* out of bit order to make sure the press event is handled first */ AXP809_IRQ_PEK_FAL_EDGE, + AXP809_IRQ_PEK_RIS_EDGE, AXP809_IRQ_PEK_SHORT, AXP809_IRQ_PEK_LONG, AXP809_IRQ_PEK_OVER_OFF, -- cgit v1.2.3 From ccc91b3ed3f641efa8e9050c587ef509b0e2be3a Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Thu, 8 
Dec 2022 22:57:23 +0100 Subject: mfd: twl: Fix TWL6032 phy vbus detection TWL6032 has a few charging registers prepended before the charging registers the TWL6030 has. To be able to use common register defines declare the additional registers as additional module. At the moment this affects the access to CHARGERUSB_CTRL1 in phy-twl6030-usb. Without this patch, it is accessing the wrong register on TWL6032. The consequence is that presence of Vbus is not reported. Cc: Bin Liu Cc: Tony Lindgren Signed-off-by: Andreas Kemnade Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20221208215723.217557-1-andreas@kemnade.info --- include/linux/mfd/twl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/twl.h b/include/linux/mfd/twl.h index eaa233038254..6e3d99b7a0ee 100644 --- a/include/linux/mfd/twl.h +++ b/include/linux/mfd/twl.h @@ -69,6 +69,8 @@ enum twl6030_module_ids { TWL6030_MODULE_GPADC, TWL6030_MODULE_GASGAUGE, + /* A few extra registers before the registers shared with the 6030 */ + TWL6032_MODULE_CHARGE, TWL6030_MODULE_LAST, }; -- cgit v1.2.3 From 88a32c2c5e98d72765846db83c1739e7b036770a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 9 Jan 2023 14:33:22 +0100 Subject: mfd: core: Spelling s/compement/complement/ Fix a misspelling of "complement". Signed-off-by: Geert Uytterhoeven Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/aa7abd7103a0e4be954ea63de78f12e8251b2964.1673271092.git.geert+renesas@glider.be --- include/linux/mfd/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index 0bc7cba798a3..14ca7b471576 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -88,7 +88,7 @@ struct mfd_cell { const char *of_compatible; /* - * Address as defined in Device Tree. Used to compement 'of_compatible' + * Address as defined in Device Tree. Used to complement 'of_compatible' * (above) when matching OF nodes with devices that have identical * compatible strings */ -- cgit v1.2.3 From 5a7fbe452ad9e4a8988d4c20d7acf148383f8106 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 17 Nov 2022 08:21:51 +0100 Subject: backlight: pwm_bl: Drop support for legacy PWM probing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no in-tree user left which relies on legacy probing. So drop support for it which removes another user of the deprecated pwm_request() function. 
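For illustration, a hedged sketch of what a board that previously set pwm_id would do instead: describe its PWM through the PWM core's lookup table so pwm_get()/devm_pwm_get() can resolve it by consumer name. The provider, device names and period below are made up for the example; only the removal of pwm_id from platform_pwm_backlight_data comes from this patch:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/pwm.h>

/* Board-file sketch: register a lookup entry instead of passing pwm_id. */
static struct pwm_lookup board_pwm_lookup[] = {
	/* provider, index, consumer device, con_id, period (ns), polarity */
	PWM_LOOKUP("ehrpwm.0", 0, "pwm-backlight.0", NULL,
		   50000, PWM_POLARITY_NORMAL),
};

static void __init board_init_backlight(void)
{
	pwm_add_table(board_pwm_lookup, ARRAY_SIZE(board_pwm_lookup));
}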
Signed-off-by: Uwe Kleine-König Reviewed-by: Daniel Thompson Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20221117072151.3789691-1-u.kleine-koenig@pengutronix.de --- include/linux/pwm_backlight.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h index 06086cb93b6f..cdd2ac366bc7 100644 --- a/include/linux/pwm_backlight.h +++ b/include/linux/pwm_backlight.h @@ -8,7 +8,6 @@ #include struct platform_pwm_backlight_data { - int pwm_id; unsigned int max_brightness; unsigned int dft_brightness; unsigned int lth_brightness; -- cgit v1.2.3 From ca78476e4888f1f1caac26c48ec715e546baf432 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 Jan 2023 14:46:13 +0100 Subject: mfd: Remove toshiba tmio drivers Four separate mfd drivers are in the "tmio" family, and all of them were used in now-removed PXA machines (eseries, tosa, and hx4700), so the mfd drivers and all its children can be removed as well. Signed-off-by: Arnd Bergmann Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230105134622.254560-19-arnd@kernel.org --- include/linux/mfd/asic3.h | 313 ------------------------------------------- include/linux/mfd/t7l66xb.h | 29 ---- include/linux/mfd/tc6387xb.h | 19 --- include/linux/mfd/tc6393xb.h | 53 -------- include/linux/mfd/tmio.h | 5 - 5 files changed, 419 deletions(-) delete mode 100644 include/linux/mfd/asic3.h delete mode 100644 include/linux/mfd/t7l66xb.h delete mode 100644 include/linux/mfd/tc6387xb.h delete mode 100644 include/linux/mfd/tc6393xb.h (limited to 'include/linux') diff --git a/include/linux/mfd/asic3.h b/include/linux/mfd/asic3.h deleted file mode 100644 index 61e686dbaa74..000000000000 --- a/include/linux/mfd/asic3.h +++ /dev/null @@ -1,313 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * include/linux/mfd/asic3.h - * - * Compaq ASIC3 headers. - * - * Copyright 2001 Compaq Computer Corporation. - * Copyright 2007-2008 OpenedHand Ltd. 
- */ - -#ifndef __ASIC3_H__ -#define __ASIC3_H__ - -#include - -struct led_classdev; -struct asic3_led { - const char *name; - const char *default_trigger; - struct led_classdev *cdev; -}; - -struct asic3_platform_data { - u16 *gpio_config; - unsigned int gpio_config_num; - - unsigned int irq_base; - - unsigned int gpio_base; - - unsigned int clock_rate; - - struct asic3_led *leds; -}; - -#define ASIC3_NUM_GPIO_BANKS 4 -#define ASIC3_GPIOS_PER_BANK 16 -#define ASIC3_NUM_GPIOS 64 -#define ASIC3_NR_IRQS ASIC3_NUM_GPIOS + 6 - -#define ASIC3_IRQ_LED0 64 -#define ASIC3_IRQ_LED1 65 -#define ASIC3_IRQ_LED2 66 -#define ASIC3_IRQ_SPI 67 -#define ASIC3_IRQ_SMBUS 68 -#define ASIC3_IRQ_OWM 69 - -#define ASIC3_TO_GPIO(gpio) (NR_BUILTIN_GPIO + (gpio)) - -#define ASIC3_GPIO_BANK_A 0 -#define ASIC3_GPIO_BANK_B 1 -#define ASIC3_GPIO_BANK_C 2 -#define ASIC3_GPIO_BANK_D 3 - -#define ASIC3_GPIO(bank, gpio) \ - ((ASIC3_GPIOS_PER_BANK * ASIC3_GPIO_BANK_##bank) + (gpio)) -#define ASIC3_GPIO_bit(gpio) (1 << (gpio & 0xf)) -/* All offsets below are specified with this address bus shift */ -#define ASIC3_DEFAULT_ADDR_SHIFT 2 - -#define ASIC3_OFFSET(base, reg) (ASIC3_##base##_BASE + ASIC3_##base##_##reg) -#define ASIC3_GPIO_OFFSET(base, reg) \ - (ASIC3_GPIO_##base##_BASE + ASIC3_GPIO_##reg) - -#define ASIC3_GPIO_A_BASE 0x0000 -#define ASIC3_GPIO_B_BASE 0x0100 -#define ASIC3_GPIO_C_BASE 0x0200 -#define ASIC3_GPIO_D_BASE 0x0300 - -#define ASIC3_GPIO_TO_BANK(gpio) ((gpio) >> 4) -#define ASIC3_GPIO_TO_BIT(gpio) ((gpio) - \ - (ASIC3_GPIOS_PER_BANK * ((gpio) >> 4))) -#define ASIC3_GPIO_TO_MASK(gpio) (1 << ASIC3_GPIO_TO_BIT(gpio)) -#define ASIC3_GPIO_TO_BASE(gpio) (ASIC3_GPIO_A_BASE + (((gpio) >> 4) * 0x0100)) -#define ASIC3_BANK_TO_BASE(bank) (ASIC3_GPIO_A_BASE + ((bank) * 0x100)) - -#define ASIC3_GPIO_MASK 0x00 /* R/W 0:don't mask */ -#define ASIC3_GPIO_DIRECTION 0x04 /* R/W 0:input */ -#define ASIC3_GPIO_OUT 0x08 /* R/W 0:output low */ -#define ASIC3_GPIO_TRIGGER_TYPE 0x0c /* R/W 0:level */ -#define ASIC3_GPIO_EDGE_TRIGGER 0x10 /* R/W 0:falling */ -#define ASIC3_GPIO_LEVEL_TRIGGER 0x14 /* R/W 0:low level detect */ -#define ASIC3_GPIO_SLEEP_MASK 0x18 /* R/W 0:don't mask in sleep mode */ -#define ASIC3_GPIO_SLEEP_OUT 0x1c /* R/W level 0:low in sleep mode */ -#define ASIC3_GPIO_BAT_FAULT_OUT 0x20 /* R/W level 0:low in batt_fault */ -#define ASIC3_GPIO_INT_STATUS 0x24 /* R/W 0:none, 1:detect */ -#define ASIC3_GPIO_ALT_FUNCTION 0x28 /* R/W 1:LED register control */ -#define ASIC3_GPIO_SLEEP_CONF 0x2c /* - * R/W bit 1: autosleep - * 0: disable gposlpout in normal mode, - * enable gposlpout in sleep mode. 
- */ -#define ASIC3_GPIO_STATUS 0x30 /* R Pin status */ - -/* - * ASIC3 GPIO config - * - * Bits 0..6 gpio number - * Bits 7..13 Alternate function - * Bit 14 Direction - * Bit 15 Initial value - * - */ -#define ASIC3_CONFIG_GPIO_PIN(config) ((config) & 0x7f) -#define ASIC3_CONFIG_GPIO_ALT(config) (((config) & (0x7f << 7)) >> 7) -#define ASIC3_CONFIG_GPIO_DIR(config) ((config & (1 << 14)) >> 14) -#define ASIC3_CONFIG_GPIO_INIT(config) ((config & (1 << 15)) >> 15) -#define ASIC3_CONFIG_GPIO(gpio, alt, dir, init) (((gpio) & 0x7f) \ - | (((alt) & 0x7f) << 7) | (((dir) & 0x1) << 14) \ - | (((init) & 0x1) << 15)) -#define ASIC3_CONFIG_GPIO_DEFAULT(gpio, dir, init) \ - ASIC3_CONFIG_GPIO((gpio), 0, (dir), (init)) -#define ASIC3_CONFIG_GPIO_DEFAULT_OUT(gpio, init) \ - ASIC3_CONFIG_GPIO((gpio), 0, 1, (init)) - -/* - * Alternate functions - */ -#define ASIC3_GPIOA11_PWM0 ASIC3_CONFIG_GPIO(11, 1, 1, 0) -#define ASIC3_GPIOA12_PWM1 ASIC3_CONFIG_GPIO(12, 1, 1, 0) -#define ASIC3_GPIOA15_CONTROL_CX ASIC3_CONFIG_GPIO(15, 1, 1, 0) -#define ASIC3_GPIOC0_LED0 ASIC3_CONFIG_GPIO(32, 1, 0, 0) -#define ASIC3_GPIOC1_LED1 ASIC3_CONFIG_GPIO(33, 1, 0, 0) -#define ASIC3_GPIOC2_LED2 ASIC3_CONFIG_GPIO(34, 1, 0, 0) -#define ASIC3_GPIOC3_SPI_RXD ASIC3_CONFIG_GPIO(35, 1, 0, 0) -#define ASIC3_GPIOC4_CF_nCD ASIC3_CONFIG_GPIO(36, 1, 0, 0) -#define ASIC3_GPIOC4_SPI_TXD ASIC3_CONFIG_GPIO(36, 1, 1, 0) -#define ASIC3_GPIOC5_SPI_CLK ASIC3_CONFIG_GPIO(37, 1, 1, 0) -#define ASIC3_GPIOC5_nCIOW ASIC3_CONFIG_GPIO(37, 1, 1, 0) -#define ASIC3_GPIOC6_nCIOR ASIC3_CONFIG_GPIO(38, 1, 1, 0) -#define ASIC3_GPIOC7_nPCE_1 ASIC3_CONFIG_GPIO(39, 1, 0, 0) -#define ASIC3_GPIOC8_nPCE_2 ASIC3_CONFIG_GPIO(40, 1, 0, 0) -#define ASIC3_GPIOC9_nPOE ASIC3_CONFIG_GPIO(41, 1, 0, 0) -#define ASIC3_GPIOC10_nPWE ASIC3_CONFIG_GPIO(42, 1, 0, 0) -#define ASIC3_GPIOC11_PSKTSEL ASIC3_CONFIG_GPIO(43, 1, 0, 0) -#define ASIC3_GPIOC12_nPREG ASIC3_CONFIG_GPIO(44, 1, 0, 0) -#define ASIC3_GPIOC13_nPWAIT ASIC3_CONFIG_GPIO(45, 1, 1, 0) -#define ASIC3_GPIOC14_nPIOIS16 ASIC3_CONFIG_GPIO(46, 1, 1, 0) -#define ASIC3_GPIOC15_nPIOR ASIC3_CONFIG_GPIO(47, 1, 0, 0) -#define ASIC3_GPIOD4_CF_nCD ASIC3_CONFIG_GPIO(52, 1, 0, 0) -#define ASIC3_GPIOD11_nCIOIS16 ASIC3_CONFIG_GPIO(59, 1, 0, 0) -#define ASIC3_GPIOD12_nCWAIT ASIC3_CONFIG_GPIO(60, 1, 0, 0) -#define ASIC3_GPIOD15_nPIOW ASIC3_CONFIG_GPIO(63, 1, 0, 0) - - -#define ASIC3_SPI_Base 0x0400 -#define ASIC3_SPI_Control 0x0000 -#define ASIC3_SPI_TxData 0x0004 -#define ASIC3_SPI_RxData 0x0008 -#define ASIC3_SPI_Int 0x000c -#define ASIC3_SPI_Status 0x0010 - -#define SPI_CONTROL_SPR(clk) ((clk) & 0x0f) /* Clock rate */ - -#define ASIC3_PWM_0_Base 0x0500 -#define ASIC3_PWM_1_Base 0x0600 -#define ASIC3_PWM_TimeBase 0x0000 -#define ASIC3_PWM_PeriodTime 0x0004 -#define ASIC3_PWM_DutyTime 0x0008 - -#define PWM_TIMEBASE_VALUE(x) ((x)&0xf) /* Low 4 bits sets time base */ -#define PWM_TIMEBASE_ENABLE (1 << 4) /* Enable clock */ - -#define ASIC3_NUM_LEDS 3 -#define ASIC3_LED_0_Base 0x0700 -#define ASIC3_LED_1_Base 0x0800 -#define ASIC3_LED_2_Base 0x0900 -#define ASIC3_LED_TimeBase 0x0000 /* R/W 7 bits */ -#define ASIC3_LED_PeriodTime 0x0004 /* R/W 12 bits */ -#define ASIC3_LED_DutyTime 0x0008 /* R/W 12 bits */ -#define ASIC3_LED_AutoStopCount 0x000c /* R/W 16 bits */ - -/* LED TimeBase bits - match ASIC2 */ -#define LED_TBS 0x0f /* Low 4 bits sets time base, max = 13 */ - /* Note: max = 5 on hx4700 */ - /* 0: maximum time base */ - /* 1: maximum time base / 2 */ - /* n: maximum time base / 2^n */ - -#define LED_EN (1 << 4) /* LED ON/OFF 0:off, 1:on */ 
-#define LED_AUTOSTOP (1 << 5) /* LED ON/OFF auto stop 0:disable, 1:enable */ -#define LED_ALWAYS (1 << 6) /* LED Interrupt Mask 0:No mask, 1:mask */ - -#define ASIC3_CLOCK_BASE 0x0A00 -#define ASIC3_CLOCK_CDEX 0x00 -#define ASIC3_CLOCK_SEL 0x04 - -#define CLOCK_CDEX_SOURCE (1 << 0) /* 2 bits */ -#define CLOCK_CDEX_SOURCE0 (1 << 0) -#define CLOCK_CDEX_SOURCE1 (1 << 1) -#define CLOCK_CDEX_SPI (1 << 2) -#define CLOCK_CDEX_OWM (1 << 3) -#define CLOCK_CDEX_PWM0 (1 << 4) -#define CLOCK_CDEX_PWM1 (1 << 5) -#define CLOCK_CDEX_LED0 (1 << 6) -#define CLOCK_CDEX_LED1 (1 << 7) -#define CLOCK_CDEX_LED2 (1 << 8) - -/* Clocks settings: 1 for 24.576 MHz, 0 for 12.288Mhz */ -#define CLOCK_CDEX_SD_HOST (1 << 9) /* R/W: SD host clock source */ -#define CLOCK_CDEX_SD_BUS (1 << 10) /* R/W: SD bus clock source ctrl */ -#define CLOCK_CDEX_SMBUS (1 << 11) -#define CLOCK_CDEX_CONTROL_CX (1 << 12) - -#define CLOCK_CDEX_EX0 (1 << 13) /* R/W: 32.768 kHz crystal */ -#define CLOCK_CDEX_EX1 (1 << 14) /* R/W: 24.576 MHz crystal */ - -#define CLOCK_SEL_SD_HCLK_SEL (1 << 0) /* R/W: SDIO host clock select */ -#define CLOCK_SEL_SD_BCLK_SEL (1 << 1) /* R/W: SDIO bus clock select */ - -/* R/W: INT clock source control (32.768 kHz) */ -#define CLOCK_SEL_CX (1 << 2) - - -#define ASIC3_INTR_BASE 0x0B00 - -#define ASIC3_INTR_INT_MASK 0x00 /* Interrupt mask control */ -#define ASIC3_INTR_P_INT_STAT 0x04 /* Peripheral interrupt status */ -#define ASIC3_INTR_INT_CPS 0x08 /* Interrupt timer clock pre-scale */ -#define ASIC3_INTR_INT_TBS 0x0c /* Interrupt timer set */ - -#define ASIC3_INTMASK_GINTMASK (1 << 0) /* Global INTs mask 1:enable */ -#define ASIC3_INTMASK_GINTEL (1 << 1) /* 1: rising edge, 0: hi level */ -#define ASIC3_INTMASK_MASK0 (1 << 2) -#define ASIC3_INTMASK_MASK1 (1 << 3) -#define ASIC3_INTMASK_MASK2 (1 << 4) -#define ASIC3_INTMASK_MASK3 (1 << 5) -#define ASIC3_INTMASK_MASK4 (1 << 6) -#define ASIC3_INTMASK_MASK5 (1 << 7) - -#define ASIC3_INTR_PERIPHERAL_A (1 << 0) -#define ASIC3_INTR_PERIPHERAL_B (1 << 1) -#define ASIC3_INTR_PERIPHERAL_C (1 << 2) -#define ASIC3_INTR_PERIPHERAL_D (1 << 3) -#define ASIC3_INTR_LED0 (1 << 4) -#define ASIC3_INTR_LED1 (1 << 5) -#define ASIC3_INTR_LED2 (1 << 6) -#define ASIC3_INTR_SPI (1 << 7) -#define ASIC3_INTR_SMBUS (1 << 8) -#define ASIC3_INTR_OWM (1 << 9) - -#define ASIC3_INTR_CPS(x) ((x)&0x0f) /* 4 bits, max 14 */ -#define ASIC3_INTR_CPS_SET (1 << 4) /* Time base enable */ - - -/* Basic control of the SD ASIC */ -#define ASIC3_SDHWCTRL_BASE 0x0E00 -#define ASIC3_SDHWCTRL_SDCONF 0x00 - -#define ASIC3_SDHWCTRL_SUSPEND (1 << 0) /* 1=suspend all SD operations */ -#define ASIC3_SDHWCTRL_CLKSEL (1 << 1) /* 1=SDICK, 0=HCLK */ -#define ASIC3_SDHWCTRL_PCLR (1 << 2) /* All registers of SDIO cleared */ -#define ASIC3_SDHWCTRL_LEVCD (1 << 3) /* SD card detection: 0:low */ - -/* SD card write protection: 0=high */ -#define ASIC3_SDHWCTRL_LEVWP (1 << 4) -#define ASIC3_SDHWCTRL_SDLED (1 << 5) /* SD card LED signal 0=disable */ - -/* SD card power supply ctrl 1=enable */ -#define ASIC3_SDHWCTRL_SDPWR (1 << 6) - -#define ASIC3_EXTCF_BASE 0x1100 - -#define ASIC3_EXTCF_SELECT 0x00 -#define ASIC3_EXTCF_RESET 0x04 - -#define ASIC3_EXTCF_SMOD0 (1 << 0) /* slot number of mode 0 */ -#define ASIC3_EXTCF_SMOD1 (1 << 1) /* slot number of mode 1 */ -#define ASIC3_EXTCF_SMOD2 (1 << 2) /* slot number of mode 2 */ -#define ASIC3_EXTCF_OWM_EN (1 << 4) /* enable onewire module */ -#define ASIC3_EXTCF_OWM_SMB (1 << 5) /* OWM bus selection */ -#define ASIC3_EXTCF_OWM_RESET (1 << 6) /* ?? 
used by OWM and CF */ -#define ASIC3_EXTCF_CF0_SLEEP_MODE (1 << 7) /* CF0 sleep state */ -#define ASIC3_EXTCF_CF1_SLEEP_MODE (1 << 8) /* CF1 sleep state */ -#define ASIC3_EXTCF_CF0_PWAIT_EN (1 << 10) /* CF0 PWAIT_n control */ -#define ASIC3_EXTCF_CF1_PWAIT_EN (1 << 11) /* CF1 PWAIT_n control */ -#define ASIC3_EXTCF_CF0_BUF_EN (1 << 12) /* CF0 buffer control */ -#define ASIC3_EXTCF_CF1_BUF_EN (1 << 13) /* CF1 buffer control */ -#define ASIC3_EXTCF_SD_MEM_ENABLE (1 << 14) -#define ASIC3_EXTCF_CF_SLEEP (1 << 15) /* CF sleep mode control */ - -/********************************************* - * The Onewire interface (DS1WM) is handled - * by the ds1wm driver. - * - *********************************************/ - -#define ASIC3_OWM_BASE 0xC00 - -/***************************************************************************** - * The SD configuration registers are at a completely different location - * in memory. They are divided into three sets of registers: - * - * SD_CONFIG Core configuration register - * SD_CTRL Control registers for SD operations - * SDIO_CTRL Control registers for SDIO operations - * - *****************************************************************************/ -#define ASIC3_SD_CONFIG_BASE 0x0400 /* Assumes 32 bit addressing */ -#define ASIC3_SD_CONFIG_SIZE 0x0200 /* Assumes 32 bit addressing */ -#define ASIC3_SD_CTRL_BASE 0x1000 -#define ASIC3_SDIO_CTRL_BASE 0x1200 - -#define ASIC3_MAP_SIZE_32BIT 0x2000 -#define ASIC3_MAP_SIZE_16BIT 0x1000 - -/* Functions needed by leds-asic3 */ - -struct asic3; -extern void asic3_write_register(struct asic3 *asic, unsigned int reg, u32 val); -extern u32 asic3_read_register(struct asic3 *asic, unsigned int reg); - -#endif /* __ASIC3_H__ */ diff --git a/include/linux/mfd/t7l66xb.h b/include/linux/mfd/t7l66xb.h deleted file mode 100644 index ae3e7a5c5219..000000000000 --- a/include/linux/mfd/t7l66xb.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * This file contains the definitions for the T7L66XB - * - * (C) Copyright 2005 Ian Molton - */ -#ifndef MFD_T7L66XB_H -#define MFD_T7L66XB_H - -#include -#include - -struct t7l66xb_platform_data { - int (*enable)(struct platform_device *dev); - int (*suspend)(struct platform_device *dev); - int (*resume)(struct platform_device *dev); - - int irq_base; /* The base for subdevice irqs */ - - struct tmio_nand_data *nand_data; -}; - - -#define IRQ_T7L66XB_MMC (1) -#define IRQ_T7L66XB_NAND (3) - -#define T7L66XB_NR_IRQS 8 - -#endif diff --git a/include/linux/mfd/tc6387xb.h b/include/linux/mfd/tc6387xb.h deleted file mode 100644 index aacf1dcc86b9..000000000000 --- a/include/linux/mfd/tc6387xb.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * This file contains the definitions for the TC6387XB - * - * (C) Copyright 2005 Ian Molton - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. 
- * - */ -#ifndef MFD_TC6387XB_H -#define MFD_TC6387XB_H - -struct tc6387xb_platform_data { - int (*enable)(struct platform_device *dev); - int (*suspend)(struct platform_device *dev); - int (*resume)(struct platform_device *dev); -}; - -#endif diff --git a/include/linux/mfd/tc6393xb.h b/include/linux/mfd/tc6393xb.h deleted file mode 100644 index d17807f2d0c9..000000000000 --- a/include/linux/mfd/tc6393xb.h +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Toshiba TC6393XB SoC support - * - * Copyright(c) 2005-2006 Chris Humbert - * Copyright(c) 2005 Dirk Opfer - * Copyright(c) 2005 Ian Molton - * Copyright(c) 2007 Dmitry Baryshkov - * - * Based on code written by Sharp/Lineo for 2.4 kernels - * Based on locomo.c - */ - -#ifndef MFD_TC6393XB_H -#define MFD_TC6393XB_H - -#include - -/* Also one should provide the CK3P6MI clock */ -struct tc6393xb_platform_data { - u16 scr_pll2cr; /* PLL2 Control */ - u16 scr_gper; /* GP Enable */ - - int (*enable)(struct platform_device *dev); - void (*disable)(struct platform_device *dev); - int (*suspend)(struct platform_device *dev); - int (*resume)(struct platform_device *dev); - - int irq_base; /* base for subdevice irqs */ - - struct tmio_nand_data *nand_data; - struct tmio_fb_data *fb_data; - - unsigned resume_restore : 1; /* make special actions - to preserve the state - on suspend/resume */ -}; - -extern int tc6393xb_lcd_mode(struct platform_device *fb, - const struct fb_videomode *mode); -extern int tc6393xb_lcd_set_power(struct platform_device *fb, bool on); - -/* - * Relative to irq_base - */ -#define IRQ_TC6393_NAND 0 -#define IRQ_TC6393_MMC 1 -#define IRQ_TC6393_OHCI 2 -#define IRQ_TC6393_FB 4 - -#define TC6393XB_NR_IRQS 8 - -#endif diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index e8bf90281ba0..eace8ea6cda0 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -84,11 +84,6 @@ /* Some controllers have a CBSY bit */ #define TMIO_MMC_HAVE_CBSY BIT(11) -int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); -int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); -void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state); - struct dma_chan; /* -- cgit v1.2.3 From 8a15b4daed3d45c9162f23d393a06df2a7be4778 Mon Sep 17 00:00:00 2001 From: Andreas Kemnade Date: Fri, 27 Jan 2023 17:58:28 +0100 Subject: mfd: ntxec: Add version number for EC in Tolino Vision The EC firmware has a different version number than anything defined until now. 
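As a purely hypothetical illustration of how such a version constant is typically consumed, a probe-time whitelist would grow one more case; only the NTXEC_VERSION_* defines come from this patch, the function and switch are invented for the example:

#include <linux/errno.h>
#include <linux/mfd/ntxec.h>

/* Hypothetical probe-time firmware whitelist, for illustration only. */
static int ntxec_check_version(u16 version)
{
	switch (version) {
	case NTXEC_VERSION_KOBO_AURA:
	case NTXEC_VERSION_TOLINO_SHINE2:
	case NTXEC_VERSION_TOLINO_VISION:
		return 0;
	default:
		return -ENODEV;
	}
}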
Signed-off-by: Andreas Kemnade Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230127165828.3256170-1-andreas@kemnade.info --- include/linux/mfd/ntxec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/ntxec.h b/include/linux/mfd/ntxec.h index cc6f07bfa2b3..e5880c346da9 100644 --- a/include/linux/mfd/ntxec.h +++ b/include/linux/mfd/ntxec.h @@ -34,5 +34,5 @@ static inline u16 ntxec_reg8(u8 value) /* Known firmware versions */ #define NTXEC_VERSION_KOBO_AURA 0xd726 /* found in Kobo Aura */ #define NTXEC_VERSION_TOLINO_SHINE2 0xf110 /* found in Tolino Shine 2 HD */ - +#define NTXEC_VERSION_TOLINO_VISION 0xe135 /* found in Tolino Vision, contains RTC, ADC, PWM, home pad */ #endif -- cgit v1.2.3 From c1855dd0a62b7ead360eb9231fb65c2108efaf47 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 22 Feb 2023 06:31:10 -0800 Subject: clk: qcom: Revert sync_state based clk_disable_unused Revert the postponement of clk_disable_unused() for clock providers that implement sync_state, and the change to drivers implementing this, until agreement on the implementation has been reached. This reverts: 29e31415e14e ("clk: qcom: Remove need for clk_ignore_unused on sc8280xp") 99c0f7d35c4b ("clk: qcom: sdm845: Use generic clk_sync_state_disable_unused callback") 26b36df75166 ("clk: Add generic sync_state callback for disabling unused clocks") Requested-by: Stephen Boyd Signed-off-by: Bjorn Andersson --- include/linux/clk-provider.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index cf1adfeaf257..842e72a5348f 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -720,7 +720,6 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name, void __iomem *reg, u8 shift, u8 width, u8 clk_divider_flags, const struct clk_div_table *table, spinlock_t *lock); -void clk_sync_state_disable_unused(struct device *dev); /** * clk_register_divider - register a divider clock with the clock framework * @dev: device registering this clock -- cgit v1.2.3 From 8d664282a03fec09682f10252d3c785c2513691d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Feb 2023 08:27:23 -0700 Subject: io_uring: rename 'in_idle' to 'in_cancel' This better describes what it does - it's incremented when the task is currently undergoing a cancelation operation, due to exiting or exec'ing. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 0efe4d784358..00689c12f6ab 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -58,7 +58,7 @@ struct io_uring_task { struct xarray xa; struct wait_queue_head wait; - atomic_t in_idle; + atomic_t in_cancel; atomic_t inflight_tracked; struct percpu_counter inflight; -- cgit v1.2.3 From 16f8a08643b6d63d2e8252ddaa9e17e2408a2531 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 13 Jan 2023 20:14:05 +0300 Subject: dmaengine: dw-edma: Add mem-mapped LL-entries support Currently the DW eDMA driver only supports the linked lists memory allocated locally with respect to the remote eDMA engine setup. It means the linked lists will be accessible by the CPU via the MMIO space only. 
If eDMA is embedded into the DW PCIe Root Ports or local Endpoints (which support will be added in subsequent commits) the linked lists are supposed to be allocated in the CPU memory. In that case the LL-entries can be directly accessed, while the former case implies using the MMIO accessors for that. In order to have both cases supported by the driver, the dw_edma_region descriptor should be fixed to contain the MMIO-backed and just memory-based virtual addresses. The linked lists initialization procedure will use one of them depending on the eDMA device nature. If the eDMA engine is embedded into the local DW PCIe Root Port/Endpoint controllers, the list entries will be directly accessed by referencing the corresponding structure fields. Otherwise the MMIO accessors usage will be preserved. Link: https://lore.kernel.org/r/20230113171409.30470-24-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Vinod Koul --- include/linux/dma/edma.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index 96bfd0173f3a..3b80040b95cc 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -19,7 +19,10 @@ struct dw_edma; struct dw_edma_region { u64 paddr; - void __iomem *vaddr; + union { + void *mem; + void __iomem *io; + } vaddr; size_t sz; }; -- cgit v1.2.3 From 3bc0f149405e07c7e59985a24ce96db83973f8d7 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 13 Jan 2023 20:14:06 +0300 Subject: dmaengine: dw-edma: Prepare dw_edma_probe() for builtin callers When CONFIG_DW_EDMA=m, dw_edma_probe() is built as a module. Previously edma.h declared it as extern, but the implementation isn't available for builtin callers. A subsequent commit will add calls from dw_pcie_host_init() and dw_pcie_ep_init(), which can only be built-in. Make it safe for such builtin callers to call dw_edma_probe() by using IS_REACHABLE() to define a stub when CONFIG_DW_EDMA=m. When CONFIG_DW_EDMA=m, these builtin callers will fail to detect and register eDMA devices, so eDMA won't be usable even if the dw-edma module is loaded. [bhelgaas: split to separate patch, commit log] Link: https://lore.kernel.org/r/20230113171409.30470-25-Sergey.Semin@baikalelectronics.ru Signed-off-by: Serge Semin Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Vinod Koul --- include/linux/dma/edma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma/edma.h b/include/linux/dma/edma.h index 3b80040b95cc..d2638d9259dc 100644 --- a/include/linux/dma/edma.h +++ b/include/linux/dma/edma.h @@ -101,7 +101,7 @@ struct dw_edma_chip { }; /* Export to the platform drivers */ -#if IS_ENABLED(CONFIG_DW_EDMA) +#if IS_REACHABLE(CONFIG_DW_EDMA) int dw_edma_probe(struct dw_edma_chip *chip); int dw_edma_remove(struct dw_edma_chip *chip); #else -- cgit v1.2.3 From b6478b8c93304fa0483e4657779b44634a1711c7 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 22 Feb 2023 06:50:41 +0100 Subject: net: phy: c45: add genphy_c45_an_config_eee_aneg() function Add new genphy_c45_an_config_eee_aneg() function and replace some of genphy_c45_write_eee_adv() calls. This will be needed by the next patch. 
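A plausible, minimal shape of the helper added here, shown only as a sketch (the real implementation lives in drivers/net/phy/phy-c45.c and may differ): it concentrates the EEE advertisement write in one place so that the following patch can gate it on cached state rather than always advertising everything supported.

#include <linux/phy.h>

/* Sketch of the new helper; relies on the phy.h declarations shown below. */
int genphy_c45_an_config_eee_aneg(struct phy_device *phydev)
{
	/* advertise what the PHY reports as supported EEE link modes */
	return genphy_c45_write_eee_adv(phydev, phydev->supported_eee);
}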
Signed-off-by: Oleksij Rempel Reviewed-by: Russell King (Oracle) Signed-off-by: Paolo Abeni --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 727bff531a14..19d83e112beb 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1765,6 +1765,7 @@ int genphy_c45_ethtool_get_eee(struct phy_device *phydev, int genphy_c45_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv); +int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); /* Generic C45 PHY driver */ extern struct phy_driver genphy_c45_driver; -- cgit v1.2.3 From 3eeca4e199cee2066c65b872391cecee5cbbbb81 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 22 Feb 2023 06:50:42 +0100 Subject: net: phy: do not force EEE support With following patches: commit 9b01c885be36 ("net: phy: c22: migrate to genphy_c45_write_eee_adv()") commit 5827b168125d ("net: phy: c45: migrate to genphy_c45_write_eee_adv()") we set the advertisement to potentially supported values. This behavior may introduce new regressions on systems where EEE was disabled by default (BIOS or boot loader configuration or by other ways.) At same time, with this patches, we would overwrite EEE advertisement configuration made over ethtool. To avoid this issues, we need to cache initial and ethtool advertisement configuration and store it for later use. Fixes: 9b01c885be36 ("net: phy: c22: migrate to genphy_c45_write_eee_adv()") Fixes: 5827b168125d ("net: phy: c45: migrate to genphy_c45_write_eee_adv()") Fixes: 022c3f87f88e ("net: phy: add genphy_c45_ethtool_get/set_eee() support") Signed-off-by: Oleksij Rempel Reviewed-by: Russell King (Oracle) Signed-off-by: Paolo Abeni --- include/linux/phy.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 19d83e112beb..36bf0bbc8efa 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -575,6 +575,8 @@ struct macsec_ops; * @advertising: Currently advertised linkmodes * @adv_old: Saved advertised while power saving for WoL * @supported_eee: supported PHY EEE linkmodes + * @advertising_eee: Currently advertised EEE linkmodes + * @eee_enabled: Flag indicating whether the EEE feature is enabled * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host * @eee_broken_modes: Energy efficient ethernet modes which should be prohibited @@ -681,6 +683,8 @@ struct phy_device { __ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old); /* used for eee validation */ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported_eee); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising_eee); + bool eee_enabled; /* Host supported PHY interface types. Should be ignored if empty. 
*/ DECLARE_PHY_INTERFACE_MASK(host_interfaces); @@ -1766,6 +1770,7 @@ int genphy_c45_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data); int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv); int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); +int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv); /* Generic C45 PHY driver */ extern struct phy_driver genphy_c45_driver; -- cgit v1.2.3 From 0322ef49c1ac6f0e2ef37b146c0bf8440873072c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 24 Feb 2023 17:52:33 +0200 Subject: net: dsa: seville: ignore mscc-miim read errors from Lynx PCS During the refactoring in the commit below, vsc9953_mdio_read() was replaced with mscc_miim_read(), which has one extra step: it checks for the MSCC_MIIM_DATA_ERROR bits before returning the result. On T1040RDB, there are 8 QSGMII PCSes belonging to the switch, and they are organized in 2 groups. First group responds to MDIO addresses 4-7 because QSGMIIACR1[MDEV_PORT] is 1, and the second group responds to MDIO addresses 8-11 because QSGMIIBCR1[MDEV_PORT] is 2. I have double checked that these values are correctly set in the SERDES, as well as PCCR1[QSGMA_CFG] and PCCR1[QSGMB_CFG] are both 0b01. mscc_miim_read: phyad 8 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 8 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 8 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 8 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 9 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 9 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 9 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 9 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 10 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 10 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 10 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 10 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 11 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 11 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 11 reg 0x1 MIIM_DATA 0x2d mscc_miim_read: phyad 11 reg 0x5 MIIM_DATA 0x5801 mscc_miim_read: phyad 4 reg 0x1 MIIM_DATA 0x3002d, ERROR mscc_miim_read: phyad 4 reg 0x5 MIIM_DATA 0x3da01, ERROR mscc_miim_read: phyad 5 reg 0x1 MIIM_DATA 0x3002d, ERROR mscc_miim_read: phyad 5 reg 0x5 MIIM_DATA 0x35801, ERROR mscc_miim_read: phyad 5 reg 0x1 MIIM_DATA 0x3002d, ERROR mscc_miim_read: phyad 5 reg 0x5 MIIM_DATA 0x35801, ERROR mscc_miim_read: phyad 6 reg 0x1 MIIM_DATA 0x3002d, ERROR mscc_miim_read: phyad 6 reg 0x5 MIIM_DATA 0x35801, ERROR mscc_miim_read: phyad 6 reg 0x1 MIIM_DATA 0x3002d, ERROR mscc_miim_read: phyad 6 reg 0x5 MIIM_DATA 0x35801, ERROR mscc_miim_read: phyad 7 reg 0x1 MIIM_DATA 0x3002d, ERROR mscc_miim_read: phyad 7 reg 0x5 MIIM_DATA 0x35801, ERROR mscc_miim_read: phyad 7 reg 0x1 MIIM_DATA 0x3002d, ERROR mscc_miim_read: phyad 7 reg 0x5 MIIM_DATA 0x35801, ERROR As can be seen, the data in MIIM_DATA is still valid despite having the MSCC_MIIM_DATA_ERROR bits set. The driver as introduced in commit 84705fc16552 ("net: dsa: felix: introduce support for Seville VSC9953 switch") was ignoring these bits, perhaps deliberately (although unbeknownst to me). This is an old IP and the hardware team cannot seem to be able to help me track down a plausible reason for these failures. I'll keep investigating, but in the meantime, this is a direct regression which must be restored to a working state. The only thing I can do is keep ignoring the errors as before. 
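For illustration, a hedged sketch of the read-path behaviour the new ignore_read_errors parameter implies; the function shape is an assumption, only MSCC_MIIM_DATA_ERROR and the extra mscc_miim_setup() argument come from this patch:

/*
 * Sketch only: after latching MIIM_DATA, the driver is assumed to carry the
 * new flag and skip the error check when it is set, since the low 16 bits
 * still hold valid data on these switches.
 */
static int mscc_miim_finish_read(bool ignore_read_errors, u32 miim_data)
{
	if (!ignore_read_errors && (miim_data & MSCC_MIIM_DATA_ERROR))
		return -EIO;

	return miim_data & 0xffff;	/* the PHY register value */
}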
Fixes: b99658452355 ("net: dsa: ocelot: felix: utilize shared mscc-miim driver for indirect MDIO access") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/mdio/mdio-mscc-miim.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mdio/mdio-mscc-miim.h b/include/linux/mdio/mdio-mscc-miim.h index 5b4ed2c3cbb9..1ce699740af6 100644 --- a/include/linux/mdio/mdio-mscc-miim.h +++ b/include/linux/mdio/mdio-mscc-miim.h @@ -14,6 +14,6 @@ int mscc_miim_setup(struct device *device, struct mii_bus **bus, const char *name, struct regmap *mii_regmap, - int status_offset); + int status_offset, bool ignore_read_errors); #endif -- cgit v1.2.3 From 250870824c1cf199b032b1ef889c8e8d69d9123a Mon Sep 17 00:00:00 2001 From: Michael Karcher Date: Tue, 24 Jan 2023 22:48:16 +0100 Subject: sh: intc: Avoid spurious sizeof-pointer-div warning GCC warns about the pattern sizeof(void*)/sizeof(void), as it looks like the abuse of a pattern to calculate the array size. This pattern appears in the unevaluated part of the ternary operator in _INTC_ARRAY if the parameter is NULL. The replacement uses an alternate approach to return 0 in case of NULL which does not generate the pattern sizeof(void*)/sizeof(void), but still emits the warning if _INTC_ARRAY is called with a nonarray parameter. This patch is required for successful compilation with -Werror enabled. The idea to use _Generic for type distinction is taken from Comment #7 in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108483 by Jakub Jelinek Signed-off-by: Michael Karcher Acked-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/619fa552-c988-35e5-b1d7-fe256c46a272@mkarcher.dialup.fu-berlin.de Signed-off-by: John Paul Adrian Glaubitz --- include/linux/sh_intc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index c255273b0281..37ad81058d6a 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -97,7 +97,10 @@ struct intc_hw_desc { unsigned int nr_subgroups; }; -#define _INTC_ARRAY(a) a, __same_type(a, NULL) ? 0 : sizeof(a)/sizeof(*a) +#define _INTC_SIZEOF_OR_ZERO(a) (_Generic(a, \ + typeof(NULL): 0, \ + default: sizeof(a))) +#define _INTC_ARRAY(a) a, _INTC_SIZEOF_OR_ZERO(a)/sizeof(*a) #define INTC_HW_DESC(vectors, groups, mask_regs, \ prio_regs, sense_regs, ack_regs) \ -- cgit v1.2.3 From a4eecbae092759537748360299de03e434c9a956 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 25 Jan 2023 16:55:56 +0100 Subject: capability: add cap_isidentical Signed-off-by: Mateusz Guzik Reviewed-by: Serge Hallyn Cc: Al Viro Signed-off-by: Linus Torvalds --- include/linux/capability.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 03c2a613ad40..d3c6c2d1ff45 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -157,6 +157,16 @@ static inline bool cap_isclear(const kernel_cap_t a) return true; } +static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b) +{ + unsigned __capi; + CAP_FOR_EACH_U32(__capi) { + if (a.cap[__capi] != b.cap[__capi]) + return false; + } + return true; +} + /* * Check if "a" is a subset of "set". 
* return true if ALL of the capabilities in "a" are also in "set" -- cgit v1.2.3 From 6da6b1d4a7df8c35770186b53ef65d388398e139 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Tue, 21 Feb 2023 17:59:05 +0900 Subject: mm/hwpoison: convert TTU_IGNORE_HWPOISON to TTU_HWPOISON After a memory error happens on a clean folio, a process unexpectedly receives SIGBUS when it accesses the error page. This SIGBUS killing is pointless and simply degrades the level of RAS of the system, because the clean folio can be dropped without any data lost on memory error handling as we do for a clean pagecache. When memory_failure() is called on a clean folio, try_to_unmap() is called twice (one from split_huge_page() and one from hwpoison_user_mappings()). The root cause of the issue is that pte conversion to hwpoisoned entry is now done in the first call of try_to_unmap() because PageHWPoison is already set at this point, while it's actually expected to be done in the second call. This behavior disturbs the error handling operation like removing pagecache, which results in the malfunction described above. So convert TTU_IGNORE_HWPOISON into TTU_HWPOISON and set TTU_HWPOISON only when we really intend to convert pte to hwpoison entry. This can prevent other callers of try_to_unmap() from accidentally converting to hwpoison entries. Link: https://lkml.kernel.org/r/20230221085905.1465385-1-naoya.horiguchi@linux.dev Fixes: a42634a6c07d ("readahead: Use a folio in read_pages()") Signed-off-by: Naoya Horiguchi Cc: David Hildenbrand Cc: Hugh Dickins Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Minchan Kim Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/rmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index a4570da03e58..b87d01660412 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -94,7 +94,7 @@ enum ttu_flags { TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */ - TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */ + TTU_HWPOISON = 0x20, /* do convert pte to hwpoison entry */ TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will * do a final flush if necessary */ -- cgit v1.2.3 From f122a08b197d076ccf136c73fae0146875812a88 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 28 Feb 2023 11:39:09 -0800 Subject: capability: just use a 'u64' instead of a 'u32[2]' array Back in 2008 we extended the capability bits from 32 to 64, and we did it by extending the single 32-bit capability word from one word to an array of two words. It was then obfuscated by hiding the "2" behind two macro expansions, with the reasoning being that maybe it gets extended further some day. That reasoning may have been valid at the time, but the last thing we want to do is to extend the capability set any more. And the array of values not only causes source code oddities (with loops to deal with it), but also results in worse code generation. It's a lose-lose situation. So just change the 'u32[2]' into a 'u64' and be done with it. We still have to deal with the fact that the user space interface is designed around an array of these 32-bit values, but that was the case before too, since the array layouts were different (ie user space doesn't use an array of 32-bit values for individual capability masks, but an array of 32-bit slices of multiple masks). 
So that marshalling of data is actually simplified too, even if it does remain somewhat obscure and odd. This was all triggered by my reaction to the new "cap_isidentical()" introduced recently. By just using a saner data structure, it went from unsigned __capi; CAP_FOR_EACH_U32(__capi) { if (a.cap[__capi] != b.cap[__capi]) return false; } return true; to just being return a.val == b.val; instead. Which is rather more obvious both to humans and to compilers. Cc: Mateusz Guzik Cc: Casey Schaufler Cc: Serge Hallyn Cc: Al Viro Cc: Paul Moore Signed-off-by: Linus Torvalds --- include/linux/capability.h | 131 ++++++++++----------------------------------- 1 file changed, 29 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index d3c6c2d1ff45..0c356a517991 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -15,28 +15,25 @@ #include #include +#include #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 -#define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 extern int file_caps_enabled; -typedef struct kernel_cap_struct { - __u32 cap[_KERNEL_CAPABILITY_U32S]; -} kernel_cap_t; +typedef struct { u64 val; } kernel_cap_t; /* same as vfs_ns_cap_data but in cpu endian and always filled completely */ struct cpu_vfs_cap_data { __u32 magic_etc; + kuid_t rootid; kernel_cap_t permitted; kernel_cap_t inheritable; - kuid_t rootid; }; #define _USER_CAP_HEADER_SIZE (sizeof(struct __user_cap_header_struct)) #define _KERNEL_CAP_T_SIZE (sizeof(kernel_cap_t)) - struct file; struct inode; struct dentry; @@ -44,16 +41,6 @@ struct task_struct; struct user_namespace; struct mnt_idmap; -extern const kernel_cap_t __cap_empty_set; -extern const kernel_cap_t __cap_init_eff_set; - -/* - * Internal kernel functions only - */ - -#define CAP_FOR_EACH_U32(__capi) \ - for (__capi = 0; __capi < _KERNEL_CAPABILITY_U32S; ++__capi) - /* * CAP_FS_MASK and CAP_NFSD_MASKS: * @@ -67,104 +54,52 @@ extern const kernel_cap_t __cap_init_eff_set; * 2. 
The security.* and trusted.* xattrs are fs-related MAC permissions */ -# define CAP_FS_MASK_B0 (CAP_TO_MASK(CAP_CHOWN) \ - | CAP_TO_MASK(CAP_MKNOD) \ - | CAP_TO_MASK(CAP_DAC_OVERRIDE) \ - | CAP_TO_MASK(CAP_DAC_READ_SEARCH) \ - | CAP_TO_MASK(CAP_FOWNER) \ - | CAP_TO_MASK(CAP_FSETID)) - -# define CAP_FS_MASK_B1 (CAP_TO_MASK(CAP_MAC_OVERRIDE)) - -#if _KERNEL_CAPABILITY_U32S != 2 -# error Fix up hand-coded capability macro initializers -#else /* HAND-CODED capability initializers */ +# define CAP_FS_MASK (BIT_ULL(CAP_CHOWN) \ + | BIT_ULL(CAP_MKNOD) \ + | BIT_ULL(CAP_DAC_OVERRIDE) \ + | BIT_ULL(CAP_DAC_READ_SEARCH) \ + | BIT_ULL(CAP_FOWNER) \ + | BIT_ULL(CAP_FSETID) \ + | BIT_ULL(CAP_MAC_OVERRIDE)) +#define CAP_VALID_MASK (BIT_ULL(CAP_LAST_CAP+1)-1) -#define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1) -#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1) +# define CAP_EMPTY_SET ((kernel_cap_t) { 0 }) +# define CAP_FULL_SET ((kernel_cap_t) { CAP_VALID_MASK }) +# define CAP_FS_SET ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_LINUX_IMMUTABLE) }) +# define CAP_NFSD_SET ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_SYS_RESOURCE) }) -# define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) -# define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }}) -# define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ - | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \ - CAP_FS_MASK_B1 } }) -# define CAP_NFSD_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ - | CAP_TO_MASK(CAP_SYS_RESOURCE), \ - CAP_FS_MASK_B1 } }) +# define cap_clear(c) do { (c).val = 0; } while (0) -#endif /* _KERNEL_CAPABILITY_U32S != 2 */ - -# define cap_clear(c) do { (c) = __cap_empty_set; } while (0) - -#define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) -#define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) -#define cap_raised(c, flag) ((c).cap[CAP_TO_INDEX(flag)] & CAP_TO_MASK(flag)) - -#define CAP_BOP_ALL(c, a, b, OP) \ -do { \ - unsigned __capi; \ - CAP_FOR_EACH_U32(__capi) { \ - c.cap[__capi] = a.cap[__capi] OP b.cap[__capi]; \ - } \ -} while (0) - -#define CAP_UOP_ALL(c, a, OP) \ -do { \ - unsigned __capi; \ - CAP_FOR_EACH_U32(__capi) { \ - c.cap[__capi] = OP a.cap[__capi]; \ - } \ -} while (0) +#define cap_raise(c, flag) ((c).val |= BIT_ULL(flag)) +#define cap_lower(c, flag) ((c).val &= ~BIT_ULL(flag)) +#define cap_raised(c, flag) (((c).val & BIT_ULL(flag)) != 0) static inline kernel_cap_t cap_combine(const kernel_cap_t a, const kernel_cap_t b) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, b, |); - return dest; + return (kernel_cap_t) { a.val | b.val }; } static inline kernel_cap_t cap_intersect(const kernel_cap_t a, const kernel_cap_t b) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, b, &); - return dest; + return (kernel_cap_t) { a.val & b.val }; } static inline kernel_cap_t cap_drop(const kernel_cap_t a, const kernel_cap_t drop) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, drop, &~); - return dest; -} - -static inline kernel_cap_t cap_invert(const kernel_cap_t c) -{ - kernel_cap_t dest; - CAP_UOP_ALL(dest, c, ~); - return dest; + return (kernel_cap_t) { a.val &~ drop.val }; } static inline bool cap_isclear(const kernel_cap_t a) { - unsigned __capi; - CAP_FOR_EACH_U32(__capi) { - if (a.cap[__capi] != 0) - return false; - } - return true; + return !a.val; } static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b) { - unsigned __capi; - CAP_FOR_EACH_U32(__capi) { - if (a.cap[__capi] != b.cap[__capi]) - return false; - } - return true; + return a.val == b.val; } /* @@ 
-176,39 +111,31 @@ static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b) */ static inline bool cap_issubset(const kernel_cap_t a, const kernel_cap_t set) { - kernel_cap_t dest; - dest = cap_drop(a, set); - return cap_isclear(dest); + return !(a.val & ~set.val); } /* Used to decide between falling back on the old suser() or fsuser(). */ static inline kernel_cap_t cap_drop_fs_set(const kernel_cap_t a) { - const kernel_cap_t __cap_fs_set = CAP_FS_SET; - return cap_drop(a, __cap_fs_set); + return cap_drop(a, CAP_FS_SET); } static inline kernel_cap_t cap_raise_fs_set(const kernel_cap_t a, const kernel_cap_t permitted) { - const kernel_cap_t __cap_fs_set = CAP_FS_SET; - return cap_combine(a, - cap_intersect(permitted, __cap_fs_set)); + return cap_combine(a, cap_intersect(permitted, CAP_FS_SET)); } static inline kernel_cap_t cap_drop_nfsd_set(const kernel_cap_t a) { - const kernel_cap_t __cap_fs_set = CAP_NFSD_SET; - return cap_drop(a, __cap_fs_set); + return cap_drop(a, CAP_NFSD_SET); } static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a, const kernel_cap_t permitted) { - const kernel_cap_t __cap_nfsd_set = CAP_NFSD_SET; - return cap_combine(a, - cap_intersect(permitted, __cap_nfsd_set)); + return cap_combine(a, cap_intersect(permitted, CAP_NFSD_SET)); } #ifdef CONFIG_MULTIUSER -- cgit v1.2.3 From 0fb7fb713461e44b12e72c292bf90ee300f40710 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 1 Mar 2023 22:07:48 +0100 Subject: genirq/msi, platform-msi: Ensure that MSI descriptors are unreferenced Miquel reported a warning in the MSI core which is triggered when interrupts are freed via platform_msi_device_domain_free(). This code got reworked to use core functions for freeing the MSI descriptors, but nothing took care to clear the msi_desc->irq entry, which then triggers the warning in msi_free_msi_desc() which uses desc->irq to validate that the descriptor has been torn down. The same issue exists in msi_domain_populate_irqs(). Up to the point that msi_free_msi_descs() grew a warning for this case, this went un-noticed. Provide the counterpart of msi_domain_populate_irqs() and invoke it in platform_msi_device_domain_free() before freeing the interrupts and MSI descriptors and also in the error path of msi_domain_populate_irqs(). Fixes: 2f2940d16823 ("genirq/msi: Remove filter from msi_free_descs_free_range()") Reported-by: Miquel Raynal Signed-off-by: Thomas Gleixner Tested-by: Miquel Raynal Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87mt4wkwnv.ffs@tglx --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index a112b913fff9..15dd71817996 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -631,6 +631,8 @@ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *args); int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, int virq, int nvec, msi_alloc_info_t *args); +void msi_domain_depopulate_descs(struct device *dev, int virq, int nvec); + struct irq_domain * __platform_msi_create_device_domain(struct device *dev, unsigned int nvec, -- cgit v1.2.3 From 95207db8166ab95c42a03fdc5e3abd212c9987dc Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 17 Oct 2022 03:23:49 +0900 Subject: Remove Intel compiler support include/linux/compiler-intel.h had no update in the past 3 years. We often forget about the third C compiler to build the kernel. 
For example, commit a0a12c3ed057 ("asm goto: eradicate CC_HAS_ASM_GOTO") only mentioned GCC and Clang. init/Kconfig defines CC_IS_GCC and CC_IS_CLANG but not CC_IS_ICC, and nobody has reported any issue. I guess the Intel Compiler support is broken, and nobody is caring about it. Harald Arnesen pointed out ICC (classic Intel C/C++ compiler) is deprecated: $ icc -v icc: remark #10441: The Intel(R) C++ Compiler Classic (ICC) is deprecated and will be removed from product release in the second half of 2023. The Intel(R) oneAPI DPC++/C++ Compiler (ICX) is the recommended compiler moving forward. Please transition to use this compiler. Use '-diag-disable=10441' to disable this message. icc version 2021.7.0 (gcc version 12.1.0 compatibility) Arnd Bergmann provided a link to the article, "Intel C/C++ compilers complete adoption of LLVM". lib/zstd/common/compiler.h and lib/zstd/compress/zstd_fast.c were kept untouched for better sync with https://github.com/facebook/zstd Link: https://www.intel.com/content/www/us/en/developer/articles/technical/adoption-of-llvm-complete-icx.html Signed-off-by: Masahiro Yamada Acked-by: Arnd Bergmann Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Reviewed-by: Miguel Ojeda Signed-off-by: Linus Torvalds --- include/linux/compiler-intel.h | 34 ---------------------------------- include/linux/compiler_attributes.h | 14 +------------- include/linux/compiler_types.h | 2 -- 3 files changed, 1 insertion(+), 49 deletions(-) delete mode 100644 include/linux/compiler-intel.h (limited to 'include/linux') diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h deleted file mode 100644 index b17f3cd18334..000000000000 --- a/include/linux/compiler-intel.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_COMPILER_TYPES_H -#error "Please don't include directly, include instead." -#endif - -#ifdef __ECC - -/* Compiler specific definitions for Intel ECC compiler */ - -#include - -/* Intel ECC compiler doesn't support gcc specific asm stmts. - * It uses intrinsics to do the equivalent things. - */ - -#define barrier() __memory_barrier() -#define barrier_data(ptr) barrier() - -#define RELOC_HIDE(ptr, off) \ - ({ unsigned long __ptr; \ - __ptr = (unsigned long) (ptr); \ - (typeof(ptr)) (__ptr + (off)); }) - -/* This should act as an optimization barrier on var. - * Given that this compiler does not have inline assembly, a compiler barrier - * is the best we can do. - */ -#define OPTIMIZER_HIDE_VAR(var) barrier() - -#endif - -/* icc has this, but it's called _bswap16 */ -#define __HAVE_BUILTIN_BSWAP16__ -#define __builtin_bswap16 _bswap16 diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 4a3bd114a24f..e659cb6fded3 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -64,16 +64,10 @@ * compiler should see some alignment anyway, when the return value is * massaged by 'flags = ptr & 3; ptr &= ~3;'). * - * Optional: not supported by icc - * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute * clang: https://clang.llvm.org/docs/AttributeReference.html#assume-aligned */ -#if __has_attribute(__assume_aligned__) -# define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__))) -#else -# define __assume_aligned(a, ...) -#endif +#define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__))) /* * Note the long name. 
@@ -85,7 +79,6 @@ /* * Optional: only supported since gcc >= 9 * Optional: not supported by clang - * Optional: not supported by icc * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-copy-function-attribute */ @@ -98,7 +91,6 @@ /* * Optional: not supported by gcc * Optional: only supported since clang >= 14.0 - * Optional: not supported by icc * * clang: https://clang.llvm.org/docs/AttributeReference.html#diagnose_as_builtin */ @@ -122,7 +114,6 @@ /* * Optional: not supported by clang - * Optional: not supported by icc * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-designated_005finit-type-attribute */ @@ -236,7 +227,6 @@ /* * Optional: only supported since gcc >= 8 * Optional: not supported by clang - * Optional: not supported by icc * * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-nonstring-variable-attribute */ @@ -267,7 +257,6 @@ /* * Optional: not supported by gcc. - * Optional: not supported by icc. * * clang: https://clang.llvm.org/docs/AttributeReference.html#overloadable */ @@ -287,7 +276,6 @@ * Note: the "type" argument should match any __builtin_object_size(p, type) usage. * * Optional: not supported by gcc. - * Optional: not supported by icc. * * clang: https://clang.llvm.org/docs/AttributeReference.html#pass-object-size-pass-dynamic-object-size */ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 690c7c826fbf..547ea1ff806e 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -120,8 +120,6 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } /* Compiler specific macros. */ #ifdef __clang__ #include -#elif defined(__INTEL_COMPILER) -#include #elif defined(__GNUC__) /* The above compilers also define __GNUC__, so order is important here. */ #include -- cgit v1.2.3 From 596ff4a09b8981790e15572e8e7bc904df5835e7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 4 Mar 2023 13:35:43 -0800 Subject: cpumask: re-introduce constant-sized cpumask optimizations Commit aa47a7c215e7 ("lib/cpumask: deprecate nr_cpumask_bits") resulted in the cpumask operations potentially becoming hugely less efficient, because suddenly the cpumask was always considered to be variable-sized. The optimization was then later added back in a limited form by commit 6f9c07be9d02 ("lib/cpumask: add FORCE_NR_CPUS config option"), but that FORCE_NR_CPUS option is not useful in a generic kernel and more of a special case for embedded situations with fixed hardware. Instead, just re-introduce the optimization, with some changes. Instead of depending on CPUMASK_OFFSTACK being false, and then always using the full constant cpumask width, this introduces three different cpumask "sizes": - the exact size (nr_cpumask_bits) remains identical to nr_cpu_ids. This is used for situations where we should use the exact size. - the "small" size (small_cpumask_bits) is the NR_CPUS constant if it fits in a single word and the bitmap operations thus end up able to trigger the "small_const_nbits()" optimizations. This is used for the operations that have optimized single-word cases that get inlined, notably the bit find and scanning functions. - the "large" size (large_cpumask_bits) is the NR_CPUS constant if it is an sufficiently small constant that makes simple "copy" and "clear" operations more efficient. This is arbitrarily set at four words or less. 
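As a stand-alone illustration of why a compile-time constant size helps (plain user-space C with made-up names, not kernel code): when the word count is a compile-time constant the compiler can collapse a clear into a single store, while a runtime word count forces a genuine memset() call.

    #include <string.h>

    #define SMALL_WORDS 1U                  /* models NR_CPUS <= BITS_PER_LONG */

    static unsigned long small_mask[SMALL_WORDS];
    static unsigned long big_mask[128];
    static unsigned int runtime_words;      /* models a size derived from nr_cpu_ids */

    void clear_small(void)
    {
            /* constant single-word size: typically compiles to one store */
            memset(small_mask, 0, SMALL_WORDS * sizeof(unsigned long));
    }

    void clear_runtime(void)
    {
            /* runtime size: the compiler has to emit a real memset call */
            memset(big_mask, 0, runtime_words * sizeof(unsigned long));
    }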
As a an example of this situation, without this fixed size optimization, cpumask_clear() will generate code like movl nr_cpu_ids(%rip), %edx addq $63, %rdx shrq $3, %rdx andl $-8, %edx callq memset@PLT on x86-64, because it would calculate the "exact" number of longwords that need to be cleared. In contrast, with this patch, using a MAX_CPU of 64 (which is quite a reasonable value to use), the above becomes a single movq $0,cpumask instruction instead, because instead of caring to figure out exactly how many CPU's the system has, it just knows that the cpumask will be a single word and can just clear it all. Note that this does end up tightening the rules a bit from the original version in another way: operations that set bits in the cpumask are now limited to the actual nr_cpu_ids limit, whereas we used to do the nr_cpumask_bits thing almost everywhere in the cpumask code. But if you just clear bits, or scan for bits, we can use the simpler compile-time constants. In the process, remove 'cpumask_complement()' and 'for_each_cpu_not()' which were not useful, and which fundamentally have to be limited to 'nr_cpu_ids'. Better remove them now than have somebody introduce use of them later. Of course, on x86-64 with MAXSMP there is no sane small compile-time constant for the cpumask sizes, and we end up using the actual CPU bits, and will generate the above kind of horrors regardless. Please don't use MAXSMP unless you really expect to have machines with thousands of cores. Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 125 +++++++++++++++++++++++++++--------------------- 1 file changed, 70 insertions(+), 55 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 10c92bd9b807..8fbe76607965 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -50,8 +50,41 @@ static inline void set_nr_cpu_ids(unsigned int nr) #endif } -/* Deprecated. Always use nr_cpu_ids. */ -#define nr_cpumask_bits nr_cpu_ids +/* + * We have several different "preferred sizes" for the cpumask + * operations, depending on operation. + * + * For example, the bitmap scanning and operating operations have + * optimized routines that work for the single-word case, but only when + * the size is constant. So if NR_CPUS fits in one single word, we are + * better off using that small constant, in order to trigger the + * optimized bit finding. That is 'small_cpumask_size'. + * + * The clearing and copying operations will similarly perform better + * with a constant size, but we limit that size arbitrarily to four + * words. We call this 'large_cpumask_size'. + * + * Finally, some operations just want the exact limit, either because + * they set bits or just don't have any faster fixed-sized versions. We + * call this just 'nr_cpumask_size'. + * + * Note that these optional constants are always guaranteed to be at + * least as big as 'nr_cpu_ids' itself is, and all our cpumask + * allocations are at least that size (see cpumask_size()). The + * optimization comes from being able to potentially use a compile-time + * constant instead of a run-time generated exact number of CPUs. 
+ */ +#if NR_CPUS <= BITS_PER_LONG + #define small_cpumask_bits ((unsigned int)NR_CPUS) + #define large_cpumask_bits ((unsigned int)NR_CPUS) +#elif NR_CPUS <= 4*BITS_PER_LONG + #define small_cpumask_bits nr_cpu_ids + #define large_cpumask_bits ((unsigned int)NR_CPUS) +#else + #define small_cpumask_bits nr_cpu_ids + #define large_cpumask_bits nr_cpu_ids +#endif +#define nr_cpumask_bits nr_cpu_ids /* * The following particular system cpumasks and operations manage @@ -126,7 +159,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu) */ static inline unsigned int cpumask_first(const struct cpumask *srcp) { - return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); + return find_first_bit(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -137,7 +170,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) */ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) { - return find_first_zero_bit(cpumask_bits(srcp), nr_cpumask_bits); + return find_first_zero_bit(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -150,7 +183,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { - return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits); + return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } /** @@ -161,7 +194,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask */ static inline unsigned int cpumask_last(const struct cpumask *srcp) { - return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits); + return find_last_bit(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -177,7 +210,7 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp) /* -1 is a legal arg here. */ if (n != -1) cpumask_check(n); - return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1); + return find_next_bit(cpumask_bits(srcp), small_cpumask_bits, n + 1); } /** @@ -192,7 +225,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) /* -1 is a legal arg here. */ if (n != -1) cpumask_check(n); - return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); + return find_next_zero_bit(cpumask_bits(srcp), small_cpumask_bits, n+1); } #if NR_CPUS == 1 @@ -235,7 +268,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, if (n != -1) cpumask_check(n); return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p), - nr_cpumask_bits, n + 1); + small_cpumask_bits, n + 1); } /** @@ -246,17 +279,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu(cpu, mask) \ - for_each_set_bit(cpu, cpumask_bits(mask), nr_cpumask_bits) - -/** - * for_each_cpu_not - iterate over every cpu in a complemented mask - * @cpu: the (optionally unsigned) integer iterator - * @mask: the cpumask pointer - * - * After the loop, cpu is >= nr_cpu_ids. - */ -#define for_each_cpu_not(cpu, mask) \ - for_each_clear_bit(cpu, cpumask_bits(mask), nr_cpumask_bits) + for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits) #if NR_CPUS == 1 static inline @@ -290,7 +313,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * After the loop, cpu is >= nr_cpu_ids. 
*/ #define for_each_cpu_wrap(cpu, mask, start) \ - for_each_set_bit_wrap(cpu, cpumask_bits(mask), nr_cpumask_bits, start) + for_each_set_bit_wrap(cpu, cpumask_bits(mask), small_cpumask_bits, start) /** * for_each_cpu_and - iterate over every cpu in both masks @@ -307,7 +330,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_and(cpu, mask1, mask2) \ - for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits) + for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) /** * for_each_cpu_andnot - iterate over every cpu present in one mask, excluding @@ -325,7 +348,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta * After the loop, cpu is >= nr_cpu_ids. */ #define for_each_cpu_andnot(cpu, mask1, mask2) \ - for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits) + for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) /** * cpumask_any_but - return a "random" in a cpumask, but not this one. @@ -356,7 +379,7 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) */ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp) { - return find_nth_bit(cpumask_bits(srcp), nr_cpumask_bits, cpumask_check(cpu)); + return find_nth_bit(cpumask_bits(srcp), small_cpumask_bits, cpumask_check(cpu)); } /** @@ -372,7 +395,7 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { return find_nth_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), - nr_cpumask_bits, cpumask_check(cpu)); + small_cpumask_bits, cpumask_check(cpu)); } /** @@ -388,7 +411,7 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1, const struct cpumask *srcp2) { return find_nth_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), - nr_cpumask_bits, cpumask_check(cpu)); + small_cpumask_bits, cpumask_check(cpu)); } /** @@ -408,7 +431,7 @@ unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp return find_nth_and_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), cpumask_bits(srcp3), - nr_cpumask_bits, cpumask_check(cpu)); + small_cpumask_bits, cpumask_check(cpu)); } #define CPU_BITS_NONE \ @@ -495,10 +518,14 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask * /** * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer + * + * Note: since we set bits, we should use the tighter 'bitmap_set()' with + * the eact number of bits, not 'bitmap_fill()' that will fill past the + * end. 
*/ static inline void cpumask_setall(struct cpumask *dstp) { - bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits); + bitmap_set(cpumask_bits(dstp), 0, nr_cpumask_bits); } /** @@ -507,7 +534,7 @@ static inline void cpumask_setall(struct cpumask *dstp) */ static inline void cpumask_clear(struct cpumask *dstp) { - bitmap_zero(cpumask_bits(dstp), nr_cpumask_bits); + bitmap_zero(cpumask_bits(dstp), large_cpumask_bits); } /** @@ -523,7 +550,7 @@ static inline bool cpumask_and(struct cpumask *dstp, const struct cpumask *src2p) { return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), - cpumask_bits(src2p), nr_cpumask_bits); + cpumask_bits(src2p), small_cpumask_bits); } /** @@ -536,7 +563,7 @@ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p), - cpumask_bits(src2p), nr_cpumask_bits); + cpumask_bits(src2p), small_cpumask_bits); } /** @@ -550,7 +577,7 @@ static inline void cpumask_xor(struct cpumask *dstp, const struct cpumask *src2p) { bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p), - cpumask_bits(src2p), nr_cpumask_bits); + cpumask_bits(src2p), small_cpumask_bits); } /** @@ -566,19 +593,7 @@ static inline bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src2p) { return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), - cpumask_bits(src2p), nr_cpumask_bits); -} - -/** - * cpumask_complement - *dstp = ~*srcp - * @dstp: the cpumask result - * @srcp: the input to invert - */ -static inline void cpumask_complement(struct cpumask *dstp, - const struct cpumask *srcp) -{ - bitmap_complement(cpumask_bits(dstp), cpumask_bits(srcp), - nr_cpumask_bits); + cpumask_bits(src2p), small_cpumask_bits); } /** @@ -590,7 +605,7 @@ static inline bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p), - nr_cpumask_bits); + small_cpumask_bits); } /** @@ -604,7 +619,7 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src3p) { return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p), - cpumask_bits(src3p), nr_cpumask_bits); + cpumask_bits(src3p), small_cpumask_bits); } /** @@ -616,7 +631,7 @@ static inline bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p), - nr_cpumask_bits); + small_cpumask_bits); } /** @@ -630,7 +645,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p) { return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), - nr_cpumask_bits); + small_cpumask_bits); } /** @@ -639,7 +654,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p, */ static inline bool cpumask_empty(const struct cpumask *srcp) { - return bitmap_empty(cpumask_bits(srcp), nr_cpumask_bits); + return bitmap_empty(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -657,7 +672,7 @@ static inline bool cpumask_full(const struct cpumask *srcp) */ static inline unsigned int cpumask_weight(const struct cpumask *srcp) { - return bitmap_weight(cpumask_bits(srcp), nr_cpumask_bits); + return bitmap_weight(cpumask_bits(srcp), small_cpumask_bits); } /** @@ -668,7 +683,7 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp) static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2) { - return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits); + 
return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); } /** @@ -681,7 +696,7 @@ static inline void cpumask_shift_right(struct cpumask *dstp, const struct cpumask *srcp, int n) { bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n, - nr_cpumask_bits); + small_cpumask_bits); } /** @@ -705,7 +720,7 @@ static inline void cpumask_shift_left(struct cpumask *dstp, static inline void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp) { - bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits); + bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits); } /** @@ -789,7 +804,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) */ static inline unsigned int cpumask_size(void) { - return BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long); + return BITS_TO_LONGS(large_cpumask_bits) * sizeof(long); } /* -- cgit v1.2.3 From 80c16b2b121fbc3380dbffa9bab7559acbaaa2ed Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 6 Mar 2023 17:22:04 +0200 Subject: cpumask: Fix typo nr_cpumask_size --> nr_cpumask_bits The never used nr_cpumask_size is just a typo, hence use existing redefinition that's called nr_cpumask_bits. Signed-off-by: Andy Shevchenko Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 8fbe76607965..ce8eb7ef2107 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -66,7 +66,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * * Finally, some operations just want the exact limit, either because * they set bits or just don't have any faster fixed-sized versions. We - * call this just 'nr_cpumask_size'. + * call this just 'nr_cpumask_bits'. * * Note that these optional constants are always guaranteed to be at * least as big as 'nr_cpu_ids' itself is, and all our cpumask -- cgit v1.2.3 From c28cd1f3433c7e339315d1ddacaeacf0fdfbe252 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 1 Mar 2023 17:46:38 -0800 Subject: clk: Mark a fwnode as initialized when using CLK_OF_DECLARE() macro We already mark fwnodes as initialized when they are registered as clock providers. We do this so that fw_devlink can tell when a clock driver doesn't use the driver core framework to probe/initialize its device. This ensures fw_devlink doesn't block the consumers of such a clock provider indefinitely. However, some users of CLK_OF_DECLARE() macros don't use the same node that matches the macro as the node for the clock provider, but they initialize the entire node. To cover these cases, also mark the nodes that match the macros as initialized when the init callback function is called. An example of this is "stericsson,u8500-clks" that's handled using CLK_OF_DECLARE() and looks something like this: clocks { compatible = "stericsson,u8500-clks"; prcmu_clk: prcmu-clock { #clock-cells = <1>; }; prcc_pclk: prcc-periph-clock { #clock-cells = <2>; }; prcc_kclk: prcc-kernel-clock { #clock-cells = <2>; }; prcc_reset: prcc-reset-controller { #reset-cells = <2>; }; ... }; This patch makes sure that "clocks" is marked as initialized so that fw_devlink knows that all nodes under it have been initialized. If the driver creates struct devices for some of the subnodes, fw_devlink is smart enough to know to wait for those devices to probe, so no special handling is required for those cases. 
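For reference, a provider wired up through this path looks roughly like the sketch below (the init function name and body are made up; the compatible string is the one from the example above). With this change the wrapper emitted by CLK_OF_DECLARE() additionally marks the matching fwnode as initialized once fn() has run.

    #include <linux/clk-provider.h>
    #include <linux/init.h>
    #include <linux/of.h>

    /* hypothetical provider init; would register the prcmu/prcc clocks */
    static void __init u8500_clks_init(struct device_node *np)
    {
            /* of_clk_add_hw_provider() calls for the child nodes go here */
    }
    CLK_OF_DECLARE(u8500_clks, "stericsson,u8500-clks", u8500_clks_init);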
Cc: Greg Kroah-Hartman Reported-by: Linus Walleij Link: https://lore.kernel.org/lkml/CACRpkdamxDX6EBVjKX5=D3rkHp17f5pwGdBVhzFU90-0MHY6dQ@mail.gmail.com/ Fixes: 4a032827daa8 ("of: property: Simplify of_link_to_phandle()") Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20230302014639.297514-1-saravanak@google.com Reviewed-by: Linus Walleij Tested-by: Linus Walleij Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 842e72a5348f..c9f5276006a0 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1363,7 +1363,13 @@ struct clk_hw_onecell_data { struct clk_hw *hws[]; }; -#define CLK_OF_DECLARE(name, compat, fn) OF_DECLARE_1(clk, name, compat, fn) +#define CLK_OF_DECLARE(name, compat, fn) \ + static void __init name##_of_clk_init_declare(struct device_node *np) \ + { \ + fn(np); \ + fwnode_dev_initialized(of_fwnode_handle(np), true); \ + } \ + OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare) /* * Use this macro when you have a driver that requires two initialization -- cgit v1.2.3 From 849ad04cf562ac63b0371a825eed473d84de9c6d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Mar 2023 01:50:53 -0500 Subject: new helper: put_and_unmap_page() kunmap_local() + put_page(), as done by e.g. ext2 directory handling. Signed-off-by: Al Viro --- include/linux/highmem.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index b06254e76d99..8fc10089e19e 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -481,4 +481,10 @@ static inline void folio_zero_range(struct folio *folio, zero_user_segments(&folio->page, start, start + length, 0, 0); } +static inline void put_and_unmap_page(struct page *page, void *addr) +{ + kunmap_local(addr); + put_page(page); +} + #endif /* _LINUX_HIGHMEM_H */ -- cgit v1.2.3 From 63355b9884b3d1677de6bd1517cd2b8a9bf53978 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 7 Mar 2023 12:16:18 -0800 Subject: cpumask: be more careful with 'cpumask_setall()' Commit 596ff4a09b89 ("cpumask: re-introduce constant-sized cpumask optimizations") changed cpumask_setall() to use "bitmap_set()" instead of "bitmap_fill()", because bitmap_fill() would explicitly set all the bits of a constant sized small bitmap, and that's exactly what we don't want: we want to only set bits up to 'nr_cpu_ids', which is what "bitmap_set()" does. However, Yury correctly points out that while "bitmap_set()" does indeed only set bits up to the required bitmap size, it doesn't _clear_ bits above that size, so the upper bits would still not have well-defined values. Now, none of this should really matter, since any bits set past 'nr_cpu_ids' should always be ignored in the first place. Yes, the bit scanning functions might return them as a result, but since users should always consider the ">= nr_cpu_ids" condition to mean "no more bits", that shouldn't have any actual effect (see previous commit 8ca09d5fa354 "cpumask: fix incorrect cpumask scanning result checks"). But let's just do it right, the way the code was _intended_ to work. We have had enough lazy code that works but bites us in the *rse later (again, see previous commit) that there's no reason to not just do this properly. 
It turns out that "bitmap_fill()" gets this all right for the complex case, and really only fails for the inlined optimized case that just fills the whole word. And while we could just fix bitmap_fill() to use the proper last word mask, there's two issues with that: - the cpumask case wants to do the _optimization_ based on "NR_CPUS is a small constant", but then wants to do the actual bit _fill_ based on "nr_cpu_ids" that isn't necessarily that same constant - we have lots of non-cpumask users of bitmap_fill(), and while they hopefully don't care, and probably would want the proper semantics anyway ("only set bits up to the limit"), I do not want the cpumask changes to impact other parts So this ends up just doing the single-word optimization by hand in the cpumask code. If our cpumask is fundamentally limited to a single word, just do the proper "fill in that word" exactly. And if it's the more complex multi-word case, then the generic bitmap_fill() will DTRT. This is all an example of how our bitmap function optimizations really are somewhat broken. They conflate the "this is size of the bitmap" optimizations with the actual bit(s) we want to set. In many cases we really want to have the two be separate things: sometimes we base our optimizations on the size of the whole bitmap ("I know this whole bitmap fits in a single word, so I'll just use single-word accesses"), and sometimes we base them on the bit we are looking at ("this is just acting on bits that are in the first word, so I'll use single-word accesses"). Notice how the end result of the two optimizations are the same, but the way we get to them are quite different. And all our cpumask optimization games are really about that fundamental distinction, and we'd often really want to pass in both the "this is the bit I'm working on" (which _can_ be a small constant but might be variable), and "I know it's in this range even if it's variable" (based on CONFIG_NR_CPUS). So this cpumask_setall() implementation just makes that explicit. It checks the "I statically know the size is small" using the known static size of the cpumask (which is what that 'small_cpumask_bits' is all about), but then sets the actual bits using the exact number of cpus we have (ie 'nr_cpumask_bits') Of course, in a perfect world, the compiler would have done all the range analysis (possibly with help from us just telling it that "this value is always in this range"), and would do all of this for us. But that is not the world we live in. While we dream of that perfect world, this does that manual logic to make it all work out. And this was a very long explanation for a small code change that shouldn't even matter. Reported-by: Yury Norov Link: https://lore.kernel.org/lkml/ZAV9nGG9e1%2FrV+L%2F@yury-laptop/ Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ce8eb7ef2107..63d637d18e79 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -518,14 +518,14 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask * /** * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask * @dstp: the cpumask pointer - * - * Note: since we set bits, we should use the tighter 'bitmap_set()' with - * the eact number of bits, not 'bitmap_fill()' that will fill past the - * end. 
*/ static inline void cpumask_setall(struct cpumask *dstp) { - bitmap_set(cpumask_bits(dstp), 0, nr_cpumask_bits); + if (small_const_nbits(small_cpumask_bits)) { + cpumask_bits(dstp)[0] = BITMAP_LAST_WORD_MASK(nr_cpumask_bits); + return; + } + bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits); } /** -- cgit v1.2.3 From eb59eca0d8ac15f8c1b7f1cd35999455a90292c0 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Mar 2023 08:56:31 +0100 Subject: interconnect: fix provider registration API The current interconnect provider interface is inherently racy as providers are expected to be added before being fully initialised. Specifically, nodes are currently not added and the provider data is not initialised until after registering the provider which can cause racing DT lookups to fail. Add a new provider API which will be used to fix up the interconnect drivers. The old API is reimplemented using the new interface and will be removed once all drivers have been fixed. Fixes: 11f1ceca7031 ("interconnect: Add generic on-chip interconnect API") Fixes: 87e3031b6fbd ("interconnect: Allow endpoints translation via DT") Cc: stable@vger.kernel.org # 5.1 Reviewed-by: Konrad Dybcio Signed-off-by: Johan Hovold Tested-by: Luca Ceresoli # i.MX8MP MSC SM2-MB-EP1 Board Link: https://lore.kernel.org/r/20230306075651.2449-4-johan+linaro@kernel.org Signed-off-by: Georgi Djakov --- include/linux/interconnect-provider.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h index cd5c5a27557f..d12cd18aab3f 100644 --- a/include/linux/interconnect-provider.h +++ b/include/linux/interconnect-provider.h @@ -122,6 +122,9 @@ int icc_link_destroy(struct icc_node *src, struct icc_node *dst); void icc_node_add(struct icc_node *node, struct icc_provider *provider); void icc_node_del(struct icc_node *node); int icc_nodes_remove(struct icc_provider *provider); +void icc_provider_init(struct icc_provider *provider); +int icc_provider_register(struct icc_provider *provider); +void icc_provider_deregister(struct icc_provider *provider); int icc_provider_add(struct icc_provider *provider); void icc_provider_del(struct icc_provider *provider); struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec); @@ -167,6 +170,15 @@ static inline int icc_nodes_remove(struct icc_provider *provider) return -ENOTSUPP; } +static inline void icc_provider_init(struct icc_provider *provider) { } + +static inline int icc_provider_register(struct icc_provider *provider) +{ + return -ENOTSUPP; +} + +static inline void icc_provider_deregister(struct icc_provider *provider) { } + static inline int icc_provider_add(struct icc_provider *provider) { return -ENOTSUPP; -- cgit v1.2.3 From 03c835f498b540087244a6757e87dfe7ef10999b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sun, 26 Feb 2023 23:26:52 +0100 Subject: i2c: Switch .probe() to not take an id parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit b8a1a4cd5a98 ("i2c: Provide a temporary .probe_new() call-back type") introduced a new probe callback to convert i2c init routines to not take an i2c_device_id parameter. Now that all in-tree drivers are converted to the temporary .probe_new() callback, .probe() can be modified to match the desired prototype. 
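A converted driver then binds with the id-less prototype, along the lines of this sketch (hypothetical "foo" driver, shown only to illustrate the callback signature):

    #include <linux/i2c.h>
    #include <linux/module.h>

    static int foo_probe(struct i2c_client *client)
    {
            /* the i2c_device_id argument is gone; drivers that still need
             * the matched id can look it up from the client instead */
            return 0;
    }

    static struct i2c_driver foo_driver = {
            .driver = { .name = "foo" },
            .probe  = foo_probe,
    };
    module_i2c_driver(foo_driver);
    MODULE_LICENSE("GPL");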
Now that .probe() and .probe_new() have the same semantic, they can be defined as members of an anonymous union to save some memory and simplify the core code a bit. Signed-off-by: Uwe Kleine-König Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 500404d85141..5ba89663ea86 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -236,8 +236,8 @@ enum i2c_driver_flags { /** * struct i2c_driver - represent an I2C device driver * @class: What kind of i2c device we instantiate (for detect) - * @probe: Callback for device binding - soon to be deprecated - * @probe_new: New callback for device binding + * @probe: Callback for device binding + * @probe_new: Transitional callback for device binding - do not use * @remove: Callback for device unbinding * @shutdown: Callback for device shutdown * @alert: Alert callback, for example for the SMBus alert protocol @@ -272,14 +272,18 @@ enum i2c_driver_flags { struct i2c_driver { unsigned int class; + union { /* Standard driver model interfaces */ - int (*probe)(struct i2c_client *client, const struct i2c_device_id *id); + int (*probe)(struct i2c_client *client); + /* + * Legacy callback that was part of a conversion of .probe(). + * Today it has the same semantic as .probe(). Don't use for new + * code. + */ + int (*probe_new)(struct i2c_client *client); + }; void (*remove)(struct i2c_client *client); - /* New driver model interface to aid the seamless removal of the - * current probe()'s, more commonly unused than used second parameter. - */ - int (*probe_new)(struct i2c_client *client); /* driver model interfaces that don't relate to enumeration */ void (*shutdown)(struct i2c_client *client); -- cgit v1.2.3 From 5cf9d015be160e2d90d29ae74ef1364390e8fce8 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 8 Mar 2023 13:47:11 -0700 Subject: clk: Avoid invalid function names in CLK_OF_DECLARE() After commit c28cd1f3433c ("clk: Mark a fwnode as initialized when using CLK_OF_DECLARE() macro"), drivers/clk/mvebu/kirkwood.c fails to build: drivers/clk/mvebu/kirkwood.c:358:1: error: expected identifier or '(' CLK_OF_DECLARE(98dx1135_clk, "marvell,mv98dx1135-core-clock", ^ include/linux/clk-provider.h:1367:21: note: expanded from macro 'CLK_OF_DECLARE' static void __init name##_of_clk_init_declare(struct device_node *np) \ ^ :124:1: note: expanded from here 98dx1135_clk_of_clk_init_declare ^ drivers/clk/mvebu/kirkwood.c:358:1: error: invalid digit 'd' in decimal constant include/linux/clk-provider.h:1372:34: note: expanded from macro 'CLK_OF_DECLARE' OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare) ^ :125:3: note: expanded from here 98dx1135_clk_of_clk_init_declare ^ drivers/clk/mvebu/kirkwood.c:358:1: error: invalid digit 'd' in decimal constant include/linux/clk-provider.h:1372:34: note: expanded from macro 'CLK_OF_DECLARE' OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare) ^ :125:3: note: expanded from here 98dx1135_clk_of_clk_init_declare ^ drivers/clk/mvebu/kirkwood.c:358:1: error: invalid digit 'd' in decimal constant include/linux/clk-provider.h:1372:34: note: expanded from macro 'CLK_OF_DECLARE' OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare) ^ :125:3: note: expanded from here 98dx1135_clk_of_clk_init_declare ^ C function names must start with either an alphabetic letter or an underscore. 
To avoid generating invalid function names from clock names, add two underscores to the beginning of the identifier. Fixes: c28cd1f3433c ("clk: Mark a fwnode as initialized when using CLK_OF_DECLARE() macro") Suggested-by: Saravana Kannan Signed-off-by: Nathan Chancellor Link: https://lore.kernel.org/r/20230308-clk_of_declare-fix-v1-1-317b741e2532@kernel.org Reviewed-by: Saravana Kannan Reported-by: Naresh Kamboju Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index c9f5276006a0..6f3175f0678a 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -1364,12 +1364,12 @@ struct clk_hw_onecell_data { }; #define CLK_OF_DECLARE(name, compat, fn) \ - static void __init name##_of_clk_init_declare(struct device_node *np) \ + static void __init __##name##_of_clk_init_declare(struct device_node *np) \ { \ fn(np); \ fwnode_dev_initialized(of_fwnode_handle(np), true); \ } \ - OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare) + OF_DECLARE_1(clk, name, compat, __##name##_of_clk_init_declare) /* * Use this macro when you have a driver that requires two initialization -- cgit v1.2.3 From fe9ae05cfbe587dda724fcf537c00bc2f287da62 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 8 Mar 2023 11:50:12 +0100 Subject: fbdev: Fix incorrect page mapping clearance at fb_deferred_io_release() The recent fix for the deferred I/O by the commit 3efc61d95259 ("fbdev: Fix invalid page access after closing deferred I/O devices") caused a regression when the same fb device is opened/closed while it's being used. It resulted in a frozen screen even if something is redrawn there after the close. The breakage is because the patch was made under a wrong assumption of a single open; in the current code, fb_deferred_io_release() cleans up the page mapping of the pageref list and it calls cancel_delayed_work_sync() unconditionally, where both are no correct behavior for multiple opens. This patch adds a refcount for the opens of the device, and applies the cleanup only when all files get closed. As both fb_deferred_io_open() and _close() are called always in the fb_info lock (mutex), it's safe to use the normal int for the refcounting. Also, a useless BUG_ON() is dropped. 
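A simplified sketch of the resulting open/close logic (not the verbatim fb_deferred_io code; it only assumes the new open_count field and that both paths run under the fb_info lock):

    #include <linux/fb.h>
    #include <linux/workqueue.h>

    static void deferred_io_open_sketch(struct fb_info *info)
    {
            info->fbdefio->open_count++;            /* under the fb_info lock */
    }

    static void deferred_io_release_sketch(struct fb_info *info)
    {
            if (--info->fbdefio->open_count)
                    return;                         /* other openers remain */

            /* last close: tear down page mappings and stop the worker */
            cancel_delayed_work_sync(&info->deferred_work);
    }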
Fixes: 3efc61d95259 ("fbdev: Fix invalid page access after closing deferred I/O devices") Cc: Signed-off-by: Takashi Iwai Reviewed-by: Patrik Jakobsson Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20230308105012.1845-1-tiwai@suse.de --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index 73eb1f85ea8e..05e40fcc7696 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -212,6 +212,7 @@ struct fb_deferred_io { /* delay between mkwrite and deferred handler */ unsigned long delay; bool sort_pagereflist; /* sort pagelist by offset */ + int open_count; /* number of opened files; protected by fb_info lock */ struct mutex lock; /* mutex that protects the pageref list */ struct list_head pagereflist; /* list of pagerefs for touched pages */ /* callback */ -- cgit v1.2.3 From 62913ae96de747091c4dacd06d158e7729c1a76d Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 7 Mar 2023 23:15:49 -0500 Subject: ext4, jbd2: add an optimized bmap for the journal inode The generic bmap() function exported by the VFS takes locks and does checks that are not necessary for the journal inode. So allow the file system to set a journal-optimized bmap function in journal->j_bmap. Reported-by: syzbot+9543479984ae9e576000@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=e4aaa78795e490421c79f76ec3679006c8ff4cf0 Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 2170e0cc279d..6ffa34c51a11 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1308,6 +1308,14 @@ struct journal_s struct buffer_head *bh, enum passtype pass, int off, tid_t expected_commit_id); + + /** + * @j_bmap: + * + * Bmap function that should be used instead of the generic + * VFS bmap function. + */ + int (*j_bmap)(struct journal_s *journal, sector_t *block); }; #define jbd2_might_wait_for_commit(j) \ -- cgit v1.2.3 From e7304080e0e50d979ce9eaf694ad8283e2e539ea Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Mar 2023 08:52:03 -0700 Subject: cpumask: relax sanity checking constraints The cpumask_check() was unnecessarily tight, and causes problems for the users of cpumask_next(). We have a number of users that take the previous return value of one of the bit scanning functions and subtract one to keep it in "range". But since the scanning functions end up returning up to 'small_cpumask_bits' instead of the tighter 'nr_cpumask_bits', the range really needs to be using that widened form. [ This "previous-1" behavior is also the reason we have all those comments about /* -1 is a legal arg here. */ and separate checks for that being ok. So we could have just made "small_cpumask_bits-1" be a similar special "don't check this" value. Tetsuo Handa even suggested a patch that only does that for cpumask_next(), since that seems to be the only actual case that triggers, but that all makes it even _more_ magical and special. So just relax the check ] One example of this kind of pattern being the 'c_start()' function in arch/x86/kernel/cpu/proc.c, but also duplicated in various forms on other architectures. 
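That x86 c_start() is roughly the following (quoted from memory, so treat it as illustrative). The value in *pos can come from an earlier scan that returned up to small_cpumask_bits, which may exceed nr_cpu_ids, so the *pos - 1 handed back to cpumask_next() has to be tolerated by cpumask_check():

    static void *c_start(struct seq_file *m, loff_t *pos)
    {
            *pos = cpumask_next(*pos - 1, cpu_online_mask);
            if ((*pos) < nr_cpu_ids)
                    return &cpu_data(*pos);
            return NULL;
    }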
Reported-by: syzbot+96cae094d90877641f32@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?extid=96cae094d90877641f32 Reported-by: Tetsuo Handa Link: https://lore.kernel.org/lkml/c1f4cc16-feea-b83c-82cf-1a1f007b7eb9@I-love.SAKURA.ne.jp/ Fixes: 596ff4a09b89 ("cpumask: re-introduce constant-sized cpumask optimizations") Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 63d637d18e79..d4901ca8883c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -147,7 +147,7 @@ static __always_inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bit /* verify cpu argument to cpumask_* operators */ static __always_inline unsigned int cpumask_check(unsigned int cpu) { - cpu_max_bits_warn(cpu, nr_cpumask_bits); + cpu_max_bits_warn(cpu, small_cpumask_bits); return cpu; } -- cgit v1.2.3 From ab909509850b27fd39b8ba99e44cda39dbc3858c Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Mon, 6 Mar 2023 16:10:11 +0100 Subject: PCI: s390: Fix use-after-free of PCI resources with per-function hotplug On s390 PCI functions may be hotplugged individually even when they belong to a multi-function device. In particular on an SR-IOV device VFs may be removed and later re-added. In commit a50297cf8235 ("s390/pci: separate zbus creation from scanning") it was missed however that struct pci_bus and struct zpci_bus's resource list retained a reference to the PCI functions MMIO resources even though those resources are released and freed on hot-unplug. These stale resources may subsequently be claimed when the PCI function re-appears resulting in use-after-free. One idea of fixing this use-after-free in s390 specific code that was investigated was to simply keep resources around from the moment a PCI function first appeared until the whole virtual PCI bus created for a multi-function device disappears. The problem with this however is that due to the requirement of artificial MMIO addreesses (address cookies) extra logic is then needed to keep the address cookies compatible on re-plug. At the same time the MMIO resources semantically belong to the PCI function so tying their lifecycle to the function seems more logical. Instead a simpler approach is to remove the resources of an individually hot-unplugged PCI function from the PCI bus's resource list while keeping the resources of other PCI functions on the PCI bus untouched. This is done by introducing pci_bus_remove_resource() to remove an individual resource. Similarly the resource also needs to be removed from the struct zpci_bus's resource list. It turns out however, that there is really no need to add the MMIO resources to the struct zpci_bus's resource list at all and instead we can simply use the zpci_bar_struct's resource pointer directly. 
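The new helper complements the existing pci_bus_remove_resources(); a per-function hot-unplug path can then drop just the departing function's windows, along the lines of this sketch (function and parameter names are illustrative):

    #include <linux/pci.h>

    /* unlink only this function's BAR windows from the bus resource list */
    static void drop_function_bars(struct pci_bus *bus, struct resource **bar_res,
                                   int nr_bars)
    {
            int i;

            for (i = 0; i < nr_bars; i++) {
                    if (!bar_res[i])
                            continue;
                    pci_bus_remove_resource(bus, bar_res[i]);
            }
    }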
Fixes: a50297cf8235 ("s390/pci: separate zbus creation from scanning") Signed-off-by: Niklas Schnelle Reviewed-by: Matthew Rosato Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20230306151014.60913-2-schnelle@linux.ibm.com Signed-off-by: Vasily Gorbik --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index fafd8020c6d7..b50e5c79f7e3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1438,6 +1438,7 @@ void pci_bus_add_resource(struct pci_bus *bus, struct resource *res, unsigned int flags); struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n); void pci_bus_remove_resources(struct pci_bus *bus); +void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res); int devm_request_pci_bus_resources(struct device *dev, struct list_head *resources); -- cgit v1.2.3 From 8b3a149db461d3286d1e211112de3b44ccaeaf71 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 12 Mar 2023 23:00:03 +0100 Subject: efi: earlycon: Reprobe after parsing config tables Commit 732ea9db9d8a ("efi: libstub: Move screen_info handling to common code") reorganized the earlycon handling so that all architectures pass the screen_info data via a EFI config table instead of populating struct screen_info directly, as the latter is only possible when the EFI stub is baked into the kernel (and not into the decompressor). However, this means that struct screen_info may not have been populated yet by the time the earlycon probe takes place, and this results in a non-functional early console. So let's probe again right after parsing the config tables and populating struct screen_info. Note that this means that earlycon output starts a bit later than before, and so it may fail to capture issues that occur while doing the early EFI initialization. Fixes: 732ea9db9d8a ("efi: libstub: Move screen_info handling to common code") Reported-by: Shawn Guo Tested-by: Shawn Guo Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 04a733f0ba95..7aa62c92185f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -693,6 +693,7 @@ efi_guid_to_str(efi_guid_t *guid, char *out) } extern void efi_init (void); +extern void efi_earlycon_reprobe(void); #ifdef CONFIG_EFI extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ #else -- cgit v1.2.3 From 34e0a279a993debaff03158fc2fbf6a00c093643 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 13 Mar 2023 10:30:02 +0100 Subject: block: do not reverse request order when flushing plug list Commit 26fed4ac4eab ("block: flush plug based on hardware and software queue order") changed flushing of plug list to submit requests one device at a time. However while doing that it also started using list_add_tail() instead of list_add() used previously thus effectively submitting requests in reverse order. Also when forming a rq_list with remaining requests (in case two or more devices are used), we effectively reverse the ordering of the plug list for each device we process. Submitting requests in reverse order has negative impact on performance for rotational disks (when BFQ is not in use). We observe 10-25% regression in random 4k write throughput, as well as ~20% regression in MariaDB OLTP benchmark on rotational storage on btrfs filesystem. 
Fix the problem by preserving ordering of the plug list when inserting requests into the queuelist as well as by appending to requeue_list instead of prepending to it. Fixes: 26fed4ac4eab ("block: flush plug based on hardware and software queue order") Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230313093002.11756-1-jack@suse.cz Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index dd5ce1137f04..de0b0c3e7395 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -228,6 +228,12 @@ static inline unsigned short req_get_ioprio(struct request *req) *(listptr) = rq; \ } while (0) +#define rq_list_add_tail(lastpptr, rq) do { \ + (rq)->rq_next = NULL; \ + **(lastpptr) = rq; \ + *(lastpptr) = &rq->rq_next; \ +} while (0) + #define rq_list_pop(listptr) \ ({ \ struct request *__req = NULL; \ -- cgit v1.2.3 From 7ff84910c66c9144cc0de9d9deed9fb84c03aff0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 14 Mar 2023 06:20:58 -0400 Subject: lockd: set file_lock start and end when decoding nlm4 testargs Commit 6930bcbfb6ce dropped the setting of the file_lock range when decoding a nlm_lock off the wire. This causes the client side grant callback to miss matching blocks and reject the lock, only to rerequest it 30s later. Add a helper function to set the file_lock range from the start and end values that the protocol uses, and have the nlm_lock decoder call that to set up the file_lock args properly. Fixes: 6930bcbfb6ce ("lockd: detect and reject lock arguments that overflow") Reported-by: Amir Goldstein Signed-off-by: Jeff Layton Tested-by: Amir Goldstein Cc: stable@vger.kernel.org #6.0 Signed-off-by: Anna Schumaker --- include/linux/lockd/xdr4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 9a6b55da8fd6..72831e35dca3 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -22,6 +22,7 @@ #define nlm4_fbig cpu_to_be32(NLM_FBIG) #define nlm4_failed cpu_to_be32(NLM_FAILED) +void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len); bool nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -- cgit v1.2.3 From c2679254b9c9980d9045f0f722cf093a2b1f7590 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 10 Mar 2023 17:28:56 -0500 Subject: tracing: Make tracepoint lockdep check actually test something A while ago where the trace events had the following: rcu_read_lock_sched_notrace(); rcu_dereference_sched(...); rcu_read_unlock_sched_notrace(); If the tracepoint is enabled, it could trigger RCU issues if called in the wrong place. And this warning was only triggered if lockdep was enabled. If the tracepoint was never enabled with lockdep, the bug would not be caught. To handle this, the above sequence was done when lockdep was enabled regardless if the tracepoint was enabled or not (although the always enabled code really didn't do anything, it would still trigger a warning). But a lot has changed since that lockdep code was added. One is, that sequence no longer triggers any warning. Another is, the tracepoint when enabled doesn't even do that sequence anymore. 
The main check we care about today is whether RCU is "watching" or not. So if lockdep is enabled, always check if rcu_is_watching() which will trigger a warning if it is not (tracepoints require RCU to be watching). Note, that old sequence did add a bit of overhead when lockdep was enabled, and with the latest kernel updates, would cause the system to slow down enough to trigger kernel "stalled" warnings. Link: http://lore.kernel.org/lkml/20140806181801.GA4605@redhat.com Link: http://lore.kernel.org/lkml/20140807175204.C257CAC5@viggo.jf.intel.com Link: https://lore.kernel.org/lkml/20230307184645.521db5c9@gandalf.local.home/ Link: https://lore.kernel.org/linux-trace-kernel/20230310172856.77406446@gandalf.local.home Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Dave Hansen Cc: "Paul E. McKenney" Cc: Mathieu Desnoyers Cc: Joel Fernandes Acked-by: Peter Zijlstra (Intel) Acked-by: Paul E. McKenney Fixes: e6753f23d961 ("tracepoint: Make rcuidle tracepoint callers use SRCU") Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index fa1004fcf810..2083f2d2f05b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -231,12 +231,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. * - * When lockdep is enabled, we make sure to always do the RCU portions of - * the tracepoint code, regardless of whether tracing is on. However, - * don't check if the condition is false, due to interaction with idle - * instrumentation. This lets us find RCU issues triggered with tracepoints - * even when this tracepoint is off. This code has no purpose other than - * poking RCU a bit. + * When lockdep is enabled, we make sure to always test if RCU is + * "watching" regardless if the tracepoint is enabled or not. Tracepoints + * require RCU to be active, and it should always warn at the tracepoint + * site if it is not watching, as it will need to be active when the + * tracepoint is enabled. */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto) \ extern int __traceiter_##name(data_proto); \ @@ -249,9 +248,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) TP_ARGS(args), \ TP_CONDITION(cond), 0); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ - rcu_read_lock_sched_notrace(); \ - rcu_dereference_sched(__tracepoint_##name.funcs);\ - rcu_read_unlock_sched_notrace(); \ + WARN_ON_ONCE(!rcu_is_watching()); \ } \ } \ __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ -- cgit v1.2.3 From 4b397c06cb987935b1b097336532aa6b4210e091 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Mar 2023 19:11:09 +0000 Subject: net: tunnels: annotate lockless accesses to dev->needed_headroom IP tunnels can apparently update dev->needed_headroom in their xmit path. This patch takes care of three tunnels xmit, and also the core LL_RESERVED_SPACE() and LL_RESERVED_SPACE_EXTRA() helpers. More changes might be needed for completeness. 
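The annotation used is the usual READ_ONCE()/WRITE_ONCE() pairing on dev->needed_headroom; a minimal sketch of the writer side (illustrative, not the exact hunk):

    #include <linux/netdevice.h>

    static void tunnel_grow_headroom(struct net_device *dev, unsigned int headroom)
    {
            /* lockless update, marked so KCSAN sees an intentional data race */
            if (headroom > READ_ONCE(dev->needed_headroom))
                    WRITE_ONCE(dev->needed_headroom, headroom);
    }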
BUG: KCSAN: data-race in ip_tunnel_xmit / ip_tunnel_xmit read to 0xffff88815b9da0ec of 2 bytes by task 888 on cpu 1: ip_tunnel_xmit+0x1270/0x1730 net/ipv4/ip_tunnel.c:803 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 dev_queue_xmit include/linux/netdevice.h:3051 [inline] neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623 neigh_output include/net/neighbour.h:546 [inline] ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430 dst_output include/net/dst.h:444 [inline] ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126 iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 dev_queue_xmit include/linux/netdevice.h:3051 [inline] neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623 neigh_output include/net/neighbour.h:546 [inline] ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430 dst_output include/net/dst.h:444 [inline] ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126 iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 dev_queue_xmit include/linux/netdevice.h:3051 [inline] neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623 neigh_output include/net/neighbour.h:546 [inline] ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430 dst_output include/net/dst.h:444 [inline] ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126 iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 dev_queue_xmit include/linux/netdevice.h:3051 [inline] neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623 neigh_output include/net/neighbour.h:546 [inline] ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316 NF_HOOK_COND 
include/linux/netfilter.h:291 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430 dst_output include/net/dst.h:444 [inline] ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126 iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 dev_queue_xmit include/linux/netdevice.h:3051 [inline] neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623 neigh_output include/net/neighbour.h:546 [inline] ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430 dst_output include/net/dst.h:444 [inline] ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126 iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 dev_queue_xmit include/linux/netdevice.h:3051 [inline] neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623 neigh_output include/net/neighbour.h:546 [inline] ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228 ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430 dst_output include/net/dst.h:444 [inline] ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126 iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82 ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 write to 0xffff88815b9da0ec of 2 bytes by task 2379 on cpu 0: ip_tunnel_xmit+0x1294/0x1730 net/ipv4/ip_tunnel.c:804 __gre_xmit net/ipv4/ip_gre.c:469 [inline] ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661 __netdev_start_xmit include/linux/netdevice.h:4881 [inline] netdev_start_xmit include/linux/netdevice.h:4895 [inline] xmit_one net/core/dev.c:3580 [inline] dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596 __dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246 dev_queue_xmit include/linux/netdevice.h:3051 [inline] neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623 neigh_output include/net/neighbour.h:546 [inline] ip6_finish_output2+0x9bc/0xc50 net/ipv6/ip6_output.c:134 __ip6_finish_output net/ipv6/ip6_output.c:195 [inline] ip6_finish_output+0x39a/0x4e0 net/ipv6/ip6_output.c:206 NF_HOOK_COND include/linux/netfilter.h:291 [inline] ip6_output+0xeb/0x220 net/ipv6/ip6_output.c:227 dst_output include/net/dst.h:444 [inline] NF_HOOK include/linux/netfilter.h:302 [inline] mld_sendpack+0x438/0x6a0 net/ipv6/mcast.c:1820 mld_send_cr net/ipv6/mcast.c:2121 [inline] mld_ifc_work+0x519/0x7b0 
net/ipv6/mcast.c:2653 process_one_work+0x3e6/0x750 kernel/workqueue.c:2390 worker_thread+0x5f2/0xa10 kernel/workqueue.c:2537 kthread+0x1ac/0x1e0 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 value changed: 0x0dd4 -> 0x0e14 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 2379 Comm: kworker/0:0 Not tainted 6.3.0-rc1-syzkaller-00002-g8ca09d5fa354-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023 Workqueue: mld mld_ifc_work Fixes: 8eb30be0352d ("ipv6: Create ip6_tnl_xmit") Reported-by: syzbot Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20230310191109.2384387-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6a14b7b11766..470085b121d3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -297,9 +297,11 @@ struct hh_cache { * relationship HH alignment <= LL alignment. */ #define LL_RESERVED_SPACE(dev) \ - ((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) + ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom)) \ + & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD) #define LL_RESERVED_SPACE_EXTRA(dev,extra) \ - ((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD) + ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom) + (extra)) \ + & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD) struct header_ops { int (*create) (struct sk_buff *skb, struct net_device *dev, -- cgit v1.2.3 From 8e19b87cfce2de2125f11363d7dea3d08f16ccae Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Thu, 9 Mar 2023 23:31:18 +0900 Subject: nvme-trace: show more opcode names We have more commands to show in the trace. Sync up. 
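The nvme_opcode_name() and nvme_admin_opcode_name() entries this patch adds (see the diff below) feed the trace-event __print_symbolic() tables. The underlying idea is a single list expanded once into opcode values and once into printable names, so the two cannot drift apart. A compilable userspace sketch of that X-macro pattern, with opcode values quoted from the NVMe base specification as best recalled and meant only as illustration:

#include <stdio.h>

/* One list of (name, value) pairs, expanded in two different ways. */
#define NVME_OPCODES(op)          \
    op(nvme_cmd_flush,  0x00)     \
    op(nvme_cmd_write,  0x01)     \
    op(nvme_cmd_read,   0x02)     \
    op(nvme_cmd_verify, 0x0c) /* value assumed from the NVMe spec */

#define AS_ENUM(name, val)  name = val,
enum nvme_opcode { NVME_OPCODES(AS_ENUM) };

#define AS_ENTRY(name, val) { val, #name },
static const struct { int val; const char *name; } opcode_names[] = {
    NVME_OPCODES(AS_ENTRY)
};

static const char *opcode_name(int opc)
{
    for (size_t i = 0; i < sizeof(opcode_names) / sizeof(opcode_names[0]); i++)
        if (opcode_names[i].val == opc)
            return opcode_names[i].name;
    return "(unknown)";
}

int main(void)
{
    printf("0x0c -> %s\n", opcode_name(nvme_cmd_verify));
    return 0;
}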
Signed-off-by: Minwoo Im Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 4fad4aa245fb..779507ac750b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -812,6 +812,7 @@ enum nvme_opcode { nvme_opcode_name(nvme_cmd_compare), \ nvme_opcode_name(nvme_cmd_write_zeroes), \ nvme_opcode_name(nvme_cmd_dsm), \ + nvme_opcode_name(nvme_cmd_verify), \ nvme_opcode_name(nvme_cmd_resv_register), \ nvme_opcode_name(nvme_cmd_resv_report), \ nvme_opcode_name(nvme_cmd_resv_acquire), \ @@ -1144,10 +1145,14 @@ enum nvme_admin_opcode { nvme_admin_opcode_name(nvme_admin_ns_mgmt), \ nvme_admin_opcode_name(nvme_admin_activate_fw), \ nvme_admin_opcode_name(nvme_admin_download_fw), \ + nvme_admin_opcode_name(nvme_admin_dev_self_test), \ nvme_admin_opcode_name(nvme_admin_ns_attach), \ nvme_admin_opcode_name(nvme_admin_keep_alive), \ nvme_admin_opcode_name(nvme_admin_directive_send), \ nvme_admin_opcode_name(nvme_admin_directive_recv), \ + nvme_admin_opcode_name(nvme_admin_virtual_mgmt), \ + nvme_admin_opcode_name(nvme_admin_nvme_mi_send), \ + nvme_admin_opcode_name(nvme_admin_nvme_mi_recv), \ nvme_admin_opcode_name(nvme_admin_dbbuf), \ nvme_admin_opcode_name(nvme_admin_format_nvm), \ nvme_admin_opcode_name(nvme_admin_security_send), \ -- cgit v1.2.3 From 5f27571382ca42daa3e3d40d1b252bf18c2b61d2 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 23 Feb 2023 17:12:26 +0800 Subject: block: count 'ios' and 'sectors' when io is done for bio-based device While using iostat for raid, I observed very strange 'await' occasionally, and it turns out this is because 'ios' and 'sectors' are counted in bdev_start_io_acct(), while 'nsecs' is counted in bdev_end_io_acct(). I'm not sure why they are counted like that, but I think this behaviour is obviously wrong because users will get wrong disk stats. Fix the problem by counting 'ios' and 'sectors' when io is done, like what rq-based device does.
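A simplified model of the accounting change (an illustration, not the kernel's part_stat machinery): if 'ios' and 'sectors' are bumped at submission but 'nsecs' only at completion, a sampler such as iostat can observe the counters out of step and derive a meaningless await; bumping everything at completion keeps the ratio consistent.

#include <stdio.h>

struct disk_stats {
    unsigned long ios;
    unsigned long sectors;
    unsigned long nsecs;
};

/* Submission only records the start time, like bdev_start_io_acct()
 * after this change: no 'ios'/'sectors' bump here. */
static unsigned long start_io(unsigned long now)
{
    return now;
}

/* Completion accounts everything at once, so ios/sectors/nsecs stay in
 * step, like bdev_end_io_acct() now taking 'sectors'. */
static void end_io(struct disk_stats *st, unsigned int sectors,
                   unsigned long start, unsigned long now)
{
    st->ios++;
    st->sectors += sectors;
    st->nsecs += now - start;
}

int main(void)
{
    struct disk_stats st = { 0 };
    unsigned long t0 = start_io(100);

    end_io(&st, 8, t0, 250);
    printf("await = %lu ns per io\n", st.nsecs / st.ios);
    return 0;
}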
Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help function") Signed-off-by: Yu Kuai Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20230223091226.1135678-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d1aee08f8c18..941304f17492 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter) wake_up_process(waiter); } -unsigned long bdev_start_io_acct(struct block_device *bdev, - unsigned int sectors, enum req_op op, +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op, unsigned long start_time); void bdev_end_io_acct(struct block_device *bdev, enum req_op op, - unsigned long start_time); + unsigned int sectors, unsigned long start_time); unsigned long bio_start_io_acct(struct bio *bio); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, -- cgit v1.2.3 From 3615c78673c332b69aaacefbcde5937c5c706686 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Mar 2023 13:31:02 +0100 Subject: efi: sysfb_efi: Fix DMI quirks not working for simpledrm Commit 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") moved the sysfb_apply_efi_quirks() call in sysfb_init() from before the [sysfb_]parse_mode() call to after it. But sysfb_apply_efi_quirks() modifies the global screen_info struct which [sysfb_]parse_mode() parses, so doing it later is too late. This has broken all DMI based quirks for correcting wrong firmware efifb settings when simpledrm is used. To fix this move the sysfb_apply_efi_quirks() call back to its old place and split the new setup of the efifb_fwnode (which requires the platform_device) into its own function and call that at the place of the moved sysfb_apply_efi_quirks(pd) calls. Fixes: 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") Cc: stable@vger.kernel.org Cc: Javier Martinez Canillas Cc: Thomas Zimmermann Signed-off-by: Hans de Goede Reviewed-by: Javier Martinez Canillas Signed-off-by: Ard Biesheuvel --- include/linux/sysfb.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index 8ba8b5be5567..c1ef5fc60a3c 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -70,11 +70,16 @@ static inline void sysfb_disable(void) #ifdef CONFIG_EFI extern struct efifb_dmi_info efifb_dmi_list[]; -void sysfb_apply_efi_quirks(struct platform_device *pd); +void sysfb_apply_efi_quirks(void); +void sysfb_set_efifb_fwnode(struct platform_device *pd); #else /* CONFIG_EFI */ -static inline void sysfb_apply_efi_quirks(struct platform_device *pd) +static inline void sysfb_apply_efi_quirks(void) +{ +} + +static inline void sysfb_set_efifb_fwnode(struct platform_device *pd) { } -- cgit v1.2.3 From 99669259f3361d759219811e670b7e0742668556 Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Thu, 16 Mar 2023 16:33:16 -0700 Subject: net: mdio: fix owner field for mdio buses registered using device-tree Bus ownership is wrong when using of_mdiobus_register() to register an mdio bus. That function is not inline, so when it calls mdiobus_register() the wrong THIS_MODULE value is captured. 
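The shape of the fix, visible in the header diff below, is the usual one for this class of bug: the exported symbol takes an explicit owner argument, and the public name becomes a static inline wrapper so THIS_MODULE is evaluated in the caller. A self-contained userspace sketch of that structure (struct module and THIS_MODULE here are local stand-ins, not the kernel definitions):

#include <stdio.h>

/* Local stand-ins: in the kernel, struct module and THIS_MODULE come
 * from <linux/module.h> and name the module being compiled. */
struct module {
    const char *name;
};

static struct module this_module = { "caller_driver" };
#define THIS_MODULE (&this_module)

/* Out-of-line worker takes the owner explicitly... */
static int __register_bus(const char *busname, struct module *owner)
{
    printf("%s registered, owner %s\n", busname, owner->name);
    return 0;
}

/* ...and the public API is a static inline wrapper, so THIS_MODULE is
 * evaluated in the caller rather than deep inside a shared helper,
 * which would record the wrong owner. */
static inline int register_bus(const char *busname)
{
    return __register_bus(busname, THIS_MODULE);
}

int main(void)
{
    return register_bus("mdio-bus0");
}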
Signed-off-by: Maxime Bizon Fixes: 90eff9096c01 ("net: phy: Allow splitting MDIO bus/device support from PHYs") [florian: fix kdoc, added Fixes tag] Signed-off-by: Florian Fainelli Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/of_mdio.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index da633d34ab86..8a52ef2e6fa6 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -14,9 +14,25 @@ #if IS_ENABLED(CONFIG_OF_MDIO) bool of_mdiobus_child_is_phy(struct device_node *child); -int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np); -int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, - struct device_node *np); +int __of_mdiobus_register(struct mii_bus *mdio, struct device_node *np, + struct module *owner); + +static inline int of_mdiobus_register(struct mii_bus *mdio, + struct device_node *np) +{ + return __of_mdiobus_register(mdio, np, THIS_MODULE); +} + +int __devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, + struct device_node *np, struct module *owner); + +static inline int devm_of_mdiobus_register(struct device *dev, + struct mii_bus *mdio, + struct device_node *np) +{ + return __devm_of_mdiobus_register(dev, mdio, np, THIS_MODULE); +} + struct mdio_device *of_mdio_find_device(struct device_node *np); struct phy_device *of_phy_find_device(struct device_node *phy_np); struct phy_device * -- cgit v1.2.3 From 30b605b8501e321f79e19c3238aa6ca31da6087c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 16 Mar 2023 16:33:17 -0700 Subject: net: mdio: fix owner field for mdio buses registered using ACPI Bus ownership is wrong when using acpi_mdiobus_register() to register an mdio bus. That function is not inline, so when it calls mdiobus_register() the wrong THIS_MODULE value is captured. CC: Maxime Bizon Fixes: 803ca24d2f92 ("net: mdio: Add ACPI support code for mdio") Signed-off-by: Florian Fainelli Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/acpi_mdio.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi_mdio.h b/include/linux/acpi_mdio.h index 0a24ab7cb66f..8e2eefa9fbc0 100644 --- a/include/linux/acpi_mdio.h +++ b/include/linux/acpi_mdio.h @@ -9,7 +9,14 @@ #include #if IS_ENABLED(CONFIG_ACPI_MDIO) -int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode); +int __acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode, + struct module *owner); + +static inline int +acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *handle) +{ + return __acpi_mdiobus_register(mdio, handle, THIS_MODULE); +} #else /* CONFIG_ACPI_MDIO */ static inline int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode) -- cgit v1.2.3 From 070246e4674b125860d311c18ce2623e73e2bd51 Mon Sep 17 00:00:00 2001 From: Jochen Henneberg Date: Fri, 17 Mar 2023 09:08:17 +0100 Subject: net: stmmac: Fix for mismatched host/device DMA address width Currently DMA address width is either read from a RO device register or force set from the platform data. This breaks DMA when the host DMA address width is <=32bit but the device is >32bit.
Right now the driver may decide to use a 2nd DMA descriptor for another buffer (happens in case of TSO xmit) assuming that 32bit addressing is used due to platform configuration but the device will still use both descriptor addresses as one address. This can be observed with the Intel EHL platform driver that sets 32bit for addr64 but the MAC reports 40bit. The TX queue gets stuck in case of TCP with iptables NAT configuration on TSO packets. The logic should be like this: Whatever we do on the host side (memory allocation GFP flags) should happen with the host DMA width, whenever we decide how to set addresses on the device registers we must use the device DMA address width. This patch renames the platform address width field from addr64 (term used in device datasheet) to host_dma_width and uses this value exclusively for host side operations while all chip operations consider the device DMA width as read from the device register. Fixes: 7cfc4486e7ea ("stmmac: intel: Configure EHL PSE0 GbE and PSE1 GbE to 32 bits DMA addressing") Signed-off-by: Jochen Henneberg Signed-off-by: David S. Miller --- include/linux/stmmac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index a152678b82b7..a2414c187483 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -215,7 +215,7 @@ struct plat_stmmacenet_data { int unicast_filter_entries; int tx_fifo_size; int rx_fifo_size; - u32 addr64; + u32 host_dma_width; u32 rx_queues_to_use; u32 tx_queues_to_use; u8 rx_sched_algorithm; -- cgit v1.2.3 From 1470afefc3c42df5d1662f87d079b46651bdc95b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 15 Mar 2023 17:31:02 -0700 Subject: cpumask: introduce for_each_cpu_or Equivalent of for_each_cpu_and, except it ORs the two masks together so it iterates all the CPUs present in either mask. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/cpumask.h | 17 +++++++++++++++++ include/linux/find.h | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 8fbe76607965..220974ef1bf5 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -350,6 +350,23 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta #define for_each_cpu_andnot(cpu, mask1, mask2) \ for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) +/** + * for_each_cpu_or - iterate over every cpu present in either mask + * @cpu: the (optionally unsigned) integer iterator + * @mask1: the first cpumask pointer + * @mask2: the second cpumask pointer + * + * This saves a temporary CPU mask in many places. It is equivalent to: + * struct cpumask tmp; + * cpumask_or(&tmp, &mask1, &mask2); + * for_each_cpu(cpu, &tmp) + * ... + * + * After the loop, cpu is >= nr_cpu_ids. + */ +#define for_each_cpu_or(cpu, mask1, mask2) \ + for_each_or_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) + /** * cpumask_any_but - return a "random" in a cpumask, but not this one.
* @mask: the cpumask to search diff --git a/include/linux/find.h b/include/linux/find.h index 4647864a5ffd..5e4f39ef2e72 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -14,6 +14,8 @@ unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long unsigned long nbits, unsigned long start); unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start); +unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start); unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, unsigned long start); extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); @@ -127,6 +129,36 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1, } #endif +#ifndef find_next_or_bit +/** + * find_next_or_bit - find the next set bit in either memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * @offset: The bitnumber to start searching at + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_or_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = (*addr1 | *addr2) & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_or_bit(addr1, addr2, size, offset); +} +#endif + #ifndef find_next_zero_bit /** * find_next_zero_bit - find the next cleared bit in a memory region @@ -536,6 +568,11 @@ unsigned long find_next_bit_le(const void *addr, unsigned (bit) = find_next_andnot_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ (bit)++) +#define for_each_or_bit(bit, addr1, addr2, size) \ + for ((bit) = 0; \ + (bit) = find_next_or_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ + (bit)++) + /* same as for_each_set_bit() but use bit as value to start with */ #define for_each_set_bit_from(bit, addr, size) \ for (; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++) -- cgit v1.2.3 From e9b60c7f97130795c7aa81a649ae4b93a172a277 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 15 Mar 2023 17:31:03 -0700 Subject: pcpcntr: remove percpu_counter_sum_all() percpu_counter_sum_all() is now redundant as the race condition it was invented to handle is now dealt with by percpu_counter_sum() directly and all users of percpu_counter_sum_all() have been removed. Remove it. This effectively reverts the changes made in f689054aace2 ("percpu_counter: add percpu_counter_sum_all interface") except for the cpumask iteration that fixes percpu_counter_sum() made earlier in this series. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
Wong --- include/linux/percpu_counter.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 521a733e21a9..75b73c83bc9d 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -45,7 +45,6 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); -s64 percpu_counter_sum_all(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); void percpu_counter_sync(struct percpu_counter *fbc); @@ -196,11 +195,6 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc) return percpu_counter_read(fbc); } -static inline s64 percpu_counter_sum_all(struct percpu_counter *fbc) -{ - return percpu_counter_read(fbc); -} - static inline bool percpu_counter_initialized(struct percpu_counter *fbc) { return true; -- cgit v1.2.3 From 9d2789ac9d60c049d26ef6d3005d9c94c5a559e9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 20 Mar 2023 20:01:25 -0600 Subject: block/io_uring: pass in issue_flags for uring_cmd task_work handling io_uring_cmd_done() currently assumes that the uring_lock is held when invoked, and while it generally is, this is not guaranteed. Pass in the issue_flags associated with it, so that we have IO_URING_F_UNLOCKED available to be able to lock the CQ ring appropriately when completing events. Cc: stable@vger.kernel.org Fixes: ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd") Signed-off-by: Jens Axboe --- include/linux/io_uring.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 934e5dd4ccc0..35b9328ca335 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -27,7 +27,7 @@ struct io_uring_cmd { const void *cmd; union { /* callback to defer completions to task context */ - void (*task_work_cb)(struct io_uring_cmd *cmd); + void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned); /* used for polled completion */ void *cookie; }; @@ -39,9 +39,10 @@ struct io_uring_cmd { #if defined(CONFIG_IO_URING) int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd); -void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2); +void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, + unsigned issue_flags); void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *)); + void (*task_work_cb)(struct io_uring_cmd *, unsigned)); struct sock *io_uring_get_socket(struct file *file); void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); @@ -72,11 +73,11 @@ static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, return -EOPNOTSUPP; } static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, - ssize_t ret2) + ssize_t ret2, unsigned issue_flags) { } static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *)) + void (*task_work_cb)(struct io_uring_cmd *, unsigned)) { } static inline struct sock *io_uring_get_socket(struct file *file) -- cgit v1.2.3 From f87d28673b71b35b248231a2086f9404afbb7f28 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 25 Feb 2023 16:01:36 -0800 
Subject: entry: Fix noinstr warning in __enter_from_user_mode() __enter_from_user_mode() is triggering noinstr warnings with CONFIG_DEBUG_PREEMPT due to its call of preempt_count_add() via ct_state(). The preemption disable isn't needed as interrupts are already disabled. And the context_tracking_enabled() check in ct_state() also isn't needed as that's already being done by the CT_WARN_ON(). Just use __ct_state() instead. Fixes the following warnings: vmlinux.o: warning: objtool: enter_from_user_mode+0xba: call to preempt_count_add() leaves .noinstr.text section vmlinux.o: warning: objtool: syscall_enter_from_user_mode+0xf9: call to preempt_count_add() leaves .noinstr.text section vmlinux.o: warning: objtool: syscall_enter_from_user_mode_prepare+0xc7: call to preempt_count_add() leaves .noinstr.text section vmlinux.o: warning: objtool: irqentry_enter_from_user_mode+0xba: call to preempt_count_add() leaves .noinstr.text section Fixes: 171476775d32 ("context_tracking: Convert state to atomic_t") Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/d8955fa6d68dc955dda19baf13ae014ae27926f5.1677369694.git.jpoimboe@kernel.org --- include/linux/context_tracking.h | 1 + include/linux/context_tracking_state.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index d4afa8508a80..3a7909ed5498 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -96,6 +96,7 @@ static inline void user_exit_irqoff(void) { } static inline int exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } static inline int ct_state(void) { return -1; } +static inline int __ct_state(void) { return -1; } static __always_inline bool context_tracking_guest_enter(void) { return false; } static inline void context_tracking_guest_exit(void) { } #define CT_WARN_ON(cond) do { } while (0) diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 4a4d56f77180..fdd537ea513f 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -46,7 +46,9 @@ struct context_tracking { #ifdef CONFIG_CONTEXT_TRACKING DECLARE_PER_CPU(struct context_tracking, context_tracking); +#endif +#ifdef CONFIG_CONTEXT_TRACKING_USER static __always_inline int __ct_state(void) { return arch_atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_STATE_MASK; -- cgit v1.2.3 From fbaa38214cd9e150764ccaa82e04ecf42cc1140c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 11 Mar 2023 15:40:01 +0100 Subject: cxl/pci: Fix CDAT retrieval on big endian The CDAT exposed in sysfs differs between little endian and big endian arches: On big endian, every 4 bytes are byte-swapped. PCI Configuration Space is little endian (PCI r3.0 sec 6.1). Accessors such as pci_read_config_dword() implicitly swap bytes on big endian. That way, the macros in include/uapi/linux/pci_regs.h work regardless of the arch's endianness. For an example of implicit byte-swapping, see ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx (Load Word Byte-Reverse Indexed). DOE Read/Write Data Mailbox Registers are unlike other registers in Configuration Space in that they contain or receive a 4 byte portion of an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f). They need to be copied to or from the request/response buffer verbatim. 
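A small, compilable illustration of the distinction being drawn (an assumed example, not the kernel's helpers): a little-endian register value is converted once with a le32_to_cpu()-style routine, while an opaque data-object payload is copied byte for byte on every architecture.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Portable "le32_to_cpu": assemble the value from bytes, so the result
 * is the same on little- and big-endian hosts. */
static uint32_t le32_to_cpu_sketch(const uint8_t b[4])
{
    return (uint32_t)b[0] | (uint32_t)b[1] << 8 |
           (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24;
}

int main(void)
{
    const uint8_t wire[4] = { 0x78, 0x56, 0x34, 0x12 }; /* bytes as seen on the bus */
    uint8_t payload[4];

    /* A little-endian register value: interpret it, 0x12345678 everywhere. */
    printf("register = 0x%08" PRIx32 "\n", le32_to_cpu_sketch(wire));

    /* An opaque data-object chunk: copy it verbatim, no per-word swapping. */
    memcpy(payload, wire, sizeof(payload));
    printf("payload  = %02x %02x %02x %02x\n",
           payload[0], payload[1], payload[2], payload[3]);
    return 0;
}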
So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit byte-swapping. The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume implicit byte-swapping. Byte-swap requests after constructing them with those macros and byte-swap responses before parsing them. Change the request and response type to __le32 to avoid sparse warnings. Per a request from Jonathan, replace sizeof(u32) with sizeof(__le32) for consistency. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny Signed-off-by: Lukas Wunner Reviewed-by: Dan Williams Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/3051114102f41d19df3debbee123129118fc5e6d.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams --- include/linux/pci-doe.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h index ed9b4df792b8..43765eaf2342 100644 --- a/include/linux/pci-doe.h +++ b/include/linux/pci-doe.h @@ -34,6 +34,10 @@ struct pci_doe_mb; * @work: Used internally by the mailbox * @doe_mb: Used internally by the mailbox * + * Payloads are treated as opaque byte streams which are transmitted verbatim, + * without byte-swapping. If payloads contain little-endian register values, + * the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu(). + * * The payload sizes and rv are specified in bytes with the following * restrictions concerning the protocol. * @@ -45,9 +49,9 @@ struct pci_doe_mb; */ struct pci_doe_task { struct pci_doe_protocol prot; - u32 *request_pl; + __le32 *request_pl; size_t request_pl_sz; - u32 *response_pl; + __le32 *response_pl; size_t response_pl_sz; int rv; void (*complete)(struct pci_doe_task *task); -- cgit v1.2.3 From aa01c67de5926fdb276793180564f172c55fb0d7 Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Mon, 20 Mar 2023 09:57:36 -0600 Subject: nvme-tcp: fix nvme_tcp_term_pdu to match spec The FEI field of C2HTermReq/H2CTermReq is 4 bytes but not 4-byte-aligned in the NVMe/TCP specification (it is located at offset 10 in the PDU). Split it into two 16-bit integers in struct nvme_tcp_term_pdu so no padding is inserted. There should also be 10 reserved bytes after. There are currently no users of this type. Fixes: fc221d05447aa6db ("nvme-tcp: Add protocol header") Reported-by: Geert Uytterhoeven Signed-off-by: Caleb Sander Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/nvme-tcp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 75470159a194..57ebe1267f7f 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -115,8 +115,9 @@ struct nvme_tcp_icresp_pdu { struct nvme_tcp_term_pdu { struct nvme_tcp_hdr hdr; __le16 fes; - __le32 fei; - __u8 rsvd[8]; + __le16 feil; + __le16 feiu; + __u8 rsvd[10]; }; /** -- cgit v1.2.3 From 790930f44289c8209c57461b2db499fcc702e0b3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Mar 2023 18:01:26 +0100 Subject: thermal: core: Introduce thermal_cooling_device_update() Introduce a core thermal API function, thermal_cooling_device_update(), for updating the max_state value for a cooling device and rearranging its statistics in sysfs after a possible change of its ->get_max_state() callback return value. 
That callback is now invoked only once, during cooling device registration, to populate the max_state field in the cooling device object, so if its return value changes, it needs to be invoked again and the new return value needs to be stored as max_state. Moreover, the statistics presented in sysfs need to be rearranged in general, because there may not be enough room in them to store data for all of the possible states (in the case when max_state grows). The new function takes care of that (and some other minor things related to it), but some extra locking and lockdep annotations are added in several places too to protect against crashes in the cases when the statistics are not present or when a stale max_state value might be used by sysfs attributes. Note that the actual user of the new function will be added separately. Link: https://lore.kernel.org/linux-pm/53ec1f06f61c984100868926f282647e57ecfb2d.camel@intel.com/ Signed-off-by: Rafael J. Wysocki Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- include/linux/thermal.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 2bb4bf33f4f3..13c6aaed18df 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -384,6 +384,7 @@ devm_thermal_of_cooling_device_register(struct device *dev, struct device_node *np, char *type, void *devdata, const struct thermal_cooling_device_ops *ops); +void thermal_cooling_device_update(struct thermal_cooling_device *); void thermal_cooling_device_unregister(struct thermal_cooling_device *); struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name); int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); -- cgit v1.2.3 From a90ac762d345890b40d88a1385a34a2449c2d75e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 24 Mar 2023 09:23:42 +0000 Subject: net: sfp: make sfp_bus_find_fwnode() take a const fwnode sfp_bus_find_fwnode() does not write to the fwnode, so let's make it const. Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/sfp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 52b98f9666a2..ef06a195b3c2 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -557,7 +557,7 @@ int sfp_get_module_eeprom_by_page(struct sfp_bus *bus, void sfp_upstream_start(struct sfp_bus *bus); void sfp_upstream_stop(struct sfp_bus *bus); void sfp_bus_put(struct sfp_bus *bus); -struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode); +struct sfp_bus *sfp_bus_find_fwnode(const struct fwnode_handle *fwnode); int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, const struct sfp_upstream_ops *ops); void sfp_bus_del_upstream(struct sfp_bus *bus); @@ -619,7 +619,8 @@ static inline void sfp_bus_put(struct sfp_bus *bus) { } -static inline struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode) +static inline struct sfp_bus * +sfp_bus_find_fwnode(const struct fwnode_handle *fwnode) { return NULL; } -- cgit v1.2.3 From 4a0faa02d419a6728abef0f1d8a32d8c35ef95e6 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 24 Mar 2023 09:23:53 +0000 Subject: net: phy: constify fwnode_get_phy_node() fwnode argument fwnode_get_phy_node() does not modify the fwnode structure, so make the argument const. Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Signed-off-by: David S.
Miller --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 36bf0bbc8efa..db7c0bd67559 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1547,7 +1547,7 @@ int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id); struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode); struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode); struct phy_device *device_phy_find_device(struct device *dev); -struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode); struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); void phy_device_free(struct phy_device *phydev); -- cgit v1.2.3 From d583fbd7066a2dea43050521a95d9770f7d7593e Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Wed, 22 Mar 2023 21:43:43 +0100 Subject: KVM: irqfd: Make resampler_list an RCU list It is useful to be able to do read-only traversal of the list of all the registered irqfd resamplers without locking the resampler_lock mutex. In particular, we are going to traverse it to search for a resampler registered for the given irq of an irqchip, and that will be done with an irqchip spinlock (ioapic->lock) held, so it is undesirable to lock a mutex in this context. So turn this list into an RCU list. For protecting the read side, reuse kvm->irq_srcu which is already used for protecting a number of irq related things (kvm->irq_routing, irqfd->resampler->list, kvm->irq_ack_notifier_list, kvm->arch.mask_notifier_list). Signed-off-by: Dmytro Maluka Message-Id: <20230322204344.50138-2-dmy@semihalf.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + include/linux/kvm_irqfd.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8ada23756b0e..9f508c8e66e1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -755,6 +755,7 @@ struct kvm { struct { spinlock_t lock; struct list_head items; + /* resampler_list update side is protected by resampler_lock. */ struct list_head resampler_list; struct mutex resampler_lock; } irqfds; diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index dac047abdba7..8ad43692e3bb 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -31,7 +31,7 @@ struct kvm_kernel_irqfd_resampler { /* * Entry in list of kvm->irqfd.resampler_list. Use for sharing * resamplers among irqfds on the same gsi. - * Accessed and modified under kvm->irqfds.resampler_lock + * RCU list modified under kvm->irqfds.resampler_lock */ struct list_head link; }; -- cgit v1.2.3 From fef8f2b90edbd7089a4278021314f11f056b0cbb Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Wed, 22 Mar 2023 21:43:44 +0100 Subject: KVM: x86/ioapic: Resample the pending state of an IRQ when unmasking KVM irqfd based emulation of level-triggered interrupts doesn't work quite correctly in some cases, particularly in the case of interrupts that are handled in a Linux guest as oneshot interrupts (IRQF_ONESHOT). Such an interrupt is acked to the device in its threaded irq handler, i.e. later than it is acked to the interrupt controller (EOI at the end of hardirq), not earlier. 
Linux keeps such interrupt masked until its threaded handler finishes, to prevent the EOI from re-asserting an unacknowledged interrupt. However, with KVM + vfio (or whatever is listening on the resamplefd) we always notify resamplefd at the EOI, so vfio prematurely unmasks the host physical IRQ, thus a new physical interrupt is fired in the host. This extra interrupt in the host is not a problem per se. The problem is that it is unconditionally queued for injection into the guest, so the guest sees an extra bogus interrupt. [*] There are observed at least 2 user-visible issues caused by those extra erroneous interrupts for a oneshot irq in the guest: 1. System suspend aborted due to a pending wakeup interrupt from ChromeOS EC (drivers/platform/chrome/cros_ec.c). 2. Annoying "invalid report id data" errors from ELAN0000 touchpad (drivers/input/mouse/elan_i2c_core.c), flooding the guest dmesg every time the touchpad is touched. The core issue here is that by the time when the guest unmasks the IRQ, the physical IRQ line is no longer asserted (since the guest has acked the interrupt to the device in the meantime), yet we unconditionally inject the interrupt queued into the guest by the previous resampling. So to fix the issue, we need a way to detect that the IRQ is no longer pending, and cancel the queued interrupt in this case. With IOAPIC we are not able to probe the physical IRQ line state directly (at least not if the underlying physical interrupt controller is an IOAPIC too), so in this patch we use irqfd resampler for that. Namely, instead of injecting the queued interrupt, we just notify the resampler that this interrupt is done. If the IRQ line is actually already deasserted, we are done. If it is still asserted, a new interrupt will be shortly triggered through irqfd and injected into the guest. In the case if there is no irqfd resampler registered for this IRQ, we cannot fix the issue, so we keep the existing behavior: immediately unconditionally inject the queued interrupt. This patch fixes the issue for x86 IOAPIC only. In the long run, we can fix it for other irqchips and other architectures too, possibly taking advantage of reading the physical state of the IRQ line, which is possible with some other irqchips (e.g. with arm64 GIC, maybe even with the legacy x86 PIC). [*] In this description we assume that the interrupt is a physical host interrupt forwarded to the guest e.g. by vfio. Potentially the same issue may occur also with a purely virtual interrupt from an emulated device, e.g. if the guest handles this interrupt, again, as a oneshot interrupt. 
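A toy model of the unmask-time decision described above (purely illustrative; KVM cannot probe the IOAPIC line directly and instead relies on the irqfd resampler to re-trigger a still-asserted source):

#include <stdbool.h>
#include <stdio.h>

struct irq_line {
    bool asserted;   /* level of the (virtual) physical line */
    bool pending;    /* stale interrupt queued for injection */
};

static void guest_unmask(struct irq_line *irq)
{
    if (!irq->pending)
        return;
    irq->pending = false;

    /* Instead of blindly injecting the queued interrupt, treat it as done
     * and let only a still-asserted line come back as a fresh injection. */
    if (irq->asserted) {
        irq->pending = true;
        printf("still asserted: re-injected via the resampler\n");
    } else {
        printf("deasserted: stale interrupt dropped\n");
    }
}

int main(void)
{
    /* The device was already acked in the threaded handler, so the line
     * has dropped by the time the guest unmasks. */
    struct irq_line irq = { .asserted = false, .pending = true };

    guest_unmask(&irq);
    return 0;
}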
Signed-off-by: Dmytro Maluka Link: https://lore.kernel.org/kvm/31420943-8c5f-125c-a5ee-d2fde2700083@semihalf.com/ Link: https://lore.kernel.org/lkml/87o7wrug0w.wl-maz@kernel.org/ Message-Id: <20230322204344.50138-3-dmy@semihalf.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9f508c8e66e1..a9adf75344be 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1987,6 +1987,9 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); #ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); +bool kvm_notify_irqfd_resampler(struct kvm *kvm, + unsigned int irqchip, + unsigned int pin); void kvm_irq_routing_update(struct kvm *); #else static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) @@ -1995,6 +1998,13 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) } static inline void kvm_irqfd_release(struct kvm *kvm) {} + +static inline bool kvm_notify_irqfd_resampler(struct kvm *kvm, + unsigned int irqchip, + unsigned int pin) +{ + return false; +} #endif #else -- cgit v1.2.3 From 16812c96550c30a8d5743167ef4e462d6fbe7472 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 29 Mar 2023 21:47:21 +0800 Subject: iommu/vt-d: Fix an IOMMU perfmon warning when CPU hotplug A warning can be triggered when hotplug CPU 0. $ echo 0 > /sys/devices/system/cpu/cpu0/online ------------[ cut here ]------------ Voluntary context switch within RCU read-side critical section! WARNING: CPU: 0 PID: 19 at kernel/rcu/tree_plugin.h:318 rcu_note_context_switch+0x4f4/0x580 RIP: 0010:rcu_note_context_switch+0x4f4/0x580 Call Trace: ? perf_event_update_userpage+0x104/0x150 __schedule+0x8d/0x960 ? perf_event_set_state.part.82+0x11/0x50 schedule+0x44/0xb0 schedule_timeout+0x226/0x310 ? __perf_event_disable+0x64/0x1a0 ? _raw_spin_unlock+0x14/0x30 wait_for_completion+0x94/0x130 __wait_rcu_gp+0x108/0x130 synchronize_rcu+0x67/0x70 ? invoke_rcu_core+0xb0/0xb0 ? __bpf_trace_rcu_stall_warning+0x10/0x10 perf_pmu_migrate_context+0x121/0x370 iommu_pmu_cpu_offline+0x6a/0xa0 ? iommu_pmu_del+0x1e0/0x1e0 cpuhp_invoke_callback+0x129/0x510 cpuhp_thread_fun+0x94/0x150 smpboot_thread_fn+0x183/0x220 ? sort_range+0x20/0x20 kthread+0xe6/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 ---[ end trace 0000000000000000 ]--- The synchronize_rcu() will be invoked in the perf_pmu_migrate_context(), when migrating a PMU to a new CPU. However, the current for_each_iommu() is within RCU read-side critical section. Two methods were considered to fix the issue. - Use the dmar_global_lock to replace the RCU read lock when going through the drhd list. But it triggers a lockdep warning. - Use the cpuhp_setup_state_multi() to set up a dedicated state for each IOMMU PMU. The lock can be avoided. The latter method is implemented in this patch. Since each IOMMU PMU has a dedicated state, add cpuhp_node and cpu in struct iommu_pmu to track the state. The state can be dynamically allocated now. Remove the CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE. 
Fixes: 46284c6ceb5e ("iommu/vt-d: Support cpumask for IOMMU perfmon") Reported-by: Ammy Yi Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230328182028.1366416-1-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230329134721.469447-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/cpuhotplug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index c6fab004104a..5b2f8147d1ae 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -218,7 +218,6 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, CPUHP_AP_PERF_X86_IDXD_ONLINE, - CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, -- cgit v1.2.3 From 653a180957a85c3fc30320cc7e84f5dc913a64f8 Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Thu, 30 Mar 2023 17:14:02 +0800 Subject: net: phylink: add phylink_expects_phy() method Provide phylink_expects_phy() to allow MAC drivers to check if it is expecting a PHY to attach to. Since fixed-linked setups do not need to attach to a PHY. Provides a boolean value as to if the MAC should expect a PHY. Returns true if a PHY is expected. Reviewed-by: Russell King (Oracle) Signed-off-by: Michael Sit Wei Hong Signed-off-by: David S. Miller --- include/linux/phylink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index c492c26202b5..637698ed5cb6 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -574,6 +574,7 @@ struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, phy_interface_t iface, const struct phylink_mac_ops *mac_ops); void phylink_destroy(struct phylink *); +bool phylink_expects_phy(struct phylink *pl); int phylink_connect_phy(struct phylink *, struct phy_device *); int phylink_of_phy_connect(struct phylink *, struct device_node *, u32 flags); -- cgit v1.2.3 From ea65b41807a26495ff2a73dd8b1bab2751940887 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Mon, 27 Mar 2023 18:36:46 +0100 Subject: ftrace: Mark get_lock_parent_ip() __always_inline If the compiler decides not to inline this function then preemption tracing will always show an IP inside the preemption disabling path and never the function actually calling preempt_{enable,disable}. Link: https://lore.kernel.org/linux-trace-kernel/20230327173647.1690849-1-john@metanate.com Cc: Masami Hiramatsu Cc: Mark Rutland Cc: stable@vger.kernel.org Fixes: f904f58263e1d ("sched/debug: Fix preempt_disable_ip recording for preempt_disable()") Signed-off-by: John Keeping Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 366c730beaa3..402fc061de75 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -980,7 +980,7 @@ static inline void __ftrace_enabled_restore(int enabled) #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5)) #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6)) -static inline unsigned long get_lock_parent_ip(void) +static __always_inline unsigned long get_lock_parent_ip(void) { unsigned long addr = CALLER_ADDR0; -- cgit v1.2.3 From 3dd4432549415f3c65dd52d5c687629efbf4ece1 Mon Sep 17 00:00:00 2001 From: "Liam R. 
Howlett" Date: Mon, 27 Feb 2023 09:36:07 -0800 Subject: mm: enable maple tree RCU mode by default Use the maple tree in RCU mode for VMA tracking. The maple tree tracks the stack and is able to update the pivot (lower/upper boundary) in-place to allow the page fault handler to write to the tree while holding just the mmap read lock. This is safe as the writes to the stack have a guard VMA which ensures there will always be a NULL in the direction of the growth and thus will only update a pivot. It is possible, but not recommended, to have VMAs that grow up/down without guard VMAs. syzbot has constructed a testcase which sets up a VMA to grow and consume the empty space. Overwriting the entire NULL entry causes the tree to be altered in a way that is not safe for concurrent readers; the readers may see a node being rewritten or one that does not match the maple state they are using. Enabling RCU mode allows the concurrent readers to see a stable node and will return the expected result. [Liam.Howlett@Oracle.com: we don't need to free the nodes with RCU[ Link: https://lore.kernel.org/linux-mm/000000000000b0a65805f663ace6@google.com/ Link: https://lkml.kernel.org/r/20230227173632.3292573-9-surenb@google.com Fixes: d4af56c5c7c6 ("mm: start tracking VMAs with maple tree") Signed-off-by: Liam R. Howlett Signed-off-by: Suren Baghdasaryan Reported-by: syzbot+8d95422d3537159ca390@syzkaller.appspotmail.com Cc: Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0722859c3647..a57e6ae78e65 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -774,7 +774,8 @@ struct mm_struct { unsigned long cpu_bitmap[]; }; -#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN) +#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ + MT_FLAGS_USE_RCU) extern struct mm_struct init_mm; /* Pointer magic because the dynamic array size confuses some compilers. */ -- cgit v1.2.3 From 195d8e5da3acb17c5357526494f818a21e97cd10 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Wed, 29 Mar 2023 13:13:11 -0700 Subject: PCI/MSI: Provide missing stub for pci_msix_can_alloc_dyn() pci_msix_can_alloc_dyn() is not declared when CONFIG_PCI_MSI is disabled. There is no existing user of pci_msix_can_alloc_dyn() but work is in progress to change this. This work encounters the following error when CONFIG_PCI_MSI is disabled: drivers/vfio/pci/vfio_pci_intrs.c:427:21: error: implicit declaration of function 'pci_msix_can_alloc_dyn' [-Werror=implicit-function-declaration] Provide definition for pci_msix_can_alloc_dyn() in preparation for users that need to compile when CONFIG_PCI_MSI is disabled. 
[bhelgaas: Also reported by Arnd Bergmann in drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c; added his Fixes: line] Fixes: fb0a6a268dcd ("net/mlx5: Provide external API for allocating vectors") Fixes: 34026364df8e ("PCI/MSI: Provide post-enable dynamic allocation interfaces for MSI-X") Link: https://lore.kernel.org/oe-kbuild-all/202303291000.PWFqGCxH-lkp@intel.com/ Link: https://lore.kernel.org/r/310ecc4815dae4174031062f525245f0755c70e2.1680119924.git.reinette.chatre@intel.com Reported-by: kernel test robot Signed-off-by: Reinette Chatre Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Cc: stable@vger.kernel.org # v6.2+ --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index fafd8020c6d7..5e2dab675943 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1623,6 +1623,8 @@ pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, flags, NULL); } +static inline bool pci_msix_can_alloc_dyn(struct pci_dev *dev) +{ return false; } static inline struct msi_map pci_msix_alloc_irq_at(struct pci_dev *dev, unsigned int index, const struct irq_affinity_desc *affdesc) { -- cgit v1.2.3
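The stub added here follows the standard pattern for headers in this tree: when the Kconfig option is off, a static inline with a fixed return value stands in for the real function so callers build without any #ifdefs of their own. A minimal compilable sketch of the idiom, with CONFIG_FEATURE as a made-up stand-in for options like CONFIG_PCI_MSI:

#include <stdbool.h>
#include <stdio.h>

#ifdef CONFIG_FEATURE
/* Stand-in for the real implementation, which would live in a .c file. */
static bool feature_can_alloc_dyn(void)
{
    return true;
}
#else
/* Header-provided stub: callers compile and link with the feature disabled. */
static inline bool feature_can_alloc_dyn(void)
{
    return false;
}
#endif

int main(void)
{
    printf("dynamic allocation supported: %s\n",
           feature_can_alloc_dyn() ? "yes" : "no");
    return 0;
}

Building the sketch with and without -DCONFIG_FEATURE exercises both branches, which mirrors how the header keeps callers such as vfio buildable whether or not CONFIG_PCI_MSI is enabled.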