From 747bda7bb5b1644a06734900326847a5d353c448 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 19 Apr 2024 10:29:00 +0200 Subject: fbdev/deferred-io: Provide get_page hook in struct fb_deferred_io Add a callback for drivers to provide framebuffer pages to fbdev's deferred-I/O helpers. Implementations need to acquire a reference on the page before returning it. Returning NULL generates a SIGBUS signal. This will be useful for DRM's fbdev emulation with GEM-shmem buffer objects. v2: - fix typo in commit message (Javier) Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240419083331.7761-8-tzimmermann@suse.de --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index 811e47f9d1c3..5358edbb9c0b 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -225,6 +225,7 @@ struct fb_deferred_io { struct mutex lock; /* mutex that protects the pageref list */ struct list_head pagereflist; /* list of pagerefs for touched pages */ /* callback */ + struct page *(*get_page)(struct fb_info *info, unsigned long offset); void (*deferred_io)(struct fb_info *info, struct list_head *pagelist); }; #endif -- cgit v1.2.3 From 150f431a08317e0e0363a7f9147b6246d3b40ba6 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 19 Apr 2024 10:29:01 +0200 Subject: drm/fbdev: Add fbdev-shmem Add an fbdev emulation for SHMEM-based memory managers. The code is similar to fbdev-generic, but does not require an additional shadow buffer for mmap(). Fbdev-shmem operates directly on the buffer object's SHMEM pages. Fbdev's deferred-I/O mechanism updates the hardware state on write operations. The memory pages of GEM SHMEM cannot be detected by fbdefio. Therefore fbdev-shmem implements the .get_page() hook in struct fb_deferred_io. The fbdefio helpers call this hook to retrieve the page directly from fbdev-shmem instead of trying to detect it internally. v3: - clarify on get_page mechanism in commit description (Javier) v2: - use drm_driver_legacy_fb_format() (Geert) Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240419083331.7761-9-tzimmermann@suse.de --- include/drm/drm_fbdev_shmem.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 include/drm/drm_fbdev_shmem.h (limited to 'include') diff --git a/include/drm/drm_fbdev_shmem.h b/include/drm/drm_fbdev_shmem.h new file mode 100644 index 000000000000..fb43cadd1950 --- /dev/null +++ b/include/drm/drm_fbdev_shmem.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DRM_FBDEV_SHMEM_H +#define DRM_FBDEV_SHMEM_H + +struct drm_device; + +#ifdef CONFIG_DRM_FBDEV_EMULATION +void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp); +#else +static inline void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp) +{ } +#endif + +#endif -- cgit v1.2.3 From aae4682e5d66c1e1dc181fa341652e037237f144 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 19 Apr 2024 10:29:35 +0200 Subject: drm/fbdev-generic: Convert to fbdev-ttm Only TTM-based drivers use fbdev-generic. Rename it to fbdev-ttm and change the symbol infix from _generic_ to _ttm_. Link the source file into TTM helpers, so that it is only build if TTM-based drivers have been selected. Select DRM_TTM_HELPER for loongson. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240419083331.7761-43-tzimmermann@suse.de --- include/drm/drm_fbdev_generic.h | 15 --------------- include/drm/drm_fbdev_ttm.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 15 deletions(-) delete mode 100644 include/drm/drm_fbdev_generic.h create mode 100644 include/drm/drm_fbdev_ttm.h (limited to 'include') diff --git a/include/drm/drm_fbdev_generic.h b/include/drm/drm_fbdev_generic.h deleted file mode 100644 index 75799342098d..000000000000 --- a/include/drm/drm_fbdev_generic.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: MIT */ - -#ifndef DRM_FBDEV_GENERIC_H -#define DRM_FBDEV_GENERIC_H - -struct drm_device; - -#ifdef CONFIG_DRM_FBDEV_EMULATION -void drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp); -#else -static inline void drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp) -{ } -#endif - -#endif diff --git a/include/drm/drm_fbdev_ttm.h b/include/drm/drm_fbdev_ttm.h new file mode 100644 index 000000000000..9e6c3bdf3537 --- /dev/null +++ b/include/drm/drm_fbdev_ttm.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DRM_FBDEV_TTM_H +#define DRM_FBDEV_TTM_H + +struct drm_device; + +#ifdef CONFIG_DRM_FBDEV_EMULATION +void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp); +#else +static inline void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp) +{ } +#endif + +#endif -- cgit v1.2.3 From 18bc074c226bfecd205bf031678f5e35ee55c3da Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 19 Apr 2024 10:29:36 +0200 Subject: drm/fbdev: Clean up fbdev documentation Rewrite some docs that are not up-to-date any longer. Remove the TODO item for fbdev-generic conversion, as the helper has been replaced. Make documentation for DMA, SHMEM and TTM emulation available. Signed-off-by: Thomas Zimmermann Cc: Jonathan Corbet Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240419083331.7761-44-tzimmermann@suse.de --- include/drm/drm_mode_config.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 8de3c9a5f61b..ab0f167474b1 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -106,8 +106,8 @@ struct drm_mode_config_funcs { * Drivers implementing fbdev emulation use drm_kms_helper_hotplug_event() * to call this hook to inform the fbdev helper of output changes. * - * This hook is deprecated, drivers should instead use - * drm_fbdev_generic_setup() which takes care of any necessary + * This hook is deprecated, drivers should instead implement fbdev + * support with struct drm_client, which takes care of any necessary * hotplug event forwarding already without further involvement by * the driver. */ -- cgit v1.2.3 From ef283674a17e000bb6b2ff05dd2ac5cbf2e3ae0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 22 Apr 2024 11:58:57 +0300 Subject: drm/uapi: Move drm_color_ctm_3x4 out from drm_mode.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_color_ctm_3x4 is some undocumented amgdpu private uapi and thus has no business being in drm_mode.h. At least move it to some amdgpu specific header, albeit with the wrong namespace as maybe something somewhere is using this already? Cc: Harry Wentland Cc: Joshua Ashton Cc: Alex Deucher Fixes: 6872a189be50 ("drm/amd/display: Add 3x4 CTM support for plane CTM") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240422085857.17651-1-ville.syrjala@linux.intel.com Reviewed-by: Harry Wentland --- include/uapi/drm/amdgpu_drm.h | 9 +++++++++ include/uapi/drm/drm_mode.h | 8 -------- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 96e32dafd4f0..d5ebafacdd70 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1269,6 +1269,15 @@ struct drm_amdgpu_info_gpuvm_fault { #define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ +/* FIXME wrong namespace! */ +struct drm_color_ctm_3x4 { + /* + * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude + * (not two's complement!) format. + */ + __u64 matrix[12]; +}; + #if defined(__cplusplus) } #endif diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 1ca5c7e418fd..d390011b89b4 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -846,14 +846,6 @@ struct drm_color_ctm { __u64 matrix[9]; }; -struct drm_color_ctm_3x4 { - /* - * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude - * (not two's complement!) format. - */ - __u64 matrix[12]; -}; - struct drm_color_lut { /* * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and -- cgit v1.2.3 From 7fb8af6798e8d013017e4607505f58d9942fd671 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 29 Apr 2024 19:43:36 +0300 Subject: drm: deprecate driver date The driver date serves no useful purpose, because it's hardly ever updated. The information is misleading at best. As described in Documentation/gpu/drm-internals.rst: The driver date, formatted as YYYYMMDD, is meant to identify the date of the latest modification to the driver. However, as most drivers fail to update it, its value is mostly useless. The DRM core prints it to the kernel log at initialization time and passes it to userspace through the DRM_IOCTL_VERSION ioctl. Stop printing the driver date at init, and start returning the empty string "" as driver date through the DRM_IOCTL_VERSION ioctl. The driver date initialization in drivers and the struct drm_driver date member can be removed in follow-up. Reviewed-by: Hamza Mahfooz Acked-by: Simon Ser Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240429164336.1406480-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/drm_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index 8878260d7529..cd37936c3926 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -411,7 +411,7 @@ struct drm_driver { char *name; /** @desc: driver description */ char *desc; - /** @date: driver date */ + /** @date: driver date, unused, to be removed */ char *date; /** -- cgit v1.2.3 From c72ceafbd12cf95e088681ae5e535ef1a78bf0ed Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Fri, 29 Mar 2024 16:24:42 -0500 Subject: mm: Introduce AS_INACCESSIBLE for encrypted/confidential memory filemap users like guest_memfd may use page cache pages to allocate/manage memory that is only intended to be accessed by guests via hardware protections like encryption. Writes to memory of this sort in common paths like truncation may cause unexpected behavior such as writing garbage instead of zeros when attempting to zero pages, or worse, triggering hardware protections that are considered fatal as far as the kernel is concerned. Introduce a new address_space flag, AS_INACCESSIBLE, and use this initially to prevent zero'ing of pages during truncation, with the understanding that it is up to the owner of the mapping to handle this specially if needed. This is admittedly a rather blunt solution, but it seems like there are no other places that should take into account the flag to keep its promise. Link: https://lore.kernel.org/lkml/ZR9LYhpxTaTk6PJX@google.com/ Cc: Matthew Wilcox Suggested-by: Sean Christopherson Signed-off-by: Michael Roth Message-ID: <20240329212444.395559-5-michael.roth@amd.com> Acked-by: Vlastimil Babka Signed-off-by: Paolo Bonzini --- include/linux/pagemap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 2df35e65557d..f879c1d54da7 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -207,6 +207,7 @@ enum mapping_flags { AS_STABLE_WRITES, /* must wait for writeback before modifying folio contents */ AS_UNMOVABLE, /* The mapping cannot be moved, ever */ + AS_INACCESSIBLE, /* Do not attempt direct R/W access to the mapping */ }; /** -- cgit v1.2.3 From 3bb2531e20bff99f9cd8719ee7aea8bd82687173 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 May 2024 12:54:03 -0400 Subject: KVM: guest_memfd: Add hook for initializing memory guest_memfd pages are generally expected to be in some arch-defined initial state prior to using them for guest memory. For SEV-SNP this initial state is 'private', or 'guest-owned', and requires additional operations to move these pages into a 'private' state by updating the corresponding entries the RMP table. Allow for an arch-defined hook to handle updates of this sort, and go ahead and implement one for x86 so KVM implementations like AMD SVM can register a kvm_x86_ops callback to handle these updates for SEV-SNP guests. The preparation callback is always called when allocating/grabbing folios via gmem, and it is up to the architecture to keep track of whether or not the pages are already in the expected state (e.g. the RMP table in the case of SEV-SNP). In some cases, it is necessary to defer the preparation of the pages to handle things like in-place encryption of initial guest memory payloads before marking these pages as 'private'/'guest-owned'. Add an argument (always true for now) to kvm_gmem_get_folio() that allows for the preparation callback to be bypassed. To detect possible issues in the way userspace initializes memory, it is only possible to add an unprepared page if it is not already included in the filemap. Link: https://lore.kernel.org/lkml/ZLqVdvsF11Ddo7Dq@google.com/ Co-developed-by: Michael Roth Signed-off-by: Michael Roth Message-Id: <20231230172351.574091-5-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index afbc99264ffa..1af069ab657c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2443,4 +2443,9 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, } #endif /* CONFIG_KVM_PRIVATE_MEM */ +#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE +int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); +bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); +#endif + #endif -- cgit v1.2.3 From 1f6c06b177513e8a47c43e95d1985dbd9cff3ddd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Feb 2024 12:09:06 -0500 Subject: KVM: guest_memfd: Add interface for populating gmem pages with user data During guest run-time, kvm_arch_gmem_prepare() is issued as needed to prepare newly-allocated gmem pages prior to mapping them into the guest. In the case of SEV-SNP, this mainly involves setting the pages to private in the RMP table. However, for the GPA ranges comprising the initial guest payload, which are encrypted/measured prior to starting the guest, the gmem pages need to be accessed prior to setting them to private in the RMP table so they can be initialized with the userspace-provided data. Additionally, an SNP firmware call is needed afterward to encrypt them in-place and measure the contents into the guest's launch digest. While it is possible to bypass the kvm_arch_gmem_prepare() hooks so that this handling can be done in an open-coded/vendor-specific manner, this may expose more gmem-internal state/dependencies to external callers than necessary. Try to avoid this by implementing an interface that tries to handle as much of the common functionality inside gmem as possible, while also making it generic enough to potentially be usable/extensible for TDX as well. Suggested-by: Sean Christopherson Signed-off-by: Michael Roth Co-developed-by: Michael Roth Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1af069ab657c..1ae65774d9fa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2448,4 +2448,31 @@ int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_ord bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); #endif +/** + * kvm_gmem_populate() - Populate/prepare a GPA range with guest data + * + * @kvm: KVM instance + * @gfn: starting GFN to be populated + * @src: userspace-provided buffer containing data to copy into GFN range + * (passed to @post_populate, and incremented on each iteration + * if not NULL) + * @npages: number of pages to copy from userspace-buffer + * @post_populate: callback to issue for each gmem page that backs the GPA + * range + * @opaque: opaque data to pass to @post_populate callback + * + * This is primarily intended for cases where a gmem-backed GPA range needs + * to be initialized with userspace-provided data prior to being mapped into + * the guest as a private page. This should be called with the slots->lock + * held so that caller-enforced invariants regarding the expected memory + * attributes of the GPA range do not race with KVM_SET_MEMORY_ATTRIBUTES. + * + * Returns the number of pages that were populated. + */ +typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, + void __user *src, int order, void *opaque); + +long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, + kvm_gmem_populate_cb post_populate, void *opaque); + #endif -- cgit v1.2.3 From a90764f0e4ed5350cc9caeb384e0fb356c032587 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Sat, 30 Dec 2023 11:23:21 -0600 Subject: KVM: guest_memfd: Add hook for invalidating memory In some cases, like with SEV-SNP, guest memory needs to be updated in a platform-specific manner before it can be safely freed back to the host. Wire up arch-defined hooks to the .free_folio kvm_gmem_aops callback to allow for special handling of this sort when freeing memory in response to FALLOC_FL_PUNCH_HOLE operations and when releasing the inode, and go ahead and define an arch-specific hook for x86 since it will be needed for handling memory used for SEV-SNP guests. Signed-off-by: Michael Roth Message-Id: <20231230172351.574091-6-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1ae65774d9fa..b43b96f876fe 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2475,4 +2475,8 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque); +#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); +#endif + #endif -- cgit v1.2.3 From ad27ce155566f2b4400fa865859834592bd18777 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 1 May 2024 03:51:57 -0500 Subject: KVM: SEV: Add KVM_SEV_SNP_LAUNCH_FINISH command Add a KVM_SEV_SNP_LAUNCH_FINISH command to finalize the cryptographic launch digest which stores the measurement of the guest at launch time. Also extend the existing SNP firmware data structures to support disabling the use of Versioned Chip Endorsement Keys (VCEK) by guests as part of this command. While finalizing the launch flow, the code also issues the LAUNCH_UPDATE SNP firmware commands to encrypt/measure the initial VMSA pages for each configured vCPU, which requires setting the RMP entries for those pages to private, so also add handling to clean up the RMP entries for these pages whening freeing vCPUs during shutdown. Signed-off-by: Brijesh Singh Co-developed-by: Michael Roth Signed-off-by: Michael Roth Signed-off-by: Harald Hoyer Signed-off-by: Ashish Kalra Message-ID: <20240501085210.2213060-8-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/linux/psp-sev.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 3705c2044fc0..903ddfea8585 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -658,6 +658,7 @@ struct sev_data_snp_launch_update { * @id_auth_paddr: system physical address of ID block authentication structure * @id_block_en: indicates whether ID block is present * @auth_key_en: indicates whether author key is present in authentication structure + * @vcek_disabled: indicates whether use of VCEK is allowed for attestation reports * @rsvd: reserved * @host_data: host-supplied data for guest, not interpreted by firmware */ @@ -667,7 +668,8 @@ struct sev_data_snp_launch_finish { u64 id_auth_paddr; u8 id_block_en:1; u8 auth_key_en:1; - u64 rsvd:62; + u8 vcek_disabled:1; + u64 rsvd:61; u8 host_data[32]; } __packed; -- cgit v1.2.3 From 3dbfbd101a5844f851da9ae6e90f59753c10ff42 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 13 May 2024 23:27:23 +0300 Subject: drm/edid: remove drm_do_get_edid() All users of drm_do_get_edid() have been converted to drm_edid_read_custom(). Remove the unused function to prevent new users from creeping in. Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240513202723.261440-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/drm_edid.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index b085525e53e2..6bdfa254a1c1 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -423,10 +423,6 @@ static inline void drm_edid_decode_panel_id(u32 panel_id, char vend[4], u16 *pro } bool drm_probe_ddc(struct i2c_adapter *adapter); -struct edid *drm_do_get_edid(struct drm_connector *connector, - int (*get_edid_block)(void *data, u8 *buf, unsigned int block, - size_t len), - void *data); struct edid *drm_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter); struct edid *drm_get_edid_switcheroo(struct drm_connector *connector, -- cgit v1.2.3 From cdfad4db7756563db7d458216d9e3c2651dddc7d Mon Sep 17 00:00:00 2001 From: Tomasz Rusinowicz Date: Mon, 13 May 2024 14:04:27 +0200 Subject: accel/ivpu: Add NPU profiling support Implement time based Metric Streamer profiling UAPI. This is a generic mechanism allowing user mode tools to sample NPU metrics. These metrics are defined by the FW and transparent to the driver. The user space can check for this feature by checking DRM_IVPU_CAP_METRIC_STREAMER driver capability. Signed-off-by: Tomasz Rusinowicz Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20240513120431.3187212-9-jacek.lawrynowicz@linux.intel.com --- include/uapi/drm/ivpu_accel.h | 69 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 19a13468eca5..084fb529e1e9 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __UAPI_IVPU_DRM_H__ @@ -21,6 +21,10 @@ extern "C" { #define DRM_IVPU_BO_INFO 0x03 #define DRM_IVPU_SUBMIT 0x05 #define DRM_IVPU_BO_WAIT 0x06 +#define DRM_IVPU_METRIC_STREAMER_START 0x07 +#define DRM_IVPU_METRIC_STREAMER_STOP 0x08 +#define DRM_IVPU_METRIC_STREAMER_GET_DATA 0x09 +#define DRM_IVPU_METRIC_STREAMER_GET_INFO 0x0a #define DRM_IOCTL_IVPU_GET_PARAM \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) @@ -40,6 +44,22 @@ extern "C" { #define DRM_IOCTL_IVPU_BO_WAIT \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_WAIT, struct drm_ivpu_bo_wait) +#define DRM_IOCTL_IVPU_METRIC_STREAMER_START \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_START, \ + struct drm_ivpu_metric_streamer_start) + +#define DRM_IOCTL_IVPU_METRIC_STREAMER_STOP \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_STOP, \ + struct drm_ivpu_metric_streamer_stop) + +#define DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_DATA, \ + struct drm_ivpu_metric_streamer_get_data) + +#define DRM_IOCTL_IVPU_METRIC_STREAMER_GET_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_INFO, \ + struct drm_ivpu_metric_streamer_get_data) + /** * DOC: contexts * @@ -336,6 +356,53 @@ struct drm_ivpu_bo_wait { __u32 pad; }; +/** + * struct drm_ivpu_metric_streamer_start - Start collecting metric data + */ +struct drm_ivpu_metric_streamer_start { + /** @metric_group_mask: Indicates metric streamer instance */ + __u64 metric_group_mask; + /** @sampling_period_ns: Sampling period in nanoseconds */ + __u64 sampling_period_ns; + /** + * @read_period_samples: + * + * Number of samples after which user space will try to read the data. + * Reading the data after significantly longer period may cause data loss. + */ + __u32 read_period_samples; + /** @sample_size: Returned size of a single sample in bytes */ + __u32 sample_size; + /** @max_data_size: Returned max @data_size from %DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA */ + __u32 max_data_size; +}; + +/** + * struct drm_ivpu_metric_streamer_get_data - Copy collected metric data + */ +struct drm_ivpu_metric_streamer_get_data { + /** @metric_group_mask: Indicates metric streamer instance */ + __u64 metric_group_mask; + /** @buffer_ptr: A pointer to a destination for the copied data */ + __u64 buffer_ptr; + /** @buffer_size: Size of the destination buffer */ + __u64 buffer_size; + /** + * @data_size: Returned size of copied metric data + * + * If the @buffer_size is zero, returns the amount of data ready to be copied. + */ + __u64 data_size; +}; + +/** + * struct drm_ivpu_metric_streamer_stop - Stop collecting metric data + */ +struct drm_ivpu_metric_streamer_stop { + /** @metric_group_mask: Indicates metric streamer instance */ + __u64 metric_group_mask; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 0b03829fdece47beba9ecb7dbcbde4585ee3663e Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 14 May 2024 10:20:51 -0700 Subject: drm/mipi-dsi: Fix theoretical int overflow in mipi_dsi_dcs_write_seq() The mipi_dsi_dcs_write_seq() macro makes a call to mipi_dsi_dcs_write_buffer() which returns a type ssize_t. The macro then stores it in an int and checks to see if it's negative. This could theoretically be a problem if "ssize_t" is larger than "int". To see the issue, imagine that "ssize_t" is 32-bits and "int" is 16-bits, you could see a problem if there was some code out there that looked like: mipi_dsi_dcs_write_seq(dsi, cmd, <32767 bytes as arguments>); ...since we'd get back that 32768 bytes were transferred and 32768 stored in a 16-bit int would look negative. Though there are no callsites where we'd actually hit this (even if "int" was only 16-bit), it's cleaner to make the types match so let's fix it. Fixes: 2a9e9daf7523 ("drm/mipi-dsi: Introduce mipi_dsi_dcs_write_seq macro") Reviewed-by: Neil Armstrong Reviewed-by: Linus Walleij Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20240514102056.v5.1.I30fa4c8348ea316c886ef8a522a52fed617f930d@changeid Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.1.I30fa4c8348ea316c886ef8a522a52fed617f930d@changeid --- include/drm/drm_mipi_dsi.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 82b1cc434ea3..70ce0b8cbc68 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -333,18 +333,18 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, * @cmd: Command * @seq: buffer containing data to be transmitted */ -#define mipi_dsi_dcs_write_seq(dsi, cmd, seq...) \ - do { \ - static const u8 d[] = { cmd, seq }; \ - struct device *dev = &dsi->dev; \ - int ret; \ - ret = mipi_dsi_dcs_write_buffer(dsi, d, ARRAY_SIZE(d)); \ - if (ret < 0) { \ - dev_err_ratelimited( \ - dev, "sending command %#02x failed: %d\n", \ - cmd, ret); \ - return ret; \ - } \ +#define mipi_dsi_dcs_write_seq(dsi, cmd, seq...) \ + do { \ + static const u8 d[] = { cmd, seq }; \ + struct device *dev = &dsi->dev; \ + ssize_t ret; \ + ret = mipi_dsi_dcs_write_buffer(dsi, d, ARRAY_SIZE(d)); \ + if (ret < 0) { \ + dev_err_ratelimited( \ + dev, "sending command %#02x failed: %zd\n", \ + cmd, ret); \ + return ret; \ + } \ } while (0) /** -- cgit v1.2.3 From 24acbcce5cc673886c2f4f9b3f6f89a9c6a53b7e Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 14 May 2024 10:20:52 -0700 Subject: drm/mipi-dsi: Fix theoretical int overflow in mipi_dsi_generic_write_seq() The mipi_dsi_generic_write_seq() macro makes a call to mipi_dsi_generic_write() which returns a type ssize_t. The macro then stores it in an int and checks to see if it's negative. This could theoretically be a problem if "ssize_t" is larger than "int". To see the issue, imagine that "ssize_t" is 32-bits and "int" is 16-bits, you could see a problem if there was some code out there that looked like: mipi_dsi_generic_write_seq(dsi, <32768 bytes as arguments>); ...since we'd get back that 32768 bytes were transferred and 32768 stored in a 16-bit int would look negative. Though there are no callsites where we'd actually hit this (even if "int" was only 16-bit), it's cleaner to make the types match so let's fix it. Fixes: a9015ce59320 ("drm/mipi-dsi: Add a mipi_dsi_dcs_write_seq() macro") Reviewed-by: Neil Armstrong Reviewed-by: Linus Walleij Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20240514102056.v5.2.Iadb65b8add19ed3ae3ed6425011beb97e380a912@changeid Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.2.Iadb65b8add19ed3ae3ed6425011beb97e380a912@changeid --- include/drm/drm_mipi_dsi.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 70ce0b8cbc68..e0f56564bf97 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -314,17 +314,17 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, * @dsi: DSI peripheral device * @seq: buffer containing the payload */ -#define mipi_dsi_generic_write_seq(dsi, seq...) \ - do { \ - static const u8 d[] = { seq }; \ - struct device *dev = &dsi->dev; \ - int ret; \ - ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d)); \ - if (ret < 0) { \ - dev_err_ratelimited(dev, "transmit data failed: %d\n", \ - ret); \ - return ret; \ - } \ +#define mipi_dsi_generic_write_seq(dsi, seq...) \ + do { \ + static const u8 d[] = { seq }; \ + struct device *dev = &dsi->dev; \ + ssize_t ret; \ + ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d)); \ + if (ret < 0) { \ + dev_err_ratelimited(dev, "transmit data failed: %zd\n", \ + ret); \ + return ret; \ + } \ } while (0) /** -- cgit v1.2.3 From 7d3f6acaf87c7db6dcd868694a2f65e7040478dc Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 14 May 2024 10:20:53 -0700 Subject: drm/mipi-dsi: mipi_dsi_*_write functions don't need to ratelimit prints We really don't expect these errors to be printed over and over again. When a driver hits the error it should bail out. Just use a normal error print. This gives a nice space savings for users of these functions: $ scripts/bloat-o-meter \ .../before/panel-novatek-nt36672e.ko \ .../after/panel-novatek-nt36672e.ko add/remove: 0/1 grow/shrink: 0/1 up/down: 0/-16760 (-16760) Function old new delta nt36672e_1080x2408_60hz_init 17080 10640 -6440 nt36672e_1080x2408_60hz_init._rs 10320 - -10320 Total: Before=31815, After=15055, chg -52.68% Reviewed-by: Neil Armstrong Reviewed-by: Linus Walleij Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20240514102056.v5.3.I9982cd5d8014de7a4513f5619f66f88da49ce4ec@changeid Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.3.I9982cd5d8014de7a4513f5619f66f88da49ce4ec@changeid --- include/drm/drm_mipi_dsi.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index e0f56564bf97..67967be48dbd 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -314,17 +314,16 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, * @dsi: DSI peripheral device * @seq: buffer containing the payload */ -#define mipi_dsi_generic_write_seq(dsi, seq...) \ - do { \ - static const u8 d[] = { seq }; \ - struct device *dev = &dsi->dev; \ - ssize_t ret; \ - ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d)); \ - if (ret < 0) { \ - dev_err_ratelimited(dev, "transmit data failed: %zd\n", \ - ret); \ - return ret; \ - } \ +#define mipi_dsi_generic_write_seq(dsi, seq...) \ + do { \ + static const u8 d[] = { seq }; \ + struct device *dev = &dsi->dev; \ + ssize_t ret; \ + ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d)); \ + if (ret < 0) { \ + dev_err(dev, "transmit data failed: %zd\n", ret); \ + return ret; \ + } \ } while (0) /** @@ -340,8 +339,7 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, ssize_t ret; \ ret = mipi_dsi_dcs_write_buffer(dsi, d, ARRAY_SIZE(d)); \ if (ret < 0) { \ - dev_err_ratelimited( \ - dev, "sending command %#02x failed: %zd\n", \ + dev_err(dev, "sending command %#02x failed: %zd\n", \ cmd, ret); \ return ret; \ } \ -- cgit v1.2.3 From 3b724909a380fddb44dfa0072fc459c698a52658 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 14 May 2024 10:20:54 -0700 Subject: drm/mipi-dsi: Reduce driver bloat of mipi_dsi_*_write_seq() Through a cooperative effort between Hsin-Yi Wang and Dmitry Baryshkov, we have realized the dev_err() in the mipi_dsi_*_write_seq() macros was causing quite a bit of bloat to the kernel. Let's hoist this call into drm_mipi_dsi.c by adding a "chatty" version of the functions that includes the print. While doing this, add a bit more comments to these macros making it clear that they print errors and also that they return out of _the caller's_ function. Without any changes to clients this gives a nice savings. Specifically the macro was inlined and thus the error report call was inlined into every call to mipi_dsi_dcs_write_seq() and mipi_dsi_generic_write_seq(). By using a call to a "chatty" function, the usage is reduced to one call in the chatty function and a function call at the invoking site. Building with my build system shows one example: $ scripts/bloat-o-meter \ .../before/panel-novatek-nt36672e.ko \ .../after/panel-novatek-nt36672e.ko add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-4404 (-4404) Function old new delta nt36672e_1080x2408_60hz_init 10640 6236 -4404 Total: Before=15055, After=10651, chg -29.25% Note that given the change in location of the print it's harder to include the "cmd" in the printout for mipi_dsi_dcs_write_seq() since, theoretically, someone could call the new chatty function with a zero-size array and it would be illegal to dereference data[0]. There's a printk format to print the whole buffer and this is probably more useful for debugging anyway. Given that we're doing this for mipi_dsi_dcs_write_seq(), let's also print the buffer for mipi_dsi_generic_write_seq() in the error case. It should be noted that the current consensus of DRM folks is that the mipi_dsi_*_write_seq() should be deprecated due to the non-intuitive return behavior. A future patch will formally mark them as deprecated and provide an alternative. Reviewed-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Reviewed-by: Linus Walleij Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20240514102056.v5.4.Id15fae80582bc74a0d4f1338987fa375738f45b9@changeid Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.4.Id15fae80582bc74a0d4f1338987fa375738f45b9@changeid --- include/drm/drm_mipi_dsi.h | 47 +++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 67967be48dbd..6d68d9927f46 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -256,6 +256,8 @@ int mipi_dsi_picture_parameter_set(struct mipi_dsi_device *dsi, ssize_t mipi_dsi_generic_write(struct mipi_dsi_device *dsi, const void *payload, size_t size); +int mipi_dsi_generic_write_chatty(struct mipi_dsi_device *dsi, + const void *payload, size_t size); ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params, size_t num_params, void *data, size_t size); @@ -279,6 +281,8 @@ enum mipi_dsi_dcs_tear_mode { ssize_t mipi_dsi_dcs_write_buffer(struct mipi_dsi_device *dsi, const void *data, size_t len); +int mipi_dsi_dcs_write_buffer_chatty(struct mipi_dsi_device *dsi, + const void *data, size_t len); ssize_t mipi_dsi_dcs_write(struct mipi_dsi_device *dsi, u8 cmd, const void *data, size_t len); ssize_t mipi_dsi_dcs_read(struct mipi_dsi_device *dsi, u8 cmd, void *data, @@ -311,38 +315,39 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, /** * mipi_dsi_generic_write_seq - transmit data using a generic write packet + * + * This macro will print errors for you and will RETURN FROM THE CALLING + * FUNCTION (yes this is non-intuitive) upon error. + * * @dsi: DSI peripheral device * @seq: buffer containing the payload */ -#define mipi_dsi_generic_write_seq(dsi, seq...) \ - do { \ - static const u8 d[] = { seq }; \ - struct device *dev = &dsi->dev; \ - ssize_t ret; \ - ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d)); \ - if (ret < 0) { \ - dev_err(dev, "transmit data failed: %zd\n", ret); \ - return ret; \ - } \ +#define mipi_dsi_generic_write_seq(dsi, seq...) \ + do { \ + static const u8 d[] = { seq }; \ + int ret; \ + ret = mipi_dsi_generic_write_chatty(dsi, d, ARRAY_SIZE(d)); \ + if (ret < 0) \ + return ret; \ } while (0) /** * mipi_dsi_dcs_write_seq - transmit a DCS command with payload + * + * This macro will print errors for you and will RETURN FROM THE CALLING + * FUNCTION (yes this is non-intuitive) upon error. + * * @dsi: DSI peripheral device * @cmd: Command * @seq: buffer containing data to be transmitted */ -#define mipi_dsi_dcs_write_seq(dsi, cmd, seq...) \ - do { \ - static const u8 d[] = { cmd, seq }; \ - struct device *dev = &dsi->dev; \ - ssize_t ret; \ - ret = mipi_dsi_dcs_write_buffer(dsi, d, ARRAY_SIZE(d)); \ - if (ret < 0) { \ - dev_err(dev, "sending command %#02x failed: %zd\n", \ - cmd, ret); \ - return ret; \ - } \ +#define mipi_dsi_dcs_write_seq(dsi, cmd, seq...) \ + do { \ + static const u8 d[] = { cmd, seq }; \ + int ret; \ + ret = mipi_dsi_dcs_write_buffer_chatty(dsi, d, ARRAY_SIZE(d)); \ + if (ret < 0) \ + return ret; \ } while (0) /** -- cgit v1.2.3 From 966e397e4f6032b73438f8d775756541513e7daf Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 14 May 2024 10:20:55 -0700 Subject: drm/mipi-dsi: Introduce mipi_dsi_*_write_seq_multi() The current mipi_dsi_*_write_seq() macros are non-intutitive because they contain a hidden "return" statement that will return out of the _caller_ of the macro. Let's mark them as deprecated and instead introduce some new macros that are more intuitive. These new macros are less optimal when an error occurs but should behave more optimally when there is no error. Specifically these new macros cause smaller code to get generated and the code size savings (less to fetch from RAM, less cache space used, less RAM used) are important. Since the error case isn't something we need to optimize for and these new macros are easier to understand and more flexible, they should be used. After converting to use these new functions, one example shows some nice savings while also being easier to understand. $ scripts/bloat-o-meter \ ...after/panel-novatek-nt36672e.ko \ ...ctx/panel-novatek-nt36672e.ko add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-988 (-988) Function old new delta nt36672e_1080x2408_60hz_init 6236 5248 -988 Total: Before=10651, After=9663, chg -9.28% Reviewed-by: Neil Armstrong Reviewed-by: Linus Walleij Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20240514102056.v5.5.Ie94246c30fe95101e0e26dd5f96e976dbeb8f242@changeid Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.5.Ie94246c30fe95101e0e26dd5f96e976dbeb8f242@changeid --- include/drm/drm_mipi_dsi.h | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 6d68d9927f46..5e9cad541bd6 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -197,6 +197,27 @@ struct mipi_dsi_device { struct drm_dsc_config *dsc; }; +/** + * struct mipi_dsi_multi_context - Context to call multiple MIPI DSI funcs in a row + */ +struct mipi_dsi_multi_context { + /** + * @dsi: Pointer to the MIPI DSI device + */ + struct mipi_dsi_device *dsi; + + /** + * @accum_err: Storage for the accumulated error over the multiple calls + * + * Init to 0. If a function encounters an error then the error code + * will be stored here. If you call a function and this points to a + * non-zero value then the function will be a noop. This allows calling + * a function many times in a row and just checking the error at the + * end to see if any of them failed. + */ + int accum_err; +}; + #define MIPI_DSI_MODULE_PREFIX "mipi-dsi:" #define to_mipi_dsi_device(__dev) container_of_const(__dev, struct mipi_dsi_device, dev) @@ -258,6 +279,8 @@ ssize_t mipi_dsi_generic_write(struct mipi_dsi_device *dsi, const void *payload, size_t size); int mipi_dsi_generic_write_chatty(struct mipi_dsi_device *dsi, const void *payload, size_t size); +void mipi_dsi_generic_write_multi(struct mipi_dsi_multi_context *ctx, + const void *payload, size_t size); ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params, size_t num_params, void *data, size_t size); @@ -283,6 +306,8 @@ ssize_t mipi_dsi_dcs_write_buffer(struct mipi_dsi_device *dsi, const void *data, size_t len); int mipi_dsi_dcs_write_buffer_chatty(struct mipi_dsi_device *dsi, const void *data, size_t len); +void mipi_dsi_dcs_write_buffer_multi(struct mipi_dsi_multi_context *ctx, + const void *data, size_t len); ssize_t mipi_dsi_dcs_write(struct mipi_dsi_device *dsi, u8 cmd, const void *data, size_t len); ssize_t mipi_dsi_dcs_read(struct mipi_dsi_device *dsi, u8 cmd, void *data, @@ -319,6 +344,9 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, * This macro will print errors for you and will RETURN FROM THE CALLING * FUNCTION (yes this is non-intuitive) upon error. * + * Because of the non-intuitive return behavior, THIS MACRO IS DEPRECATED. + * Please replace calls of it with mipi_dsi_generic_write_seq_multi(). + * * @dsi: DSI peripheral device * @seq: buffer containing the payload */ @@ -331,12 +359,30 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, return ret; \ } while (0) +/** + * mipi_dsi_generic_write_seq_multi - transmit data using a generic write packet + * + * This macro will print errors for you and error handling is optimized for + * callers that call this multiple times in a row. + * + * @ctx: Context for multiple DSI transactions + * @seq: buffer containing the payload + */ +#define mipi_dsi_generic_write_seq_multi(ctx, seq...) \ + do { \ + static const u8 d[] = { seq }; \ + mipi_dsi_generic_write_multi(ctx, d, ARRAY_SIZE(d)); \ + } while (0) + /** * mipi_dsi_dcs_write_seq - transmit a DCS command with payload * * This macro will print errors for you and will RETURN FROM THE CALLING * FUNCTION (yes this is non-intuitive) upon error. * + * Because of the non-intuitive return behavior, THIS MACRO IS DEPRECATED. + * Please replace calls of it with mipi_dsi_dcs_write_seq_multi(). + * * @dsi: DSI peripheral device * @cmd: Command * @seq: buffer containing data to be transmitted @@ -350,6 +396,22 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, return ret; \ } while (0) +/** + * mipi_dsi_dcs_write_seq_multi - transmit a DCS command with payload + * + * This macro will print errors for you and error handling is optimized for + * callers that call this multiple times in a row. + * + * @ctx: Context for multiple DSI transactions + * @cmd: Command + * @seq: buffer containing data to be transmitted + */ +#define mipi_dsi_dcs_write_seq_multi(ctx, cmd, seq...) \ + do { \ + static const u8 d[] = { cmd, seq }; \ + mipi_dsi_dcs_write_buffer_multi(ctx, d, ARRAY_SIZE(d)); \ + } while (0) + /** * struct mipi_dsi_driver - DSI driver * @driver: device driver model driver -- cgit v1.2.3 From 0f1bb41bf39695c84c83ce6f69e125b562d1d7ab Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 14 May 2024 15:59:39 +0100 Subject: drm/i915: Support replaying GPU hangs with captured context image When debugging GPU hangs Mesa developers are finding it useful to replay the captured error state against the simulator. But due various simulator limitations which prevent replicating all hangs, one step further is being able to replay against a real GPU. This is almost doable today with the missing part being able to upload the captured context image into the driver state prior to executing the uploaded hanging batch and all the buffers. To enable this last part we add a new context parameter called I915_CONTEXT_PARAM_CONTEXT_IMAGE. It follows the existing SSEU configuration pattern of being able to select which context to apply against, paired with the actual image and its size. Since this is adding a new concept of debug only uapi, we hide it behind a new kconfig option and also require activation with a module parameter. Together with a warning banner printed at driver load, all those combined should be sufficient to guard against inadvertently enabling the feature. In terms of implementation we allow the legacy context set param to be used since that removes the need to record the per context data in the proto context, while still allowing flexibility of specifying context images for any context. Mesa MR using the uapi can be seen at: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27594 v2: * Fix whitespace alignment as per checkpatch. * Added warning on userspace misuse. * Rebase for extracting ce->default_state shadowing. v3: * Rebase for I915_CONTEXT_PARAM_LOW_LATENCY. Signed-off-by: Tvrtko Ursulin Cc: Lionel Landwerlin Cc: Carlos Santa Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Tested-by: Carlos Santa Signed-off-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240514145939.87427-2-tursulin@igalia.com --- include/uapi/drm/i915_drm.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index d4d86e566e07..535cb68fdb5c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2163,6 +2163,15 @@ struct drm_i915_gem_context_param { * supports this per context flag. */ #define I915_CONTEXT_PARAM_LOW_LATENCY 0xe + +/* + * I915_CONTEXT_PARAM_CONTEXT_IMAGE: + * + * Allows userspace to provide own context images. + * + * Note that this is a debug API not available on production kernel builds. + */ +#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf /* Must be kept compact -- no holes and well documented */ /** @value: Context parameter value to be set or queried */ @@ -2564,6 +2573,24 @@ struct i915_context_param_engines { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +struct i915_gem_context_param_context_image { + /** @engine: Engine class & instance to be configured. */ + struct i915_engine_class_instance engine; + + /** @flags: One of the supported flags or zero. */ + __u32 flags; +#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0) + + /** @size: Size of the image blob pointed to by @image. */ + __u32 size; + + /** @mbz: Must be zero. */ + __u32 mbz; + + /** @image: Userspace memory containing the context image. */ + __u64 image; +} __attribute__((packed)); + /** * struct drm_i915_gem_context_create_ext_setparam - Context parameter * to set or query during context creation. -- cgit v1.2.3 From f79d6d28d8fe77b14beeaebe5393d9f294f8d09d Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 12 May 2024 02:00:19 +0300 Subject: drm/mipi-dsi: wrap more functions for streamline handling Follow the pattern of mipi_dsi_dcs_*_multi() and wrap several existing MIPI DSI functions to use the context for processing. This simplifies and streamlines driver code to use simpler code pattern. Note, msleep function is also wrapped in this way as it is frequently called inbetween other mipi_dsi_dcs_*() functions. Signed-off-by: Dmitry Baryshkov Reviewed-by: Douglas Anderson Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240512-dsi-panels-upd-api-v2-2-e31ca14d102e@linaro.org Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240512-dsi-panels-upd-api-v2-2-e31ca14d102e@linaro.org --- include/drm/drm_mipi_dsi.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 5e9cad541bd6..bd5a0b6d0711 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -275,6 +275,13 @@ int mipi_dsi_compression_mode_ext(struct mipi_dsi_device *dsi, bool enable, int mipi_dsi_picture_parameter_set(struct mipi_dsi_device *dsi, const struct drm_dsc_picture_parameter_set *pps); +void mipi_dsi_compression_mode_ext_multi(struct mipi_dsi_multi_context *ctx, + bool enable, + enum mipi_dsi_compression_algo algo, + unsigned int pps_selector); +void mipi_dsi_picture_parameter_set_multi(struct mipi_dsi_multi_context *ctx, + const struct drm_dsc_picture_parameter_set *pps); + ssize_t mipi_dsi_generic_write(struct mipi_dsi_device *dsi, const void *payload, size_t size); int mipi_dsi_generic_write_chatty(struct mipi_dsi_device *dsi, @@ -284,6 +291,12 @@ void mipi_dsi_generic_write_multi(struct mipi_dsi_multi_context *ctx, ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params, size_t num_params, void *data, size_t size); +#define mipi_dsi_msleep(ctx, delay) \ + do { \ + if (!ctx.accum_err) \ + msleep(delay); \ + } while (0) + /** * enum mipi_dsi_dcs_tear_mode - Tearing Effect Output Line mode * @MIPI_DSI_DCS_TEAR_MODE_VBLANK: the TE output line consists of V-Blanking @@ -338,6 +351,14 @@ int mipi_dsi_dcs_set_display_brightness_large(struct mipi_dsi_device *dsi, int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, u16 *brightness); +void mipi_dsi_dcs_nop_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_enter_sleep_mode_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_exit_sleep_mode_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_set_display_off_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_set_display_on_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_set_tear_on_multi(struct mipi_dsi_multi_context *ctx, + enum mipi_dsi_dcs_tear_mode mode); + /** * mipi_dsi_generic_write_seq - transmit data using a generic write packet * -- cgit v1.2.3 From 36b75080e68b4a27ae1c40beffb3d6131f8eeeff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Sun, 12 May 2024 19:23:26 -0300 Subject: drm/v3d: Create a new V3D parameter for the maximum number of perfcnt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The maximum number of performance counters can change from version to version and it's important for userspace to know this value, as it needs to use the counters for performance queries. Therefore, expose the maximum number of performance counters to userspace as a parameter. Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240512222655.2792754-4-mcanal@igalia.com --- include/uapi/drm/v3d_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index dce1835eced4..215b01bb69c3 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -286,6 +286,7 @@ enum drm_v3d_param { DRM_V3D_PARAM_SUPPORTS_PERFMON, DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT, DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE, + DRM_V3D_PARAM_MAX_PERF_COUNTERS, }; struct drm_v3d_get_param { -- cgit v1.2.3 From f33fe58298e686e7cc2d24f747c980457812b566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Sun, 12 May 2024 19:23:27 -0300 Subject: drm/v3d: Create new IOCTL to expose performance counters information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace usually needs some information about the performance counters available. Although we could replicate this information in the kernel and user-space, let's use the kernel as the "single source of truth" to avoid issues in the future (e.g. list of performance counters is updated in user-space, but not in the kernel, generating invalid requests). Therefore, create a new IOCTL to expose the performance counters information, that is name, category, and description. Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240512222655.2792754-5-mcanal@igalia.com --- include/uapi/drm/v3d_drm.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 215b01bb69c3..0860ddb3d0b6 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -42,6 +42,7 @@ extern "C" { #define DRM_V3D_PERFMON_DESTROY 0x09 #define DRM_V3D_PERFMON_GET_VALUES 0x0a #define DRM_V3D_SUBMIT_CPU 0x0b +#define DRM_V3D_PERFMON_GET_COUNTER 0x0c #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) @@ -58,6 +59,8 @@ extern "C" { #define DRM_IOCTL_V3D_PERFMON_GET_VALUES DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, \ struct drm_v3d_perfmon_get_values) #define DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu) +#define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \ + struct drm_v3d_perfmon_get_counter) #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 #define DRM_V3D_SUBMIT_EXTENSION 0x02 @@ -718,6 +721,40 @@ struct drm_v3d_perfmon_get_values { __u64 values_ptr; }; +#define DRM_V3D_PERFCNT_MAX_NAME 64 +#define DRM_V3D_PERFCNT_MAX_CATEGORY 32 +#define DRM_V3D_PERFCNT_MAX_DESCRIPTION 256 + +/** + * struct drm_v3d_perfmon_get_counter - ioctl to get the description of a + * performance counter + * + * As userspace needs to retrieve information about the performance counters + * available, this IOCTL allows users to get information about a performance + * counter (name, category and description). + */ +struct drm_v3d_perfmon_get_counter { + /* + * Counter ID + * + * Must be smaller than the maximum number of performance counters, which + * can be retrieve through DRM_V3D_PARAM_MAX_PERF_COUNTERS. + */ + __u8 counter; + + /* Name of the counter */ + __u8 name[DRM_V3D_PERFCNT_MAX_NAME]; + + /* Category of the counter */ + __u8 category[DRM_V3D_PERFCNT_MAX_CATEGORY]; + + /* Description of the counter */ + __u8 description[DRM_V3D_PERFCNT_MAX_DESCRIPTION]; + + /* mbz */ + __u8 reserved[7]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 673087d8b023faf34b84e8faf63bbeea3da87bab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Sun, 12 May 2024 19:23:29 -0300 Subject: drm/v3d: Deprecate the use of the Performance Counters enum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Performance Counters enum used to identify the index of each performance counter and provide the total number of performance counters (V3D_PERFCNT_NUM). But, this enum is only valid for V3D 4.2, not for V3D 7.1. As we implemented a new flexible structure to retrieve performance counters information, we can deprecate this enum. Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240512222655.2792754-7-mcanal@igalia.com --- include/uapi/drm/v3d_drm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 0860ddb3d0b6..87fc5bb0a61e 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -603,6 +603,16 @@ struct drm_v3d_submit_cpu { __u64 extensions; }; +/* The performance counters index represented by this enum are deprecated and + * must no longer be used. These counters are only valid for V3D 4.2. + * + * In order to check for performance counter information, + * use DRM_IOCTL_V3D_PERFMON_GET_COUNTER. + * + * Don't use V3D_PERFCNT_NUM to retrieve the maximum number of performance + * counters. You should use DRM_IOCTL_V3D_GET_PARAM with the following + * parameter: DRM_V3D_PARAM_MAX_PERF_COUNTERS. + */ enum { V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS, V3D_PERFCNT_FEP_VALID_PRIMS, -- cgit v1.2.3 From 8a0a7b98d4b6eeeab337ec25daa4bc0a5e710a15 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Thu, 7 Mar 2024 14:29:57 +0800 Subject: drm/mst: Fix NULL pointer dereference at drm_dp_add_payload_part2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] Commit: - commit 5aa1dfcdf0a4 ("drm/mst: Refactor the flow for payload allocation/removement") accidently overwrite the commit - commit 54d217406afe ("drm: use mgr->dev in drm_dbg_kms in drm_dp_add_payload_part2") which cause regression. [How] Recover the original NULL fix and remove the unnecessary input parameter 'state' for drm_dp_add_payload_part2(). Fixes: 5aa1dfcdf0a4 ("drm/mst: Refactor the flow for payload allocation/removement") Reported-by: Leon Weiß Link: https://lore.kernel.org/r/38c253ea42072cc825dc969ac4e6b9b600371cc8.camel@ruhr-uni-bochum.de/ Cc: lyude@redhat.com Cc: imre.deak@intel.com Cc: stable@vger.kernel.org Cc: regressions@lists.linux.dev Reviewed-by: Harry Wentland Acked-by: Jani Nikula Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20240307062957.2323620-1-Wayne.Lin@amd.com --- include/drm/display/drm_dp_mst_helper.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index 3546b58a121b..cfe096389d94 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -871,7 +871,6 @@ int drm_dp_add_payload_part1(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_topology_state *mst_state, struct drm_dp_mst_atomic_payload *payload); int drm_dp_add_payload_part2(struct drm_dp_mst_topology_mgr *mgr, - struct drm_atomic_state *state, struct drm_dp_mst_atomic_payload *payload); void drm_dp_remove_payload_part1(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_topology_state *mst_state, -- cgit v1.2.3 From 5716146295183651cfd89ae2ea46c5ef0d31faa8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:35 +0200 Subject: wifi: regulatory: remove extra documentation The struct member country_ie_checksum doesn't exist, so don't document it. Link: https://msgid.link/20240515093852.ebcc9673558b.Ie0b58c1249c6375c60859fa6474d7cdd8862b065@changeid Signed-off-by: Johannes Berg --- include/net/regulatory.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/regulatory.h b/include/net/regulatory.h index ebf9e028d1ef..a103f4c8cf75 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -71,8 +71,6 @@ enum environment_cap { * CRDA and can be used by other regulatory requests. When a * the last request is not yet processed we must yield until it * is processed before processing any new requests. - * @country_ie_checksum: checksum of the last processed and accepted - * country IE * @country_ie_env: lets us know if the AP is telling us we are outdoor, * indoor, or if it doesn't matter * @list: used to insert into the reg_requests_list linked list -- cgit v1.2.3 From 3bb8dce41c023e39ec4d79a83742403b5064a276 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:36 +0200 Subject: wifi: ieee80211: add missing doc short descriptions Some structures erroneously don't have a short description, add the missing descriptions. Link: https://msgid.link/20240515093852.16f4355e918e.I940276a4fb006ada68ab1a3e6077e3229fff0f14@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index de2dce743ee2..713266ce48a7 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1101,7 +1101,7 @@ enum ieee80211_vht_opmode_bits { }; /** - * enum ieee80211_s1g_chanwidth + * enum ieee80211_s1g_chanwidth - S1G channel widths * These are defined in IEEE802.11-2016ah Table 10-20 * as BSS Channel Width * @@ -4145,7 +4145,7 @@ enum ieee80211_idle_options { }; /** - * struct ieee80211_bss_max_idle_period_ie + * struct ieee80211_bss_max_idle_period_ie - BSS max idle period element struct * * This structure refers to "BSS Max idle period element" * @@ -4180,7 +4180,7 @@ enum ieee80211_sa_query_action { }; /** - * struct ieee80211_bssid_index + * struct ieee80211_bssid_index - multiple BSSID index element structure * * This structure refers to "Multiple BSSID-index element" * @@ -4195,7 +4195,8 @@ struct ieee80211_bssid_index { }; /** - * struct ieee80211_multiple_bssid_configuration + * struct ieee80211_multiple_bssid_configuration - multiple BSSID configuration + * element structure * * This structure refers to "Multiple BSSID Configuration element" * -- cgit v1.2.3 From 1c26f09b20bafc92e0de80e84942be77d4d3c61e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:37 +0200 Subject: wifi: radiotap: document ieee80211_get_radiotap_len() return value Document the return value of ieee80211_get_radiotap_len() in the proper kernel-doc format. Link: https://msgid.link/20240515093852.143aadfdb094.I8795ec1e8cfd7106d58325fb514bae92625fb45c@changeid Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 925bac726a92..91762faecc13 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -582,6 +582,7 @@ enum ieee80211_radiotap_eht_usig_tb { /** * ieee80211_get_radiotap_len - get radiotap header length * @data: pointer to the header + * Return: the radiotap header length */ static inline u16 ieee80211_get_radiotap_len(const char *data) { -- cgit v1.2.3 From 54856871298c9a8717450351699a8e6fe706101c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:38 +0200 Subject: wifi: ieee80211: remove ieee80211_next_tbtt_present() This is actually completely equivalent to the other function ieee80211_is_s1g_short_beacon(), but open-codes the logic. Implement the necessary logic in ieee80211_is_s1g_short_beacon() and remove ieee80211_next_tbtt_present(). Link: https://msgid.link/20240515093852.774ced74dea8.I152525b4cff6e6a25be6c48fe6a4b89f17bab8a9@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 713266ce48a7..72b351abb4f6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -604,25 +604,15 @@ static inline bool ieee80211_is_s1g_beacon(__le16 fc) } /** - * ieee80211_next_tbtt_present - check if IEEE80211_FTYPE_EXT && - * IEEE80211_STYPE_S1G_BEACON && IEEE80211_S1G_BCN_NEXT_TBTT - * @fc: frame control bytes in little-endian byteorder - */ -static inline bool ieee80211_next_tbtt_present(__le16 fc) -{ - return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == - cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON) && - fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT); -} - -/** - * ieee80211_is_s1g_short_beacon - check if next tbtt present bit is set. Only - * true for S1G beacons when they're short. + * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an S1G short beacon, + * i.e. it is an S1G beacon with 'next TBTT' flag set */ static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) { - return ieee80211_is_s1g_beacon(fc) && ieee80211_next_tbtt_present(fc); + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); } /** -- cgit v1.2.3 From 3c75e99c7036a87f159dbd526060554afb3482f5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:39 +0200 Subject: wifi: ieee80211: document function return values These are all missing, as pointed out when running kernel-doc. Add return value documentation and fix some small things while at it. Link: https://msgid.link/20240515093852.1cd5ad8f354d.Idc16e9767fa42de80b659c32efc58aea38c26996@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 106 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 84 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 72b351abb4f6..595f83783f0e 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -373,6 +373,7 @@ struct ieee80211_trigger { /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame has to-DS set */ static inline bool ieee80211_has_tods(__le16 fc) { @@ -382,6 +383,7 @@ static inline bool ieee80211_has_tods(__le16 fc) /** * ieee80211_has_fromds - check if IEEE80211_FCTL_FROMDS is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame has from-DS set */ static inline bool ieee80211_has_fromds(__le16 fc) { @@ -391,6 +393,7 @@ static inline bool ieee80211_has_fromds(__le16 fc) /** * ieee80211_has_a4 - check if IEEE80211_FCTL_TODS and IEEE80211_FCTL_FROMDS are set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not it's a 4-address frame (from-DS and to-DS set) */ static inline bool ieee80211_has_a4(__le16 fc) { @@ -401,6 +404,7 @@ static inline bool ieee80211_has_a4(__le16 fc) /** * ieee80211_has_morefrags - check if IEEE80211_FCTL_MOREFRAGS is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame has more fragments (more frags bit set) */ static inline bool ieee80211_has_morefrags(__le16 fc) { @@ -410,6 +414,7 @@ static inline bool ieee80211_has_morefrags(__le16 fc) /** * ieee80211_has_retry - check if IEEE80211_FCTL_RETRY is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the retry flag is set */ static inline bool ieee80211_has_retry(__le16 fc) { @@ -419,6 +424,7 @@ static inline bool ieee80211_has_retry(__le16 fc) /** * ieee80211_has_pm - check if IEEE80211_FCTL_PM is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the power management flag is set */ static inline bool ieee80211_has_pm(__le16 fc) { @@ -428,6 +434,7 @@ static inline bool ieee80211_has_pm(__le16 fc) /** * ieee80211_has_moredata - check if IEEE80211_FCTL_MOREDATA is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the more data flag is set */ static inline bool ieee80211_has_moredata(__le16 fc) { @@ -437,6 +444,7 @@ static inline bool ieee80211_has_moredata(__le16 fc) /** * ieee80211_has_protected - check if IEEE80211_FCTL_PROTECTED is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the protected flag is set */ static inline bool ieee80211_has_protected(__le16 fc) { @@ -446,6 +454,7 @@ static inline bool ieee80211_has_protected(__le16 fc) /** * ieee80211_has_order - check if IEEE80211_FCTL_ORDER is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the order flag is set */ static inline bool ieee80211_has_order(__le16 fc) { @@ -455,6 +464,7 @@ static inline bool ieee80211_has_order(__le16 fc) /** * ieee80211_is_mgmt - check if type is IEEE80211_FTYPE_MGMT * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame type is management */ static inline bool ieee80211_is_mgmt(__le16 fc) { @@ -465,6 +475,7 @@ static inline bool ieee80211_is_mgmt(__le16 fc) /** * ieee80211_is_ctl - check if type is IEEE80211_FTYPE_CTL * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame type is control */ static inline bool ieee80211_is_ctl(__le16 fc) { @@ -475,6 +486,7 @@ static inline bool ieee80211_is_ctl(__le16 fc) /** * ieee80211_is_data - check if type is IEEE80211_FTYPE_DATA * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a data frame */ static inline bool ieee80211_is_data(__le16 fc) { @@ -485,6 +497,7 @@ static inline bool ieee80211_is_data(__le16 fc) /** * ieee80211_is_ext - check if type is IEEE80211_FTYPE_EXT * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame type is extended */ static inline bool ieee80211_is_ext(__le16 fc) { @@ -496,6 +509,7 @@ static inline bool ieee80211_is_ext(__le16 fc) /** * ieee80211_is_data_qos - check if type is IEEE80211_FTYPE_DATA and IEEE80211_STYPE_QOS_DATA is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a QoS data frame */ static inline bool ieee80211_is_data_qos(__le16 fc) { @@ -510,6 +524,8 @@ static inline bool ieee80211_is_data_qos(__le16 fc) /** * ieee80211_is_data_present - check if type is IEEE80211_FTYPE_DATA and has data * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a QoS data frame that has data + * (i.e. is not null data) */ static inline bool ieee80211_is_data_present(__le16 fc) { @@ -524,6 +540,7 @@ static inline bool ieee80211_is_data_present(__le16 fc) /** * ieee80211_is_assoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_REQ * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an association request */ static inline bool ieee80211_is_assoc_req(__le16 fc) { @@ -534,6 +551,7 @@ static inline bool ieee80211_is_assoc_req(__le16 fc) /** * ieee80211_is_assoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_RESP * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an association response */ static inline bool ieee80211_is_assoc_resp(__le16 fc) { @@ -544,6 +562,7 @@ static inline bool ieee80211_is_assoc_resp(__le16 fc) /** * ieee80211_is_reassoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_REQ * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a reassociation request */ static inline bool ieee80211_is_reassoc_req(__le16 fc) { @@ -554,6 +573,7 @@ static inline bool ieee80211_is_reassoc_req(__le16 fc) /** * ieee80211_is_reassoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_RESP * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a reassociation response */ static inline bool ieee80211_is_reassoc_resp(__le16 fc) { @@ -564,6 +584,7 @@ static inline bool ieee80211_is_reassoc_resp(__le16 fc) /** * ieee80211_is_probe_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_REQ * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a probe request */ static inline bool ieee80211_is_probe_req(__le16 fc) { @@ -574,6 +595,7 @@ static inline bool ieee80211_is_probe_req(__le16 fc) /** * ieee80211_is_probe_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_RESP * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a probe response */ static inline bool ieee80211_is_probe_resp(__le16 fc) { @@ -584,6 +606,7 @@ static inline bool ieee80211_is_probe_resp(__le16 fc) /** * ieee80211_is_beacon - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_BEACON * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a (regular, not S1G) beacon */ static inline bool ieee80211_is_beacon(__le16 fc) { @@ -595,6 +618,7 @@ static inline bool ieee80211_is_beacon(__le16 fc) * ieee80211_is_s1g_beacon - check if IEEE80211_FTYPE_EXT && * IEEE80211_STYPE_S1G_BEACON * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an S1G beacon */ static inline bool ieee80211_is_s1g_beacon(__le16 fc) { @@ -618,6 +642,7 @@ static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) /** * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an ATIM frame */ static inline bool ieee80211_is_atim(__le16 fc) { @@ -628,6 +653,7 @@ static inline bool ieee80211_is_atim(__le16 fc) /** * ieee80211_is_disassoc - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DISASSOC * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a disassociation frame */ static inline bool ieee80211_is_disassoc(__le16 fc) { @@ -638,6 +664,7 @@ static inline bool ieee80211_is_disassoc(__le16 fc) /** * ieee80211_is_auth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_AUTH * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an authentication frame */ static inline bool ieee80211_is_auth(__le16 fc) { @@ -648,6 +675,7 @@ static inline bool ieee80211_is_auth(__le16 fc) /** * ieee80211_is_deauth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DEAUTH * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a deauthentication frame */ static inline bool ieee80211_is_deauth(__le16 fc) { @@ -658,6 +686,7 @@ static inline bool ieee80211_is_deauth(__le16 fc) /** * ieee80211_is_action - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ACTION * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an action frame */ static inline bool ieee80211_is_action(__le16 fc) { @@ -668,6 +697,7 @@ static inline bool ieee80211_is_action(__le16 fc) /** * ieee80211_is_back_req - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK_REQ * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a block-ACK request frame */ static inline bool ieee80211_is_back_req(__le16 fc) { @@ -678,6 +708,7 @@ static inline bool ieee80211_is_back_req(__le16 fc) /** * ieee80211_is_back - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a block-ACK frame */ static inline bool ieee80211_is_back(__le16 fc) { @@ -688,6 +719,7 @@ static inline bool ieee80211_is_back(__le16 fc) /** * ieee80211_is_pspoll - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_PSPOLL * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a PS-poll frame */ static inline bool ieee80211_is_pspoll(__le16 fc) { @@ -698,6 +730,7 @@ static inline bool ieee80211_is_pspoll(__le16 fc) /** * ieee80211_is_rts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_RTS * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an RTS frame */ static inline bool ieee80211_is_rts(__le16 fc) { @@ -708,6 +741,7 @@ static inline bool ieee80211_is_rts(__le16 fc) /** * ieee80211_is_cts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CTS * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a CTS frame */ static inline bool ieee80211_is_cts(__le16 fc) { @@ -718,6 +752,7 @@ static inline bool ieee80211_is_cts(__le16 fc) /** * ieee80211_is_ack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_ACK * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an ACK frame */ static inline bool ieee80211_is_ack(__le16 fc) { @@ -728,6 +763,7 @@ static inline bool ieee80211_is_ack(__le16 fc) /** * ieee80211_is_cfend - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFEND * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a CF-end frame */ static inline bool ieee80211_is_cfend(__le16 fc) { @@ -738,6 +774,7 @@ static inline bool ieee80211_is_cfend(__le16 fc) /** * ieee80211_is_cfendack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFENDACK * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a CF-end-ack frame */ static inline bool ieee80211_is_cfendack(__le16 fc) { @@ -748,6 +785,7 @@ static inline bool ieee80211_is_cfendack(__le16 fc) /** * ieee80211_is_nullfunc - check if frame is a regular (non-QoS) nullfunc frame * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a nullfunc frame */ static inline bool ieee80211_is_nullfunc(__le16 fc) { @@ -758,6 +796,7 @@ static inline bool ieee80211_is_nullfunc(__le16 fc) /** * ieee80211_is_qos_nullfunc - check if frame is a QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a QoS nullfunc frame */ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) { @@ -768,6 +807,7 @@ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) /** * ieee80211_is_trigger - check if frame is trigger frame * @fc: frame control field in little-endian byteorder + * Return: whether or not the frame is a trigger frame */ static inline bool ieee80211_is_trigger(__le16 fc) { @@ -778,6 +818,7 @@ static inline bool ieee80211_is_trigger(__le16 fc) /** * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a nullfunc or QoS nullfunc frame */ static inline bool ieee80211_is_any_nullfunc(__le16 fc) { @@ -787,6 +828,8 @@ static inline bool ieee80211_is_any_nullfunc(__le16 fc) /** * ieee80211_is_first_frag - check if IEEE80211_SCTL_FRAG is not set * @seq_ctrl: frame sequence control bytes in little-endian byteorder + * Return: whether or not the frame is the first fragment (also true if + * it's not fragmented at all) */ static inline bool ieee80211_is_first_frag(__le16 seq_ctrl) { @@ -796,6 +839,7 @@ static inline bool ieee80211_is_first_frag(__le16 seq_ctrl) /** * ieee80211_is_frag - check if a frame is a fragment * @hdr: 802.11 header of the frame + * Return: whether or not the frame is a fragment */ static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr) { @@ -2349,6 +2393,8 @@ struct ieee80211_eht_operation_info { * @max_vht_nss: current maximum NSS as advertised by the STA in * operating mode notification, can be 0 in which case the * capability data will be used to derive this (from MCS support) + * Return: The maximum NSS that can be used for the given bandwidth/MCS + * combination * * Due to the VHT Extended NSS Bandwidth Support, the maximum NSS can * vary for a given BW/MCS. This function parses the data. @@ -4317,6 +4363,7 @@ struct ieee80211_he_6ghz_capa { /** * ieee80211_get_qos_ctl - get pointer to qos control bytes * @hdr: the frame + * Return: a pointer to the QoS control field in the frame header * * The qos ctrl bytes come after the frame_control, duration, seq_num * and 3 or 4 addresses of length ETH_ALEN. Checks frame_control to choose @@ -4339,6 +4386,7 @@ static inline u8 *ieee80211_get_qos_ctl(struct ieee80211_hdr *hdr) /** * ieee80211_get_tid - get qos TID * @hdr: the frame + * Return: the TID from the QoS control field */ static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr) { @@ -4350,6 +4398,7 @@ static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr) /** * ieee80211_get_SA - get pointer to SA * @hdr: the frame + * Return: a pointer to the source address (SA) * * Given an 802.11 frame, this function returns the offset * to the source address (SA). It does not verify that the @@ -4369,6 +4418,7 @@ static inline u8 *ieee80211_get_SA(struct ieee80211_hdr *hdr) /** * ieee80211_get_DA - get pointer to DA * @hdr: the frame + * Return: a pointer to the destination address (DA) * * Given an 802.11 frame, this function returns the offset * to the destination address (DA). It does not verify that @@ -4387,6 +4437,7 @@ static inline u8 *ieee80211_get_DA(struct ieee80211_hdr *hdr) /** * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU * @skb: the skb to check, starting with the 802.11 header + * Return: whether or not the MMPDU is bufferable */ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb) { @@ -4425,6 +4476,7 @@ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb) /** * _ieee80211_is_robust_mgmt_frame - check if frame is a robust management frame * @hdr: the frame (buffer must include at least the first octet of payload) + * Return: whether or not the frame is a robust management frame */ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) { @@ -4461,6 +4513,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) /** * ieee80211_is_robust_mgmt_frame - check if skb contains a robust mgmt frame * @skb: the skb containing the frame, length will be checked + * Return: whether or not the frame is a robust management frame */ static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb) { @@ -4473,6 +4526,7 @@ static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb) * ieee80211_is_public_action - check if frame is a public action frame * @hdr: the frame * @len: length of the frame + * Return: whether or not the frame is a public action frame */ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, size_t len) @@ -4518,8 +4572,9 @@ ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) /** * _ieee80211_is_group_privacy_action - check if frame is a group addressed - * privacy action frame + * privacy action frame * @hdr: the frame + * Return: whether or not the frame is a group addressed privacy action frame */ static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr) { @@ -4535,8 +4590,9 @@ static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr) /** * ieee80211_is_group_privacy_action - check if frame is a group addressed - * privacy action frame + * privacy action frame * @skb: the skb containing the frame, length will be checked + * Return: whether or not the frame is a group addressed privacy action frame */ static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb) { @@ -4548,6 +4604,7 @@ static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb) /** * ieee80211_tu_to_usec - convert time units (TU) to microseconds * @tu: the TUs + * Return: the time value converted to microseconds */ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu) { @@ -4559,6 +4616,7 @@ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu) * @tim: the TIM IE * @tim_len: length of the TIM IE * @aid: the AID to look for + * Return: whether or not traffic is indicated in the TIM for the given AID */ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, u8 tim_len, u16 aid) @@ -4585,8 +4643,10 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, } /** - * ieee80211_get_tdls_action - get tdls packet action (or -1, if not tdls packet) + * ieee80211_get_tdls_action - get TDLS action code * @skb: the skb containing the frame, length will not be checked + * Return: the TDLS action code, or -1 if it's not an encapsulated TDLS action + * frame * * This function assumes the frame is a data frame, and that the network header * is in the correct place. @@ -4626,6 +4686,7 @@ static inline int ieee80211_get_tdls_action(struct sk_buff *skb) /** * ieee80211_action_contains_tpc - checks if the frame contains TPC element * @skb: the skb containing the frame, length will be checked + * Return: %true if the frame contains a TPC element, %false otherwise * * This function checks if it's either TPC report action frame or Link * Measurement report action frame as defined in IEEE Std. 802.11-2012 8.5.2.5 @@ -4743,6 +4804,7 @@ struct element { * @element: element pointer after for_each_element() or friends * @data: same data pointer as passed to for_each_element() or friends * @datalen: same data length as passed to for_each_element() or friends + * Return: %true if all elements were iterated, %false otherwise; see notes * * This function returns %true if all the data was parsed or considered * while walking the elements. Only use this if your for_each_element() @@ -4946,6 +5008,7 @@ struct ieee80211_mle_tdls_common_info { * ieee80211_mle_common_size - check multi-link element common size * @data: multi-link element, must already be checked for size using * ieee80211_mle_size_ok() + * Return: the size of the multi-link element's "common" subfield */ static inline u8 ieee80211_mle_common_size(const u8 *data) { @@ -4978,11 +5041,10 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) /** * ieee80211_mle_get_link_id - returns the link ID * @data: the basic multi link element + * Return: the link ID, or -1 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the BSS link ID can't be found, -1 will be returned */ static inline int ieee80211_mle_get_link_id(const u8 *data) { @@ -5002,12 +5064,10 @@ static inline int ieee80211_mle_get_link_id(const u8 *data) /** * ieee80211_mle_get_bss_param_ch_cnt - returns the BSS parameter change count * @data: pointer to the basic multi link element + * Return: the BSS Parameter Change Count field value, or -1 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the BSS parameter change count value can't be found (the presence bit - * for it is clear), -1 will be returned. */ static inline int ieee80211_mle_get_bss_param_ch_cnt(const u8 *data) @@ -5030,13 +5090,13 @@ ieee80211_mle_get_bss_param_ch_cnt(const u8 *data) /** * ieee80211_mle_get_eml_med_sync_delay - returns the medium sync delay - * @data: pointer to the multi link EHT IE + * @data: pointer to the multi-link element + * Return: the medium synchronization delay field value from the multi-link + * element, or the default value (%IEEE80211_MED_SYNC_DELAY_DEFAULT) + * if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the medium synchronization is not present, then the default value is - * returned. */ static inline u16 ieee80211_mle_get_eml_med_sync_delay(const u8 *data) { @@ -5060,12 +5120,12 @@ static inline u16 ieee80211_mle_get_eml_med_sync_delay(const u8 *data) /** * ieee80211_mle_get_eml_cap - returns the EML capability - * @data: pointer to the multi link EHT IE + * @data: pointer to the multi-link element + * Return: the EML capability field value from the multi-link element, + * or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the EML capability is not present, 0 will be returned. */ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data) { @@ -5091,13 +5151,12 @@ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data) /** * ieee80211_mle_get_mld_capa_op - returns the MLD capabilities and operations. - * @data: pointer to the multi link EHT IE + * @data: pointer to the multi-link element + * Return: the MLD capabilities and operations field value from the multi-link + * element, or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the MLD capabilities and operations field is not present, 0 will be - * returned. */ static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) { @@ -5128,12 +5187,11 @@ static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) /** * ieee80211_mle_get_mld_id - returns the MLD ID - * @data: pointer to the multi link element + * @data: pointer to the multi-link element + * Return: The MLD ID in the given multi-link element, or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the MLD ID is not present, 0 will be returned. */ static inline u8 ieee80211_mle_get_mld_id(const u8 *data) { @@ -5168,6 +5226,7 @@ static inline u8 ieee80211_mle_get_mld_id(const u8 *data) * ieee80211_mle_size_ok - validate multi-link element size * @data: pointer to the element data * @len: length of the containing element + * Return: whether or not the multi-link element size is OK */ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) { @@ -5237,6 +5296,7 @@ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) * @data: pointer to the element data * @type: expected type of the element * @len: length of the containing element + * Return: whether or not the multi-link element type matches and size is OK */ static inline bool ieee80211_mle_type_ok(const u8 *data, u8 type, size_t len) { @@ -5280,6 +5340,7 @@ struct ieee80211_mle_per_sta_profile { * profile size * @data: pointer to the sub element data * @len: length of the containing sub element + * Return: %true if the STA profile is large enough, %false otherwise */ static inline bool ieee80211_mle_basic_sta_prof_size_ok(const u8 *data, size_t len) @@ -5364,6 +5425,7 @@ ieee80211_mle_basic_sta_prof_bss_param_ch_cnt(const struct ieee80211_mle_per_sta * element sta profile size. * @data: pointer to the sub element data * @len: length of the containing sub element + * Return: %true if the STA profile is large enough, %false otherwise */ static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data, size_t len) -- cgit v1.2.3 From 9d222c12834d80849afbad7b42822b8ffbbc025f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:40 +0200 Subject: wifi: ieee80211: document two FTM related functions Add some documentation to ieee80211_is_timing_measurement() and ieee80211_is_ftm(). Link: https://msgid.link/20240515093852.229aa69e972c.Ifae6762a698e79cd5a49a055fe4c32330e826200@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 595f83783f0e..fb50a99daa93 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4731,6 +4731,11 @@ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb) return true; } +/** + * ieee80211_is_timing_measurement - check if frame is timing measurement response + * @skb: the SKB to check + * Return: whether or not the frame is a valid timing measurement response + */ static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; @@ -4750,6 +4755,11 @@ static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb) return false; } +/** + * ieee80211_is_ftm - check if frame is FTM response + * @skb: the SKB to check + * Return: whether or not the frame is a valid FTM response action frame + */ static inline bool ieee80211_is_ftm(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; -- cgit v1.2.3 From 8592fd7ccc95a7cf222985e6297041463e8b4e9d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:37:54 +0200 Subject: wifi: ieee80211/ath11k: remove IEEE80211_MAX_NUM_PWR_LEVEL The define IEEE80211_MAX_NUM_PWR_LEVEL doesn't make much sense. Yes, that table has a maximum value of 8, and the table will actually remain that way, but EHT introduced a way to encode more levels for 320 MHz channels. Remove IEEE80211_MAX_NUM_PWR_LEVEL and, for ath11k being the only user, add ATH11K_NUM_PWR_LEVELS, where it makes sense since it cannot support 320 MHz channels. Acked-by: Kalle Valo Link: https://msgid.link/20240506214536.9818e5471055.Icece7e47e963d6b68e0d97ba13c102b37fbaa689@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index fb50a99daa93..1c3a683a3ee2 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2833,11 +2833,6 @@ struct ieee80211_he_6ghz_oper { * So it it totally max 8 Transmit Power Envelope element. */ #define IEEE80211_TPE_MAX_IE_COUNT 8 -/* - * In "Table 9-277—Meaning of Maximum Transmit Power Count subfield" - * of "IEEE Std 802.11ax™‐2021", the max power level is 8. - */ -#define IEEE80211_MAX_NUM_PWR_LEVEL 8 #define IEEE80211_TPE_MAX_POWER_COUNT 8 -- cgit v1.2.3 From 39dc8b8ea387ce7f4fe2d2d6d550ed52aa9aa040 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:37:56 +0200 Subject: wifi: mac80211: pass parsed TPE data to drivers Instead of passing the full TPE elements, in all their glory and mixed up data formats for HE backward compatibility, parse them fully into the right values, and pass that to the drivers. Also introduce proper validation already in mac80211, so that drivers don't need to do it, and parse the EHT portions. The code now passes the values in the right order according to the channel used by an interface, which could also be a subset of the data advertised by the AP, if we couldn't connect with the full bandwidth (for whatever reason.) Also add kunit tests for the more complicated bits of it. Reviewed-by: Miriam Rachel Korenblit Acked-by: Kalle Valo Link: https://msgid.link/20240506214536.2aa839969b60.I265b28209e0b29772b2f125f7f83de44a4da877b@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 102 +++++++++++++++++++++++++++++++++++++++------- include/net/mac80211.h | 41 +++++++++++++++++-- 2 files changed, 124 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 1c3a683a3ee2..769008a51809 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2825,17 +2825,6 @@ struct ieee80211_he_6ghz_oper { u8 minrate; } __packed; -/* - * In "9.4.2.161 Transmit Power Envelope element" of "IEEE Std 802.11ax-2021", - * it show four types in "Table 9-275a-Maximum Transmit Power Interpretation - * subfield encoding", and two category for each type in "Table E-12-Regulatory - * Info subfield encoding in the United States". - * So it it totally max 8 Transmit Power Envelope element. - */ -#define IEEE80211_TPE_MAX_IE_COUNT 8 - -#define IEEE80211_TPE_MAX_POWER_COUNT 8 - /* transmit power interpretation type of transmit power envelope element */ enum ieee80211_tx_power_intrpt_type { IEEE80211_TPE_LOCAL_EIRP, @@ -2844,24 +2833,107 @@ enum ieee80211_tx_power_intrpt_type { IEEE80211_TPE_REG_CLIENT_EIRP_PSD, }; +/* category type of transmit power envelope element */ +enum ieee80211_tx_power_category_6ghz { + IEEE80211_TPE_CAT_6GHZ_DEFAULT = 0, + IEEE80211_TPE_CAT_6GHZ_SUBORDINATE = 1, +}; + +/* + * For IEEE80211_TPE_LOCAL_EIRP / IEEE80211_TPE_REG_CLIENT_EIRP, + * setting to 63.5 dBm means no constraint. + */ +#define IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT 127 + +/* + * For IEEE80211_TPE_LOCAL_EIRP_PSD / IEEE80211_TPE_REG_CLIENT_EIRP_PSD, + * setting to 127 indicates no PSD limit for the 20 MHz channel. + */ +#define IEEE80211_TPE_PSD_NO_LIMIT 127 + /** * struct ieee80211_tx_pwr_env - Transmit Power Envelope - * @tx_power_info: Transmit Power Information field - * @tx_power: Maximum Transmit Power field + * @info: Transmit Power Information field + * @variable: Maximum Transmit Power field * * This structure represents the payload of the "Transmit Power * Envelope element" as described in IEEE Std 802.11ax-2021 section * 9.4.2.161 */ struct ieee80211_tx_pwr_env { - u8 tx_power_info; - s8 tx_power[IEEE80211_TPE_MAX_POWER_COUNT]; + u8 info; + u8 variable[]; } __packed; #define IEEE80211_TX_PWR_ENV_INFO_COUNT 0x7 #define IEEE80211_TX_PWR_ENV_INFO_INTERPRET 0x38 #define IEEE80211_TX_PWR_ENV_INFO_CATEGORY 0xC0 +#define IEEE80211_TX_PWR_ENV_EXT_COUNT 0xF + +static inline bool ieee80211_valid_tpe_element(const u8 *data, u8 len) +{ + const struct ieee80211_tx_pwr_env *env = (const void *)data; + u8 count, interpret, category; + u8 needed = sizeof(*env); + u8 N; /* also called N in the spec */ + + if (len < needed) + return false; + + count = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_COUNT); + interpret = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_INTERPRET); + category = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_CATEGORY); + + switch (category) { + case IEEE80211_TPE_CAT_6GHZ_DEFAULT: + case IEEE80211_TPE_CAT_6GHZ_SUBORDINATE: + break; + default: + return false; + } + + switch (interpret) { + case IEEE80211_TPE_LOCAL_EIRP: + case IEEE80211_TPE_REG_CLIENT_EIRP: + if (count > 3) + return false; + + /* count == 0 encodes 1 value for 20 MHz, etc. */ + needed += count + 1; + + if (len < needed) + return false; + + /* there can be extension fields not accounted for in 'count' */ + + return true; + case IEEE80211_TPE_LOCAL_EIRP_PSD: + case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: + if (count > 4) + return false; + + N = count ? 1 << (count - 1) : 1; + needed += N; + + if (len < needed) + return false; + + if (len > needed) { + u8 K = u8_get_bits(env->variable[N], + IEEE80211_TX_PWR_ENV_EXT_COUNT); + + needed += 1 + K; + if (len < needed) + return false; + } + + return true; + } + + return false; +} + /* * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size * @he_oper_ie: byte data of the He Operations IE, stating from the byte diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cafc664ee531..a4efbfb8d796 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -550,6 +550,39 @@ struct ieee80211_fils_discovery { u32 max_interval; }; +#define IEEE80211_TPE_EIRP_ENTRIES_320MHZ 5 +struct ieee80211_parsed_tpe_eirp { + bool valid; + s8 power[IEEE80211_TPE_EIRP_ENTRIES_320MHZ]; + u8 count; +}; + +#define IEEE80211_TPE_PSD_ENTRIES_320MHZ 16 +struct ieee80211_parsed_tpe_psd { + bool valid; + s8 power[IEEE80211_TPE_PSD_ENTRIES_320MHZ]; + u8 count, n; +}; + +/** + * struct ieee80211_parsed_tpe - parsed transmit power envelope information + * @max_local: maximum local EIRP, one value for 20, 40, 80, 160, 320 MHz each + * (indexed by TX power category) + * @max_reg_client: maximum regulatory client EIRP, one value for 20, 40, 80, + * 160, 320 MHz each + * (indexed by TX power category) + * @psd_local: maximum local power spectral density, one value for each 20 MHz + * subchannel per bss_conf's chanreq.oper + * (indexed by TX power category) + * @psd_reg_client: maximum regulatory power spectral density, one value for + * each 20 MHz subchannel per bss_conf's chanreq.oper + * (indexed by TX power category) + */ +struct ieee80211_parsed_tpe { + struct ieee80211_parsed_tpe_eirp max_local[2], max_reg_client[2]; + struct ieee80211_parsed_tpe_psd psd_local[2], psd_reg_client[2]; +}; + /** * struct ieee80211_bss_conf - holds the BSS's changing parameters * @@ -662,8 +695,7 @@ struct ieee80211_fils_discovery { * @beacon_tx_rate: The configured beacon transmit rate that needs to be passed * to driver when rate control is offloaded to firmware. * @power_type: power type of BSS for 6 GHz - * @tx_pwr_env: transmit power envelope array of BSS. - * @tx_pwr_env_num: number of @tx_pwr_env. + * @tpe: transmit power envelope information * @pwr_reduction: power constraint of BSS. * @eht_support: does this BSS support EHT * @csa_active: marks whether a channel switch is going on. @@ -766,8 +798,9 @@ struct ieee80211_bss_conf { u32 unsol_bcast_probe_resp_interval; struct cfg80211_bitrate_mask beacon_tx_rate; enum ieee80211_ap_reg_power power_type; - struct ieee80211_tx_pwr_env tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT]; - u8 tx_pwr_env_num; + + struct ieee80211_parsed_tpe tpe; + u8 pwr_reduction; bool eht_support; -- cgit v1.2.3 From 5a009b42e0418d30b3ffaff2f46c534cd79b3f23 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:37:57 +0200 Subject: wifi: mac80211: track changes in AP's TPE If the TPE (transmit power envelope) is changed, detect and report that to the driver. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506214536.103dda923f45.I990877e409ab8eade9ed7c172272e0cae57256cf@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a4efbfb8d796..a59eacfe0480 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -362,6 +362,7 @@ struct ieee80211_vif_chanctx_switch { * status changed. * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed. * @BSS_CHANGED_MLD_TTLM: negotiated TID to link mapping was changed + * @BSS_CHANGED_TPE: transmit power envelope changed */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -398,6 +399,7 @@ enum ieee80211_bss_change { BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), BSS_CHANGED_MLD_TTLM = BIT_ULL(34), + BSS_CHANGED_TPE = BIT_ULL(35), /* when adding here, make sure to change ieee80211_reconfig */ }; -- cgit v1.2.3 From f9a0757a4b2f5df4376963c25a3d7d7aeba78444 Mon Sep 17 00:00:00 2001 From: Pradeep Kumar Chitrapu Date: Wed, 15 May 2024 11:13:18 -0700 Subject: wifi: mac80211: Add EHT UL MU-MIMO flag in ieee80211_bss_conf Add flag for Full Bandwidth UL MU-MIMO for EHT. This is utilized to pass EHT MU-MIMO configurations from user space to driver in AP mode. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Signed-off-by: Pradeep Kumar Chitrapu Link: https://msgid.link/20240515181327.12855-2-quic_pradeepc@quicinc.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a59eacfe0480..ecfa65ade226 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -735,6 +735,9 @@ struct ieee80211_parsed_tpe { * beamformee * @eht_mu_beamformer: in AP-mode, does this BSS enable operation as an EHT MU * beamformer + * @eht_80mhz_full_bw_ul_mumimo: in AP-mode, does this BSS support the + * reception of an EHT TB PPDU on an RU that spans the entire PPDU + * bandwidth */ struct ieee80211_bss_conf { struct ieee80211_vif *vif; @@ -828,6 +831,7 @@ struct ieee80211_bss_conf { bool eht_su_beamformer; bool eht_su_beamformee; bool eht_mu_beamformer; + bool eht_80mhz_full_bw_ul_mumimo; }; /** -- cgit v1.2.3 From aa4ec06c455d0200eea0a4361cc58eb5b8bf68c4 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 17 May 2024 18:33:31 +0300 Subject: wifi: cfg80211: use __counted_by where appropriate Annotate 'sub_specs' of 'struct cfg80211_sar_specs', 'channels' of 'struct cfg80211_sched_scan_request', 'channels' of 'struct cfg80211_wowlan_nd_match', and 'matches' of 'struct cfg80211_wowlan_nd_info' with '__counted_by' attribute. Briefly tested with clang 18.1.1 and CONFIG_UBSAN_BOUNDS running iwlwifi. Signed-off-by: Dmitry Antipov Link: https://msgid.link/20240517153332.18271-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cbf1664dc569..d79180bec7a1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2200,7 +2200,7 @@ struct cfg80211_sar_sub_specs { struct cfg80211_sar_specs { enum nl80211_sar_type type; u32 num_sub_specs; - struct cfg80211_sar_sub_specs sub_specs[]; + struct cfg80211_sar_sub_specs sub_specs[] __counted_by(num_sub_specs); }; @@ -2838,7 +2838,7 @@ struct cfg80211_sched_scan_request { struct list_head list; /* keep last */ - struct ieee80211_channel *channels[]; + struct ieee80211_channel *channels[] __counted_by(n_channels); }; /** @@ -3582,7 +3582,7 @@ struct cfg80211_coalesce { struct cfg80211_wowlan_nd_match { struct cfg80211_ssid ssid; int n_channels; - u32 channels[]; + u32 channels[] __counted_by(n_channels); }; /** @@ -3596,7 +3596,7 @@ struct cfg80211_wowlan_nd_match { */ struct cfg80211_wowlan_nd_info { int n_matches; - struct cfg80211_wowlan_nd_match *matches[]; + struct cfg80211_wowlan_nd_match *matches[] __counted_by(n_matches); }; /** -- cgit v1.2.3 From 3d9d313d518c5bc9e5ab6aeab86c9fa4bece095c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 10 May 2024 18:08:13 +0300 Subject: drm/connector: update edid_blob_ptr documentation Accessing the EDID via edid_blob_ptr causes chicken-and-egg problems. Keep edid_blob_ptr as the userspace interface that should be accessed via dedicated functions. Reviewed-by: Chaitanya Kumar Borah Acked-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/b6aa1ea30ae85ef9e9814315d3437e82f0ba6754.1715353572.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/drm_connector.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index fe88d7fc6b8f..58ee9adf9091 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1636,8 +1636,12 @@ struct drm_connector { /** * @edid_blob_ptr: DRM property containing EDID if present. Protected by - * &drm_mode_config.mutex. This should be updated only by calling + * &drm_mode_config.mutex. + * + * This must be updated only by calling drm_edid_connector_update() or * drm_connector_update_edid_property(). + * + * This must not be used by drivers directly. */ struct drm_property_blob *edid_blob_ptr; -- cgit v1.2.3 From 4d25ca2d6801cfcf26f7f39c561611ba5be99bf8 Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Thu, 9 May 2024 14:18:32 -0700 Subject: net: Rename mono_delivery_time to tstamp_type for scalabilty mono_delivery_time was added to check if skb->tstamp has delivery time in mono clock base (i.e. EDT) otherwise skb->tstamp has timestamp in ingress and delivery_time at egress. Renaming the bitfield from mono_delivery_time to tstamp_type is for extensibilty for other timestamps such as userspace timestamp (i.e. SO_TXTIME) set via sock opts. As we are renaming the mono_delivery_time to tstamp_type, it makes sense to start assigning tstamp_type based on enum defined in this commit. Earlier we used bool arg flag to check if the tstamp is mono in function skb_set_delivery_time, Now the signature of the functions accepts tstamp_type to distinguish between mono and real time. Also skb_set_delivery_type_by_clockid is a new function which accepts clockid to determine the tstamp_type. In future tstamp_type:1 can be extended to support userspace timestamp by increasing the bitfield. Signed-off-by: Abhishek Chauhan Reviewed-by: Willem de Bruijn Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240509211834.3235191-2-quic_abchauha@quicinc.com Signed-off-by: Martin KaFai Lau --- include/linux/skbuff.h | 52 ++++++++++++++++++++++++++++++++++++------------- include/net/inet_frag.h | 4 ++-- 2 files changed, 41 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c0b97c93a6de..3a721cc3b644 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -706,6 +706,11 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif +enum skb_tstamp_type { + SKB_CLOCK_REALTIME, + SKB_CLOCK_MONOTONIC, +}; + /** * DOC: Basic sk_buff geometry * @@ -823,10 +828,8 @@ typedef unsigned char *sk_buff_data_t; * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required - * @mono_delivery_time: When set, skb->tstamp has the - * delivery_time in mono clock base (i.e. EDT). Otherwise, the - * skb->tstamp has the (rcv) timestamp at ingress and - * delivery_time at egress. + * @tstamp_type: When set, skb->tstamp has the + * delivery_time clock base of skb->tstamp. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation. @@ -954,7 +957,7 @@ struct sk_buff { /* private: */ __u8 __mono_tc_offset[0]; /* public: */ - __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ + __u8 tstamp_type:1; /* See skb_tstamp_type */ #ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; @@ -4183,7 +4186,7 @@ static inline void skb_get_new_timestampns(const struct sk_buff *skb, static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); - skb->mono_delivery_time = 0; + skb->tstamp_type = SKB_CLOCK_REALTIME; } static inline ktime_t net_timedelta(ktime_t t) @@ -4192,10 +4195,33 @@ static inline ktime_t net_timedelta(ktime_t t) } static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, - bool mono) + u8 tstamp_type) { skb->tstamp = kt; - skb->mono_delivery_time = kt && mono; + + if (kt) + skb->tstamp_type = tstamp_type; + else + skb->tstamp_type = SKB_CLOCK_REALTIME; +} + +static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, + ktime_t kt, clockid_t clockid) +{ + u8 tstamp_type = SKB_CLOCK_REALTIME; + + switch (clockid) { + case CLOCK_REALTIME: + break; + case CLOCK_MONOTONIC: + tstamp_type = SKB_CLOCK_MONOTONIC; + break; + default: + WARN_ON_ONCE(1); + kt = 0; + } + + skb_set_delivery_time(skb, kt, tstamp_type); } DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); @@ -4205,8 +4231,8 @@ DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); */ static inline void skb_clear_delivery_time(struct sk_buff *skb) { - if (skb->mono_delivery_time) { - skb->mono_delivery_time = 0; + if (skb->tstamp_type) { + skb->tstamp_type = SKB_CLOCK_REALTIME; if (static_branch_unlikely(&netstamp_needed_key)) skb->tstamp = ktime_get_real(); else @@ -4216,7 +4242,7 @@ static inline void skb_clear_delivery_time(struct sk_buff *skb) static inline void skb_clear_tstamp(struct sk_buff *skb) { - if (skb->mono_delivery_time) + if (skb->tstamp_type) return; skb->tstamp = 0; @@ -4224,7 +4250,7 @@ static inline void skb_clear_tstamp(struct sk_buff *skb) static inline ktime_t skb_tstamp(const struct sk_buff *skb) { - if (skb->mono_delivery_time) + if (skb->tstamp_type) return 0; return skb->tstamp; @@ -4232,7 +4258,7 @@ static inline ktime_t skb_tstamp(const struct sk_buff *skb) static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond) { - if (!skb->mono_delivery_time && skb->tstamp) + if (skb->tstamp_type != SKB_CLOCK_MONOTONIC && skb->tstamp) return skb->tstamp; if (static_branch_unlikely(&netstamp_needed_key) || cond) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 153960663ce4..5af6eb14c5db 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -76,7 +76,7 @@ struct frag_v6_compare_key { * @stamp: timestamp of the last received fragment * @len: total length of the original datagram * @meat: length of received fragments so far - * @mono_delivery_time: stamp has a mono delivery time (EDT) + * @tstamp_type: stamp has a mono delivery time (EDT) * @flags: fragment queue flags * @max_size: maximum received fragment size * @fqdir: pointer to struct fqdir @@ -97,7 +97,7 @@ struct inet_frag_queue { ktime_t stamp; int len; int meat; - u8 mono_delivery_time; + u8 tstamp_type; __u8 flags; u16 max_size; struct fqdir *fqdir; -- cgit v1.2.3 From 1693c5db6ab8262e6f5263f9d211855959aa5acd Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Thu, 9 May 2024 14:18:33 -0700 Subject: net: Add additional bit to support clockid_t timestamp type tstamp_type is now set based on actual clockid_t compressed into 2 bits. To make the design scalable for future needs this commit bring in the change to extend the tstamp_type:1 to tstamp_type:2 to support other clockid_t timestamp. We now support CLOCK_TAI as part of tstamp_type as part of this commit with existing support CLOCK_MONOTONIC and CLOCK_REALTIME. Signed-off-by: Abhishek Chauhan Reviewed-by: Willem de Bruijn Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240509211834.3235191-3-quic_abchauha@quicinc.com Signed-off-by: Martin KaFai Lau --- include/linux/skbuff.h | 18 ++++++++++++------ include/uapi/linux/bpf.h | 15 ++++++++++----- 2 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a721cc3b644..1e5c97daaa37 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -709,6 +709,8 @@ typedef unsigned char *sk_buff_data_t; enum skb_tstamp_type { SKB_CLOCK_REALTIME, SKB_CLOCK_MONOTONIC, + SKB_CLOCK_TAI, + __SKB_CLOCK_MAX = SKB_CLOCK_TAI, }; /** @@ -957,7 +959,7 @@ struct sk_buff { /* private: */ __u8 __mono_tc_offset[0]; /* public: */ - __u8 tstamp_type:1; /* See skb_tstamp_type */ + __u8 tstamp_type:2; /* See skb_tstamp_type */ #ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; @@ -1087,15 +1089,16 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) -/* if you move tc_at_ingress or mono_delivery_time +/* if you move tc_at_ingress or tstamp_type * around, you also must adapt these constants. */ #ifdef __BIG_ENDIAN_BITFIELD -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7) -#define TC_AT_INGRESS_MASK (1 << 6) +#define SKB_TSTAMP_TYPE_MASK (3 << 6) +#define SKB_TSTAMP_TYPE_RSHIFT (6) +#define TC_AT_INGRESS_MASK (1 << 5) #else -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0) -#define TC_AT_INGRESS_MASK (1 << 1) +#define SKB_TSTAMP_TYPE_MASK (3) +#define TC_AT_INGRESS_MASK (1 << 2) #endif #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset) @@ -4216,6 +4219,9 @@ static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, case CLOCK_MONOTONIC: tstamp_type = SKB_CLOCK_MONOTONIC; break; + case CLOCK_TAI: + tstamp_type = SKB_CLOCK_TAI; + break; default: WARN_ON_ONCE(1); kt = 0; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 90706a47f6ff..25ea393cf084 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6207,12 +6207,17 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +/* The enum used in skb->tstamp_type. It specifies the clock type + * of the time stored in the skb->tstamp. + */ enum { - BPF_SKB_TSTAMP_UNSPEC, - BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ - /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, - * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC - * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */ + BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */ + BPF_SKB_CLOCK_REALTIME = 0, + BPF_SKB_CLOCK_MONOTONIC = 1, + BPF_SKB_CLOCK_TAI = 2, + /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle, + * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid. */ }; -- cgit v1.2.3 From 2c1713a8f1c94033a6e00aae4693ab03e8a3b9f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 18 May 2024 16:58:47 +0200 Subject: bpf: constify member bpf_sysctl_kern:: Table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysctl core is preparing to only expose instances of struct ctl_table as "const". This will also affect the ctl_table argument of sysctl handlers, for which bpf_sysctl_kern::table is also used. As the function prototype of all sysctl handlers throughout the tree needs to stay consistent that change will be done in one commit. To reduce the size of that final commit, switch this utility type which is not bound by "typedef proc_handler" to "const struct ctl_table". No functional change. Signed-off-by: Thomas Weißschuh Signed-off-by: Daniel Borkmann Reviewed-by: Joel Granados Link: https://lore.kernel.org/bpf/20240518-sysctl-const-handler-bpf-v1-1-f0d7186743c1@weissschuh.net --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0f12cf01070e..b02aea291b7e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1406,7 +1406,7 @@ struct bpf_sock_ops_kern { struct bpf_sysctl_kern { struct ctl_table_header *head; - struct ctl_table *table; + const struct ctl_table *table; void *cur_val; size_t cur_len; void *new_val; -- cgit v1.2.3 From 73e75e6fc352bdca08f7e0893d5b6bb37171bdd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Koutn=C3=BD?= Date: Tue, 21 May 2024 11:21:26 +0200 Subject: cgroup/pids: Separate semantics of pids.events related to pids.max MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, when pids.max limit is breached in the hierarchy, the event is counted and reported in the cgroup where the forking task resides. This decouples the limit and the notification caused by the limit making it hard to detect when the actual limit was effected. Redefine the pids.events:max as: the number of times the limit of the cgroup was hit. (Implementation differentiates also "forkfail" event but this is currently not exposed as it would better fit into pids.stat. It also differs from pids.events:max only when pids.max is configured on non-leaf cgroups.) Since it changes semantics of the original "max" event, introduce this change only in the v2 API of the controller and add a cgroup2 mount option to revert to the legacy behavior. Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ea48c861cd36..b36690ca0d3f 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -119,7 +119,12 @@ enum { /* * Enable hugetlb accounting for the memory controller. */ - CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19), + CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19), + + /* + * Enable legacy local pids.events. + */ + CGRP_ROOT_PIDS_LOCAL_EVENTS = (1 << 20), }; /* cftype->flags */ -- cgit v1.2.3 From 965cc040bf0698e81b0c0aef359ae650b42b428e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 20 May 2024 16:19:56 +0200 Subject: ASoC: Constify channel mapping array arguments in set_channel_map() There is no need for implementations of DAI set_channel_map() to modify contents of passed arrays with actual channel mapping. Additionally, the caller keeps full ownership of the array. Constify these pointer arguments so the code will be safer and easier to read (documenting the caller's ownership). Acked-by: Charles Keepax Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240520-asoc-x1e80100-4-channel-mapping-v4-1-f657159b4aad@linaro.org Signed-off-by: Mark Brown --- include/sound/cs35l41.h | 4 ++-- include/sound/soc-dai.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h index bb70782d15d0..43c6a9ef8d9f 100644 --- a/include/sound/cs35l41.h +++ b/include/sound/cs35l41.h @@ -896,8 +896,8 @@ int cs35l41_test_key_lock(struct device *dev, struct regmap *regmap); int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap); int cs35l41_register_errata_patch(struct device *dev, struct regmap *reg, unsigned int reg_revid); int cs35l41_set_channels(struct device *dev, struct regmap *reg, - unsigned int tx_num, unsigned int *tx_slot, - unsigned int rx_num, unsigned int *rx_slot); + unsigned int tx_num, const unsigned int *tx_slot, + unsigned int rx_num, const unsigned int *rx_slot); int cs35l41_gpio_config(struct regmap *regmap, struct cs35l41_hw_cfg *hw_cfg); void cs35l41_configure_cs_dsp(struct device *dev, struct regmap *reg, struct cs_dsp *dsp); int cs35l41_set_cspl_mbox_cmd(struct device *dev, struct regmap *regmap, diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index adcd8719d343..15ef268c9845 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -188,8 +188,8 @@ int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, unsigned int rx_mask, int slots, int slot_width); int snd_soc_dai_set_channel_map(struct snd_soc_dai *dai, - unsigned int tx_num, unsigned int *tx_slot, - unsigned int rx_num, unsigned int *rx_slot); + unsigned int tx_num, const unsigned int *tx_slot, + unsigned int rx_num, const unsigned int *rx_slot); int snd_soc_dai_set_tristate(struct snd_soc_dai *dai, int tristate); @@ -305,8 +305,8 @@ struct snd_soc_dai_ops { unsigned int tx_mask, unsigned int rx_mask, int slots, int slot_width); int (*set_channel_map)(struct snd_soc_dai *dai, - unsigned int tx_num, unsigned int *tx_slot, - unsigned int rx_num, unsigned int *rx_slot); + unsigned int tx_num, const unsigned int *tx_slot, + unsigned int rx_num, const unsigned int *rx_slot); int (*get_channel_map)(struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot); -- cgit v1.2.3 From 734447685ecc7c6328e40cb1bd4aaeeac03c1413 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 13 May 2024 19:37:20 +0200 Subject: ASoC: topology: Constify an argument of snd_soc_tplg_component_load() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit snd_soc_tplg_component_load() does not modify its "*ops" argument. It only read some values and stores it in "soc_tplg.ops". This argument and the ops field in "struct soc_tplg" can be made const. Signed-off-by: Christophe JAILLET Reviewed-by: Amadeusz Sławiński Link: https://msgid.link/r/f2f983e791d7f941a95556bb147f426a345d84d4.1715526069.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- include/sound/soc-topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-topology.h b/include/sound/soc-topology.h index f055c6917f6c..1eedd203ac29 100644 --- a/include/sound/soc-topology.h +++ b/include/sound/soc-topology.h @@ -178,7 +178,7 @@ static inline const void *snd_soc_tplg_get_data(struct snd_soc_tplg_hdr *hdr) /* Dynamic Object loading and removal for component drivers */ int snd_soc_tplg_component_load(struct snd_soc_component *comp, - struct snd_soc_tplg_ops *ops, const struct firmware *fw); + const struct snd_soc_tplg_ops *ops, const struct firmware *fw); int snd_soc_tplg_component_remove(struct snd_soc_component *comp); /* Binds event handlers to dynamic widgets */ -- cgit v1.2.3 From 7a147670035ddec35f3fb2ace538ad56ad82c861 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 12 May 2024 13:11:21 +0200 Subject: regulator: consumer: Reorder fields in 'struct regulator_bulk_data' Based on pahole, 2 holes can be combined in 'struct regulator_bulk_data'. On x86_64 and allmodconfig, this shrinks the size of the structure from 32 to 24 bytes. This is usually a win, because this structure is often used for static global variables. As an example: Before: text data bss dec hex filename 3557 162 0 3719 e87 drivers/gpu/drm/msm/dsi/dsi_cfg.o After: text data bss dec hex filename 3477 162 0 3639 e37 drivers/gpu/drm/msm/dsi/dsi_cfg.o Signed-off-by: Christophe JAILLET Link: https://msgid.link/r/35c4edf2dbc6d4f24fb771341ded2989ae32f779.1715512259.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 59d0b9a79e6e..e6f81fc1fb17 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -128,11 +128,11 @@ struct regulator; * * @supply: The name of the supply. Initialised by the user before * using the bulk regulator APIs. + * @consumer: The regulator consumer for the supply. This will be managed + * by the bulk API. * @init_load_uA: After getting the regulator, regulator_set_load() will be * called with this load. Initialised by the user before * using the bulk regulator APIs. - * @consumer: The regulator consumer for the supply. This will be managed - * by the bulk API. * * The regulator APIs provide a series of regulator_bulk_() API calls as * a convenience to consumers which require multiple supplies. This @@ -140,8 +140,8 @@ struct regulator; */ struct regulator_bulk_data { const char *supply; - int init_load_uA; struct regulator *consumer; + int init_load_uA; /* private: Internal use */ int ret; -- cgit v1.2.3 From f261172d39f358dcecce13c310690d3937e0cca6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 17 May 2024 22:40:20 +0300 Subject: spi: bitbang: Use typedef for txrx_*() callbacks With a typedef for the txrx_*() callbacks the code looks neater. Note that typedef for a function is okay to have. Signed-off-by: Andy Shevchenko Link: https://msgid.link/r/20240517194104.747328-2-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/spi/spi_bitbang.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index b930eca2ef7b..7ca08b430ed5 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -4,6 +4,8 @@ #include +typedef u32 (*spi_bb_txrx_word_fn)(struct spi_device *, unsigned int, u32, u8, unsigned int); + struct spi_bitbang { struct mutex lock; u8 busy; @@ -28,9 +30,8 @@ struct spi_bitbang { int (*txrx_bufs)(struct spi_device *spi, struct spi_transfer *t); /* txrx_word[SPI_MODE_*]() just looks like a shift register */ - u32 (*txrx_word[4])(struct spi_device *spi, - unsigned nsecs, - u32 word, u8 bits, unsigned flags); + spi_bb_txrx_word_fn txrx_word[4]; + int (*set_line_direction)(struct spi_device *spi, bool output); }; -- cgit v1.2.3 From b90cc232e2ce8c959b19dc4b183e23e7aec137ab Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 17 May 2024 22:40:22 +0300 Subject: spi: bitbang: Replace hard coded number of SPI modes Instead of using hard coded number of modes, replace it with SPI_MODE_X_MASK + 1 to show relation to the SPI modes. Signed-off-by: Andy Shevchenko Link: https://msgid.link/r/20240517194104.747328-4-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/spi/spi_bitbang.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index 7ca08b430ed5..d4cb83195f7a 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -30,7 +30,7 @@ struct spi_bitbang { int (*txrx_bufs)(struct spi_device *spi, struct spi_transfer *t); /* txrx_word[SPI_MODE_*]() just looks like a shift register */ - spi_bb_txrx_word_fn txrx_word[4]; + spi_bb_txrx_word_fn txrx_word[SPI_MODE_X_MASK + 1]; int (*set_line_direction)(struct spi_device *spi, bool output); }; -- cgit v1.2.3 From 983095eaf6c161ef73d96152bfc1a99ca051cd57 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 25 May 2024 18:00:31 +0200 Subject: dma-buf/fence-array: Add flex array to struct dma_fence_array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an effort to get rid of all multiplications from allocation functions in order to prevent integer overflows [1][2]. The "struct dma_fence_array" can be refactored to add a flex array in order to have the "callback structures allocated behind the array" be more explicit. Do so: - makes the code more readable and safer. - allows using __counted_by() for additional checks - avoids some pointer arithmetic in dma_fence_array_enable_signaling() Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [1] Link: https://github.com/KSPP/linux/issues/160 [2] Signed-off-by: Christophe JAILLET Reviewed-by: Kees Cook Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/8b4e556e07b5dd78bb8a39b67ea0a43b199083c8.1716652811.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christian König --- include/linux/dma-fence-array.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index ec7f25def392..29c5650c1038 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -33,6 +33,7 @@ struct dma_fence_array_cb { * @num_pending: fences in the array still pending * @fences: array of the fences * @work: internal irq_work function + * @callbacks: array of callback helpers */ struct dma_fence_array { struct dma_fence base; @@ -43,6 +44,8 @@ struct dma_fence_array { struct dma_fence **fences; struct irq_work work; + + struct dma_fence_array_cb callbacks[] __counted_by(num_fences); }; /** -- cgit v1.2.3 From 0edb555a65d1ef047a9805051c36922b52a38a9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 9 Oct 2023 12:37:26 +0200 Subject: platform: Make platform_driver::remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit struct platform_driver::remove returning an integer made driver authors expect that returning an error code was proper error handling. However the driver core ignores the error and continues to remove the device because there is nothing the core could do anyhow and reentering the remove callback again is only calling for trouble. To prevent such wrong assumptions, change the return type of the remove callback to void. This was prepared by introducing an alternative remove callback returning void and converting all drivers to that. So .remove() can be changed without further changes in drivers. This corresponds to step b) of the plan outlined in commit 5c5a7680e67b ("platform: Provide a remove callback that returns no value"). Signed-off-by: Uwe Kleine-König --- include/linux/platform_device.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 7a41c72c1959..d422db6eec63 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -237,15 +237,14 @@ struct platform_driver { int (*probe)(struct platform_device *); /* - * Traditionally the remove callback returned an int which however is - * ignored by the driver core. This led to wrong expectations by driver - * authors who thought returning an error code was a valid error - * handling strategy. To convert to a callback returning void, new - * drivers should implement .remove_new() until the conversion it done - * that eventually makes .remove() return void. + * .remove_new() is a relic from a prototype conversion of .remove(). + * New drivers are supposed to implement .remove(). Once all drivers are + * converted to not use .remove_new any more, it will be dropped. */ - int (*remove)(struct platform_device *); - void (*remove_new)(struct platform_device *); + union { + void (*remove)(struct platform_device *); + void (*remove_new)(struct platform_device *); + }; void (*shutdown)(struct platform_device *); int (*suspend)(struct platform_device *, pm_message_t state); -- cgit v1.2.3 From c80c4490c280a1678e47d34d2a335a58f1318615 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 May 2024 12:45:31 +0000 Subject: cleanup: Standardize the header guard define's name At some point during early development, the header must have been named , as evidenced by the header guard name: #ifndef __LINUX_GUARDS_H #define __LINUX_GUARDS_H It ended up being , but the old guard name for a file name that was never upstream never changed. Do that now - and while at it, also use the canonical _LINUX prefix, instead of the less common __LINUX prefix. Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Link: https://lore.kernel.org/r/171664113181.10875.8784434350512348496.tip-bot2@tip-bot2 --- include/linux/cleanup.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index c2d09bc4f976..cef68e8e09b6 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_GUARDS_H -#define __LINUX_GUARDS_H +#ifndef _LINUX_CLEANUP_H +#define _LINUX_CLEANUP_H #include @@ -247,4 +247,4 @@ __DEFINE_LOCK_GUARD_0(_name, _lock) { return class_##_name##_lock_ptr(_T); } -#endif /* __LINUX_GUARDS_H */ +#endif /* _LINUX_CLEANUP_H */ -- cgit v1.2.3 From 5350f6ec55df381cd99db2a6bde283328fb3f75a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 3 May 2024 21:42:30 +0300 Subject: mtd: cfi: Get rid of redundant 'else' In the snippets like the following if (...) return / goto / break / continue ...; else ... the 'else' is redundant. Get rid of it. Signed-off-by: Andy Shevchenko Reviewed-by: Jeff Johnson Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240503184230.2927283-1-andriy.shevchenko@linux.intel.com --- include/linux/mtd/cfi.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 947410faf9e2..35ca19ae21ae 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -308,32 +308,32 @@ static inline uint8_t cfi_read_query(struct map_info *map, uint32_t addr) { map_word val = map_read(map, addr); - if (map_bankwidth_is_1(map)) { + if (map_bankwidth_is_1(map)) return val.x[0]; - } else if (map_bankwidth_is_2(map)) { + if (map_bankwidth_is_2(map)) return cfi16_to_cpu(map, val.x[0]); - } else { - /* No point in a 64-bit byteswap since that would just be - swapping the responses from different chips, and we are - only interested in one chip (a representative sample) */ - return cfi32_to_cpu(map, val.x[0]); - } + /* + * No point in a 64-bit byteswap since that would just be + * swapping the responses from different chips, and we are + * only interested in one chip (a representative sample) + */ + return cfi32_to_cpu(map, val.x[0]); } static inline uint16_t cfi_read_query16(struct map_info *map, uint32_t addr) { map_word val = map_read(map, addr); - if (map_bankwidth_is_1(map)) { + if (map_bankwidth_is_1(map)) return val.x[0] & 0xff; - } else if (map_bankwidth_is_2(map)) { + if (map_bankwidth_is_2(map)) return cfi16_to_cpu(map, val.x[0]); - } else { - /* No point in a 64-bit byteswap since that would just be - swapping the responses from different chips, and we are - only interested in one chip (a representative sample) */ - return cfi32_to_cpu(map, val.x[0]); - } + /* + * No point in a 64-bit byteswap since that would just be + * swapping the responses from different chips, and we are + * only interested in one chip (a representative sample) + */ + return cfi32_to_cpu(map, val.x[0]); } void cfi_udelay(int us); -- cgit v1.2.3 From 56813b244e5f2ace887f04c4a69a2a0041992297 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 27 May 2024 11:27:47 +0200 Subject: w1: Add missing newline and fix typos in w1_bus_master comment - Add missing newline before @return - s/bytes/byte/ - s/handles/handle/ - s/exists/exist/ in dev_info() message Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20240527092746.263038-2-thorsten.blum@toblux.com [krzysztof: squash "w1: Fix typo in dev_info() message"] Signed-off-by: Krzysztof Kozlowski --- include/linux/w1.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/w1.h b/include/linux/w1.h index 9a2a0ef39018..064805bfae3f 100644 --- a/include/linux/w1.h +++ b/include/linux/w1.h @@ -85,7 +85,8 @@ typedef void (*w1_slave_found_callback)(struct w1_master *, u64); * * @data: the first parameter in all the functions below * - * @read_bit: Sample the line level @return the level read (0 or 1) + * @read_bit: Sample the line level + * @return the level read (0 or 1) * * @write_bit: Sets the line level * @@ -95,7 +96,7 @@ typedef void (*w1_slave_found_callback)(struct w1_master *, u64); * touch_bit(1) = write-1 / read cycle * @return the bit read (0 or 1) * - * @read_byte: Reads a bytes. Same as 8 touch_bit(1) calls. + * @read_byte: Reads a byte. Same as 8 touch_bit(1) calls. * @return the byte read * * @write_byte: Writes a byte. Same as 8 touch_bit(x) calls. @@ -114,7 +115,7 @@ typedef void (*w1_slave_found_callback)(struct w1_master *, u64); * @set_pullup: Put out a strong pull-up pulse of the specified duration. * @return -1=Error, 0=completed * - * @search: Really nice hardware can handles the different types of ROM search + * @search: Really nice hardware can handle the different types of ROM search * w1_master* is passed to the slave found callback. * u8 is search_type, W1_SEARCH or W1_ALARM_SEARCH * -- cgit v1.2.3 From 8cb3aeebcb86088e30232277c9bee9054837442b Mon Sep 17 00:00:00 2001 From: Mohan Kumar Date: Mon, 27 May 2024 12:56:06 +0000 Subject: ASoC: simple-card-utils: Split simple_fixup_sample_fmt func Split the simple_fixup_sample_fmt() into two functions by adding one more function named simple_util_get_sample_fmt() to return the sample format value. This is useful for drivers that wish to simply get the sample format without setting the mask. Signed-off-by: Mohan Kumar Signed-off-by: Sameer Pujar Link: https://msgid.link/r/20240527125608.2461300-2-spujar@nvidia.com Signed-off-by: Mark Brown --- include/sound/simple_card_utils.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h index ad67957b7b48..2c2279d082ec 100644 --- a/include/sound/simple_card_utils.h +++ b/include/sound/simple_card_utils.h @@ -174,6 +174,8 @@ void simple_util_parse_convert(struct device_node *np, char *prefix, struct simple_util_data *data); bool simple_util_is_convert_required(const struct simple_util_data *data); +int simple_util_get_sample_fmt(struct simple_util_data *data); + int simple_util_parse_routing(struct snd_soc_card *card, char *prefix); int simple_util_parse_widgets(struct snd_soc_card *card, -- cgit v1.2.3 From 3aa63a569c64e708df547a8913c84e64a06e7853 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 17 May 2024 20:08:40 -0400 Subject: fs: switch timespec64 fields in inode to discrete integers Adjacent struct timespec64's pack poorly. Switch them to discrete integer fields for the seconds and nanoseconds. This shaves 8 bytes off struct inode with a garden-variety Fedora Kconfig on x86_64, but that also moves the i_lock into the previous cacheline, away from the fields it protects. To remedy that, move i_generation above the i_lock, which moves the new 4-byte hole to just after the i_fsnotify_mask in my setup. Amir has plans to use that to expand the i_fsnotify_mask, so add a comment to that effect as well. Cc: Amir Goldstein Suggested-by: Linus Torvalds Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20240517-amtime-v1-1-7b804ca4be8f@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0283cf366c2a..639885621608 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -660,9 +660,13 @@ struct inode { }; dev_t i_rdev; loff_t i_size; - struct timespec64 __i_atime; - struct timespec64 __i_mtime; - struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */ + time64_t i_atime_sec; + time64_t i_mtime_sec; + time64_t i_ctime_sec; + u32 i_atime_nsec; + u32 i_mtime_nsec; + u32 i_ctime_nsec; + u32 i_generation; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; @@ -719,10 +723,10 @@ struct inode { unsigned i_dir_seq; }; - __u32 i_generation; #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; /* all events this inode cares about */ + /* 32-bit hole reserved for expanding i_fsnotify_mask */ struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif @@ -1538,23 +1542,27 @@ struct timespec64 inode_set_ctime_current(struct inode *inode); static inline time64_t inode_get_atime_sec(const struct inode *inode) { - return inode->__i_atime.tv_sec; + return inode->i_atime_sec; } static inline long inode_get_atime_nsec(const struct inode *inode) { - return inode->__i_atime.tv_nsec; + return inode->i_atime_nsec; } static inline struct timespec64 inode_get_atime(const struct inode *inode) { - return inode->__i_atime; + struct timespec64 ts = { .tv_sec = inode_get_atime_sec(inode), + .tv_nsec = inode_get_atime_nsec(inode) }; + + return ts; } static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_atime = ts; + inode->i_atime_sec = ts.tv_sec; + inode->i_atime_nsec = ts.tv_nsec; return ts; } @@ -1563,28 +1571,32 @@ static inline struct timespec64 inode_set_atime(struct inode *inode, { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; + return inode_set_atime_to_ts(inode, ts); } static inline time64_t inode_get_mtime_sec(const struct inode *inode) { - return inode->__i_mtime.tv_sec; + return inode->i_mtime_sec; } static inline long inode_get_mtime_nsec(const struct inode *inode) { - return inode->__i_mtime.tv_nsec; + return inode->i_mtime_nsec; } static inline struct timespec64 inode_get_mtime(const struct inode *inode) { - return inode->__i_mtime; + struct timespec64 ts = { .tv_sec = inode_get_mtime_sec(inode), + .tv_nsec = inode_get_mtime_nsec(inode) }; + return ts; } static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_mtime = ts; + inode->i_mtime_sec = ts.tv_sec; + inode->i_mtime_nsec = ts.tv_nsec; return ts; } @@ -1598,23 +1610,27 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode, static inline time64_t inode_get_ctime_sec(const struct inode *inode) { - return inode->__i_ctime.tv_sec; + return inode->i_ctime_sec; } static inline long inode_get_ctime_nsec(const struct inode *inode) { - return inode->__i_ctime.tv_nsec; + return inode->i_ctime_nsec; } static inline struct timespec64 inode_get_ctime(const struct inode *inode) { - return inode->__i_ctime; + struct timespec64 ts = { .tv_sec = inode_get_ctime_sec(inode), + .tv_nsec = inode_get_ctime_nsec(inode) }; + + return ts; } static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_ctime = ts; + inode->i_ctime_sec = ts.tv_sec; + inode->i_ctime_nsec = ts.tv_nsec; return ts; } -- cgit v1.2.3 From 447e140e66fd226350b3ce86cffc965eaae4c856 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 May 2024 13:17:01 +0300 Subject: gpio: Remove legacy API documentation In order to discourage people to use old and legacy GPIO APIs remove the respective documentation completely. It also helps further cleanups of the legacy GPIO API leftovers, which is ongoing task. Signed-off-by: Andy Shevchenko Reviewed-by: Hu Haowen <2023002089@link.tyut.edu.cn> Link: https://lore.kernel.org/r/20240508101703.830066-1-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 56ac7e7a2889..063f71b18a7c 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * + * NOTE: This header *must not* be included. * * This is the LEGACY GPIO bulk include file, including legacy APIs. It is * used for GPIO drivers still referencing the global GPIO numberspace, @@ -16,8 +16,6 @@ struct device; -/* see Documentation/driver-api/gpio/legacy.rst */ - /* make these flag values available regardless of GPIO kconfig options */ #define GPIOF_DIR_OUT (0 << 0) #define GPIOF_DIR_IN (1 << 0) @@ -121,8 +119,6 @@ static inline int gpio_to_irq(unsigned gpio) int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); -/* CONFIG_GPIOLIB: bindings for managed devices that want to request gpios */ - int devm_gpio_request(struct device *dev, unsigned gpio, const char *label); int devm_gpio_request_one(struct device *dev, unsigned gpio, unsigned long flags, const char *label); -- cgit v1.2.3 From ca3a91063acc3abc0fb233591d8cda4b37dc39ac Mon Sep 17 00:00:00 2001 From: Danila Tikhonov Date: Sun, 5 May 2024 23:10:33 +0300 Subject: dt-bindings: clock: qcom: Add SM7150 DISPCC clocks Add device tree bindings for the display clock controller on Qualcomm SM7150 platform. Co-developed-by: David Wronek Signed-off-by: David Wronek Signed-off-by: Danila Tikhonov Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20240505201038.276047-4-danila@jiaxyga.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm7150-dispcc.h | 59 ++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sm7150-dispcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm7150-dispcc.h b/include/dt-bindings/clock/qcom,sm7150-dispcc.h new file mode 100644 index 000000000000..fc1fefe8fd72 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm7150-dispcc.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2024, Danila Tikhonov + * Copyright (c) 2024, David Wronek + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_DISPCC_SM7150_H +#define _DT_BINDINGS_CLK_QCOM_DISPCC_SM7150_H + +/* DISPCC clock registers */ +#define DISPCC_PLL0 0 +#define DISPCC_MDSS_AHB_CLK 1 +#define DISPCC_MDSS_AHB_CLK_SRC 2 +#define DISPCC_MDSS_BYTE0_CLK 3 +#define DISPCC_MDSS_BYTE0_CLK_SRC 4 +#define DISPCC_MDSS_BYTE0_DIV_CLK_SRC 5 +#define DISPCC_MDSS_BYTE0_INTF_CLK 6 +#define DISPCC_MDSS_BYTE1_CLK 7 +#define DISPCC_MDSS_BYTE1_CLK_SRC 8 +#define DISPCC_MDSS_BYTE1_DIV_CLK_SRC 9 +#define DISPCC_MDSS_BYTE1_INTF_CLK 10 +#define DISPCC_MDSS_DP_AUX_CLK 11 +#define DISPCC_MDSS_DP_AUX_CLK_SRC 12 +#define DISPCC_MDSS_DP_CRYPTO_CLK 13 +#define DISPCC_MDSS_DP_CRYPTO_CLK_SRC 14 +#define DISPCC_MDSS_DP_LINK_CLK 15 +#define DISPCC_MDSS_DP_LINK_CLK_SRC 16 +#define DISPCC_MDSS_DP_LINK_INTF_CLK 17 +#define DISPCC_MDSS_DP_PIXEL1_CLK 18 +#define DISPCC_MDSS_DP_PIXEL1_CLK_SRC 19 +#define DISPCC_MDSS_DP_PIXEL_CLK 20 +#define DISPCC_MDSS_DP_PIXEL_CLK_SRC 21 +#define DISPCC_MDSS_ESC0_CLK 22 +#define DISPCC_MDSS_ESC0_CLK_SRC 23 +#define DISPCC_MDSS_ESC1_CLK 24 +#define DISPCC_MDSS_ESC1_CLK_SRC 25 +#define DISPCC_MDSS_MDP_CLK 26 +#define DISPCC_MDSS_MDP_CLK_SRC 27 +#define DISPCC_MDSS_MDP_LUT_CLK 28 +#define DISPCC_MDSS_NON_GDSC_AHB_CLK 29 +#define DISPCC_MDSS_PCLK0_CLK 30 +#define DISPCC_MDSS_PCLK0_CLK_SRC 31 +#define DISPCC_MDSS_PCLK1_CLK 32 +#define DISPCC_MDSS_PCLK1_CLK_SRC 33 +#define DISPCC_MDSS_ROT_CLK 34 +#define DISPCC_MDSS_ROT_CLK_SRC 35 +#define DISPCC_MDSS_RSCC_AHB_CLK 36 +#define DISPCC_MDSS_RSCC_VSYNC_CLK 37 +#define DISPCC_MDSS_VSYNC_CLK 38 +#define DISPCC_MDSS_VSYNC_CLK_SRC 39 +#define DISPCC_XO_CLK_SRC 40 +#define DISPCC_SLEEP_CLK 41 +#define DISPCC_SLEEP_CLK_SRC 42 + +/* DISPCC GDSCR */ +#define MDSS_GDSC 0 + +#endif -- cgit v1.2.3 From 0fd2a048368ea99feccd7dfd6a5f42f6d011f10f Mon Sep 17 00:00:00 2001 From: Danila Tikhonov Date: Sun, 5 May 2024 23:10:35 +0300 Subject: dt-bindings: clock: qcom: Add SM7150 CAMCC clocks Add device tree bindings for the camera clock controller on Qualcomm SM7150 platform. Signed-off-by: Danila Tikhonov Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20240505201038.276047-6-danila@jiaxyga.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm7150-camcc.h | 113 ++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sm7150-camcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm7150-camcc.h b/include/dt-bindings/clock/qcom,sm7150-camcc.h new file mode 100644 index 000000000000..ce73ef0fe95d --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm7150-camcc.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2024, Danila Tikhonov + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_CAMCC_SM7150_H +#define _DT_BINDINGS_CLK_QCOM_CAMCC_SM7150_H + +/* Hardware clocks */ +#define CAMCC_PLL0_OUT_EVEN 0 +#define CAMCC_PLL0_OUT_ODD 1 +#define CAMCC_PLL1_OUT_EVEN 2 +#define CAMCC_PLL2_OUT_EARLY 3 +#define CAMCC_PLL3_OUT_EVEN 4 +#define CAMCC_PLL4_OUT_EVEN 5 + +/* CAMCC clock registers */ +#define CAMCC_PLL0 6 +#define CAMCC_PLL1 7 +#define CAMCC_PLL2 8 +#define CAMCC_PLL2_OUT_AUX 9 +#define CAMCC_PLL2_OUT_MAIN 10 +#define CAMCC_PLL3 11 +#define CAMCC_PLL4 12 +#define CAMCC_BPS_AHB_CLK 13 +#define CAMCC_BPS_AREG_CLK 14 +#define CAMCC_BPS_AXI_CLK 15 +#define CAMCC_BPS_CLK 16 +#define CAMCC_BPS_CLK_SRC 17 +#define CAMCC_CAMNOC_AXI_CLK 18 +#define CAMCC_CAMNOC_AXI_CLK_SRC 19 +#define CAMCC_CAMNOC_DCD_XO_CLK 20 +#define CAMCC_CCI_0_CLK 21 +#define CAMCC_CCI_0_CLK_SRC 22 +#define CAMCC_CCI_1_CLK 23 +#define CAMCC_CCI_1_CLK_SRC 24 +#define CAMCC_CORE_AHB_CLK 25 +#define CAMCC_CPAS_AHB_CLK 26 +#define CAMCC_CPHY_RX_CLK_SRC 27 +#define CAMCC_CSI0PHYTIMER_CLK 28 +#define CAMCC_CSI0PHYTIMER_CLK_SRC 29 +#define CAMCC_CSI1PHYTIMER_CLK 30 +#define CAMCC_CSI1PHYTIMER_CLK_SRC 31 +#define CAMCC_CSI2PHYTIMER_CLK 32 +#define CAMCC_CSI2PHYTIMER_CLK_SRC 33 +#define CAMCC_CSI3PHYTIMER_CLK 34 +#define CAMCC_CSI3PHYTIMER_CLK_SRC 35 +#define CAMCC_CSIPHY0_CLK 36 +#define CAMCC_CSIPHY1_CLK 37 +#define CAMCC_CSIPHY2_CLK 38 +#define CAMCC_CSIPHY3_CLK 39 +#define CAMCC_FAST_AHB_CLK_SRC 40 +#define CAMCC_FD_CORE_CLK 41 +#define CAMCC_FD_CORE_CLK_SRC 42 +#define CAMCC_FD_CORE_UAR_CLK 43 +#define CAMCC_ICP_AHB_CLK 44 +#define CAMCC_ICP_CLK 45 +#define CAMCC_ICP_CLK_SRC 46 +#define CAMCC_IFE_0_AXI_CLK 47 +#define CAMCC_IFE_0_CLK 48 +#define CAMCC_IFE_0_CLK_SRC 49 +#define CAMCC_IFE_0_CPHY_RX_CLK 50 +#define CAMCC_IFE_0_CSID_CLK 51 +#define CAMCC_IFE_0_CSID_CLK_SRC 52 +#define CAMCC_IFE_0_DSP_CLK 53 +#define CAMCC_IFE_1_AXI_CLK 54 +#define CAMCC_IFE_1_CLK 55 +#define CAMCC_IFE_1_CLK_SRC 56 +#define CAMCC_IFE_1_CPHY_RX_CLK 57 +#define CAMCC_IFE_1_CSID_CLK 58 +#define CAMCC_IFE_1_CSID_CLK_SRC 59 +#define CAMCC_IFE_1_DSP_CLK 60 +#define CAMCC_IFE_LITE_CLK 61 +#define CAMCC_IFE_LITE_CLK_SRC 62 +#define CAMCC_IFE_LITE_CPHY_RX_CLK 63 +#define CAMCC_IFE_LITE_CSID_CLK 64 +#define CAMCC_IFE_LITE_CSID_CLK_SRC 65 +#define CAMCC_IPE_0_AHB_CLK 66 +#define CAMCC_IPE_0_AREG_CLK 67 +#define CAMCC_IPE_0_AXI_CLK 68 +#define CAMCC_IPE_0_CLK 69 +#define CAMCC_IPE_0_CLK_SRC 70 +#define CAMCC_IPE_1_AHB_CLK 71 +#define CAMCC_IPE_1_AREG_CLK 72 +#define CAMCC_IPE_1_AXI_CLK 73 +#define CAMCC_IPE_1_CLK 74 +#define CAMCC_JPEG_CLK 75 +#define CAMCC_JPEG_CLK_SRC 76 +#define CAMCC_LRME_CLK 77 +#define CAMCC_LRME_CLK_SRC 78 +#define CAMCC_MCLK0_CLK 79 +#define CAMCC_MCLK0_CLK_SRC 80 +#define CAMCC_MCLK1_CLK 81 +#define CAMCC_MCLK1_CLK_SRC 82 +#define CAMCC_MCLK2_CLK 83 +#define CAMCC_MCLK2_CLK_SRC 84 +#define CAMCC_MCLK3_CLK 85 +#define CAMCC_MCLK3_CLK_SRC 86 +#define CAMCC_SLEEP_CLK 87 +#define CAMCC_SLEEP_CLK_SRC 88 +#define CAMCC_SLOW_AHB_CLK_SRC 89 +#define CAMCC_XO_CLK_SRC 90 + +/* CAMCC GDSCRs */ +#define BPS_GDSC 0 +#define IFE_0_GDSC 1 +#define IFE_1_GDSC 2 +#define IPE_0_GDSC 3 +#define IPE_1_GDSC 4 +#define TITAN_TOP_GDSC 5 + +#endif -- cgit v1.2.3 From a4be1860b9319e9e55eaa9e28e35e7b19128060c Mon Sep 17 00:00:00 2001 From: Danila Tikhonov Date: Sun, 5 May 2024 23:10:37 +0300 Subject: dt-bindings: clock: qcom: Add SM7150 VIDEOCC clocks Add device tree bindings for the video clock controller on Qualcomm SM7150 platform. Signed-off-by: Danila Tikhonov Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20240505201038.276047-8-danila@jiaxyga.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm7150-videocc.h | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sm7150-videocc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm7150-videocc.h b/include/dt-bindings/clock/qcom,sm7150-videocc.h new file mode 100644 index 000000000000..d86e0fbb159a --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm7150-videocc.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2024, Danila Tikhonov + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_VIDEOCC_SM7150_H +#define _DT_BINDINGS_CLK_QCOM_VIDEOCC_SM7150_H + +#define VIDEOCC_PLL0 0 +#define VIDEOCC_IRIS_AHB_CLK 1 +#define VIDEOCC_IRIS_CLK_SRC 2 +#define VIDEOCC_MVS0_AXI_CLK 3 +#define VIDEOCC_MVS0_CORE_CLK 4 +#define VIDEOCC_MVS1_AXI_CLK 5 +#define VIDEOCC_MVS1_CORE_CLK 6 +#define VIDEOCC_MVSC_CORE_CLK 7 +#define VIDEOCC_MVSC_CTL_AXI_CLK 8 +#define VIDEOCC_VENUS_AHB_CLK 9 +#define VIDEOCC_XO_CLK 10 +#define VIDEOCC_XO_CLK_SRC 11 + +/* VIDEOCC GDSCRs */ +#define VENUS_GDSC 0 +#define VCODEC0_GDSC 1 +#define VCODEC1_GDSC 2 + +#endif -- cgit v1.2.3 From 27c42e925323b975a64429e313b0cf5c0c02a411 Mon Sep 17 00:00:00 2001 From: Kathiravan Thirumoorthy Date: Mon, 25 Mar 2024 21:19:48 +0530 Subject: dt-bindings: arm: qcom,ids: Add SoC ID for IPQ5321 Add the ID for the Qualcomm IPQ5321 SoC. Acked-by: Krzysztof Kozlowski Reviewed-by: Mukesh Ojha Signed-off-by: Kathiravan Thirumoorthy Link: https://lore.kernel.org/r/20240325-ipq5321-sku-support-v2-1-f30ce244732f@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index d040033dc8ee..a2958952a9ec 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -272,6 +272,7 @@ #define QCOM_ID_QCS8550 603 #define QCOM_ID_QCM8550 604 #define QCOM_ID_IPQ5300 624 +#define QCOM_ID_IPQ5321 650 /* * The board type and revision information, used by Qualcomm bootloaders and -- cgit v1.2.3 From 3fefd9b594aa6be9f120733f5bb57b07d47871a9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 17 Apr 2024 18:56:57 +0100 Subject: fs: Remove i_blocks_per_page The last caller has been converted to i_blocks_per_folio() so we can remove this wrapper. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Dave Kleikamp --- include/linux/pagemap.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 3d69589c00a4..63f2f3602a7f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1536,10 +1536,4 @@ unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio) { return folio_size(folio) >> inode->i_blkbits; } - -static inline -unsigned int i_blocks_per_page(struct inode *inode, struct page *page) -{ - return i_blocks_per_folio(inode, page_folio(page)); -} #endif /* _LINUX_PAGEMAP_H */ -- cgit v1.2.3 From a0a44d9175b349df2462089140fb7f292100bd7c Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 27 May 2024 11:01:28 +0200 Subject: mm, slab: don't wrap internal functions with alloc_hooks() The functions __kmalloc_noprof(), kmalloc_large_noprof(), kmalloc_trace_noprof() and their _node variants are all internal to the implementations of kmalloc_noprof() and kmalloc_node_noprof() and are only declared in the "public" slab.h and exported so that those implementations can be static inline and distinguish the build-time constant size variants. The only other users for some of the internal functions are slub_kunit and fortify_kunit tests which make very short-lived allocations. Therefore we can stop wrapping them with the alloc_hooks() macro. Instead add a __ prefix to all of them and a comment documenting these as internal. Also rename __kmalloc_trace() to __kmalloc_cache() which is more descriptive - it is a variant of __kmalloc() where the exact kmalloc cache has been already determined. The usage in fortify_kunit can be removed completely, as the internal functions should be tested already through kmalloc() tests in the test variant that passes non-constant allocation size. Reported-by: Kent Overstreet Cc: Suren Baghdasaryan Cc: Kees Cook Reviewed-by: Kent Overstreet Acked-by: David Rientjes Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 7247e217e21b..ed6bee5ec2b6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -528,9 +528,6 @@ static_assert(PAGE_SHIFT <= 20); #include -void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); -#define __kmalloc(...) alloc_hooks(__kmalloc_noprof(__VA_ARGS__)) - /** * kmem_cache_alloc - Allocate an object * @cachep: The cache to allocate from. @@ -568,31 +565,34 @@ static __always_inline void kfree_bulk(size_t size, void **p) kmem_cache_free_bulk(NULL, size, p); } -void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment - __alloc_size(1); -#define __kmalloc_node(...) alloc_hooks(__kmalloc_node_noprof(__VA_ARGS__)) - void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; #define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__)) -void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t flags, size_t size) - __assume_kmalloc_alignment __alloc_size(3); +/* + * The following functions are not to be used directly and are intended only + * for internal use from kmalloc() and kmalloc_node() + * with the exception of kunit tests + */ + +void *__kmalloc_noprof(size_t size, gfp_t flags) + __assume_kmalloc_alignment __alloc_size(1); + +void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) + __assume_kmalloc_alignment __alloc_size(1); -void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) __assume_kmalloc_alignment - __alloc_size(4); -#define kmalloc_trace(...) alloc_hooks(kmalloc_trace_noprof(__VA_ARGS__)) +void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size) + __assume_kmalloc_alignment __alloc_size(3); -#define kmalloc_node_trace(...) alloc_hooks(kmalloc_node_trace_noprof(__VA_ARGS__)) +void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) + __assume_kmalloc_alignment __alloc_size(4); -void *kmalloc_large_noprof(size_t size, gfp_t flags) __assume_page_alignment - __alloc_size(1); -#define kmalloc_large(...) alloc_hooks(kmalloc_large_noprof(__VA_ARGS__)) +void *__kmalloc_large_noprof(size_t size, gfp_t flags) + __assume_page_alignment __alloc_size(1); -void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_page_alignment - __alloc_size(1); -#define kmalloc_large_node(...) alloc_hooks(kmalloc_large_node_noprof(__VA_ARGS__)) +void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) + __assume_page_alignment __alloc_size(1); /** * kmalloc - allocate kernel memory @@ -654,10 +654,10 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) - return kmalloc_large_noprof(size, flags); + return __kmalloc_large_noprof(size, flags); index = kmalloc_index(size); - return kmalloc_trace_noprof( + return __kmalloc_cache_noprof( kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, size); } @@ -671,10 +671,10 @@ static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gf unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) - return kmalloc_large_node_noprof(size, flags, node); + return __kmalloc_large_node_noprof(size, flags, node); index = kmalloc_index(size); - return kmalloc_node_trace_noprof( + return __kmalloc_cache_node_noprof( kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, node, size); } -- cgit v1.2.3 From 582d79f34330db4ccee85620cf95ee7ccb9a0d01 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:57:50 +0200 Subject: drm/connector: Introduce an HDMI connector initialization function A lot of the various HDMI drivers duplicate some logic that depends on the HDMI spec itself and not really a particular hardware implementation. Output BPC or format selection, infoframe generation are good examples of such areas. This creates a lot of boilerplate, with a lot of variations, which makes it hard for userspace to rely on, and makes it difficult to get it right for drivers. In the next patches, we'll add a lot of infrastructure around the drm_connector and drm_connector_state structures, which will allow to abstract away the duplicated logic. This infrastructure comes with a few requirements though, and thus we need a new initialization function. Hopefully, this will make drivers simpler to handle, and their behaviour more consistent. Reviewed-by: Dave Stevenson Reviewed-by: Sui Jingfeng Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-1-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_connector.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 58ee9adf9091..7ef1ff83d885 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1908,6 +1908,11 @@ int drmm_connector_init(struct drm_device *dev, const struct drm_connector_funcs *funcs, int connector_type, struct i2c_adapter *ddc); +int drmm_connector_hdmi_init(struct drm_device *dev, + struct drm_connector *connector, + const struct drm_connector_funcs *funcs, + int connector_type, + struct i2c_adapter *ddc); void drm_connector_attach_edid_property(struct drm_connector *connector); int drm_connector_register(struct drm_connector *connector); void drm_connector_unregister(struct drm_connector *connector); -- cgit v1.2.3 From 54cb39e2293b1e221708d3ac157ecc59086e1b46 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:57:52 +0200 Subject: drm/connector: hdmi: Create an HDMI sub-state The next features we will need to share across drivers will need to store some parameters for drivers to use, such as the selected output format. Let's create a new connector sub-state dedicated to HDMI controllers, that will eventually store everything we need. Reviewed-by: Dave Stevenson Reviewed-by: Sui Jingfeng Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-3-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- include/drm/display/drm_hdmi_state_helper.h | 16 ++++++++++++++++ include/drm/drm_connector.h | 7 +++++++ 2 files changed, 23 insertions(+) create mode 100644 include/drm/display/drm_hdmi_state_helper.h (limited to 'include') diff --git a/include/drm/display/drm_hdmi_state_helper.h b/include/drm/display/drm_hdmi_state_helper.h new file mode 100644 index 000000000000..837899db315a --- /dev/null +++ b/include/drm/display/drm_hdmi_state_helper.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DRM_HDMI_STATE_HELPER_H_ +#define DRM_HDMI_STATE_HELPER_H_ + +struct drm_atomic_state; +struct drm_connector; +struct drm_connector_state; + +void __drm_atomic_helper_connector_hdmi_reset(struct drm_connector *connector, + struct drm_connector_state *new_conn_state); + +int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, + struct drm_atomic_state *state); + +#endif // DRM_HDMI_STATE_HELPER_H_ diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 7ef1ff83d885..5fe3f6a519cd 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1031,6 +1031,13 @@ struct drm_connector_state { * DRM blob property for HDR output metadata */ struct drm_property_blob *hdr_output_metadata; + + /** + * @hdmi: HDMI-related variable and properties. Filled by + * @drm_atomic_helper_connector_hdmi_check(). + */ + struct { + } hdmi; }; /** -- cgit v1.2.3 From aadb3e16b8f30cc1c1efdfe162f400e026385bfb Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:57:53 +0200 Subject: drm/connector: hdmi: Add output BPC to the connector state We'll add automatic selection of the output BPC in a following patch, but let's add it to the HDMI connector state already. Reviewed-by: Dave Stevenson Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-4-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_connector.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 5fe3f6a519cd..4ddb438ea2dd 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1037,6 +1037,10 @@ struct drm_connector_state { * @drm_atomic_helper_connector_hdmi_check(). */ struct { + /** + * @output_bpc: Bits per color channel to output. + */ + unsigned int output_bpc; } hdmi; }; @@ -1686,6 +1690,11 @@ struct drm_connector { */ struct drm_property_blob *path_blob_ptr; + /** + * @max_bpc: Maximum bits per color channel the connector supports. + */ + unsigned int max_bpc; + /** * @max_bpc_property: Default connector property for the max bpc to be * driven out of the connector. @@ -1919,7 +1928,8 @@ int drmm_connector_hdmi_init(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, int connector_type, - struct i2c_adapter *ddc); + struct i2c_adapter *ddc, + unsigned int max_bpc); void drm_connector_attach_edid_property(struct drm_connector *connector); int drm_connector_register(struct drm_connector *connector); void drm_connector_unregister(struct drm_connector *connector); -- cgit v1.2.3 From 948f01d5e5595023c2e7cfc0184a322be00ef214 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:57:56 +0200 Subject: drm/connector: hdmi: Add support for output format Just like BPC, we'll add support for automatic selection of the output format for HDMI connectors. Let's add the needed defaults and fields for now. Reviewed-by: Dave Stevenson Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-7-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_connector.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 4ddb438ea2dd..4b6b1117c7d0 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -368,6 +368,9 @@ enum drm_panel_orientation { DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; +const char * +drm_hdmi_connector_get_output_format_name(enum hdmi_colorspace fmt); + /** * struct drm_monitor_range_info - Panel's Monitor range in EDID for * &drm_display_info @@ -1041,6 +1044,11 @@ struct drm_connector_state { * @output_bpc: Bits per color channel to output. */ unsigned int output_bpc; + + /** + * @output_format: Pixel format to output in. + */ + enum hdmi_colorspace output_format; } hdmi; }; @@ -1906,6 +1914,17 @@ struct drm_connector { /** @hdr_sink_metadata: HDR Metadata Information read from sink */ struct hdr_sink_metadata hdr_sink_metadata; + + /** + * @hdmi: HDMI-related variable and properties. + */ + struct { + /** + * @supported_formats: Bitmask of @hdmi_colorspace + * supported by the controller. + */ + unsigned long supported_formats; + } hdmi; }; #define obj_to_connector(x) container_of(x, struct drm_connector, base) @@ -1929,6 +1948,7 @@ int drmm_connector_hdmi_init(struct drm_device *dev, const struct drm_connector_funcs *funcs, int connector_type, struct i2c_adapter *ddc, + unsigned long supported_formats, unsigned int max_bpc); void drm_connector_attach_edid_property(struct drm_connector *connector); int drm_connector_register(struct drm_connector *connector); -- cgit v1.2.3 From 40167bcbd19c241fc30a912fa8a8276b9ed1a12e Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:57:58 +0200 Subject: drm/display: hdmi: Add HDMI compute clock helper A lot of HDMI drivers have some variation of the formula to calculate the TMDS character rate from a mode, but few of them actually take all parameters into account. Let's create a helper to provide that rate taking all parameters into account. Reviewed-by: Dave Stevenson Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-9-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- include/drm/display/drm_hdmi_helper.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/display/drm_hdmi_helper.h b/include/drm/display/drm_hdmi_helper.h index 76d234826e22..57e3b18c15ec 100644 --- a/include/drm/display/drm_hdmi_helper.h +++ b/include/drm/display/drm_hdmi_helper.h @@ -24,4 +24,8 @@ drm_hdmi_infoframe_set_hdr_metadata(struct hdmi_drm_infoframe *frame, void drm_hdmi_avi_infoframe_content_type(struct hdmi_avi_infoframe *frame, const struct drm_connector_state *conn_state); +unsigned long long +drm_hdmi_compute_mode_clock(const struct drm_display_mode *mode, + unsigned int bpc, enum hdmi_colorspace fmt); + #endif -- cgit v1.2.3 From f035f4097f1e0a35a457b72427bb0c06ca0c81c4 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:58:00 +0200 Subject: drm/connector: hdmi: Calculate TMDS character rate Most HDMI drivers have some code to calculate the TMDS character rate, usually to adjust an internal clock to match what the mode requires. Since the TMDS character rates mostly depends on the resolution, whether we need to repeat pixels or not, the bpc count and the format, we can now derive it from the HDMI connector state that stores all those infos and remove the duplication from drivers. Reviewed-by: Dave Stevenson Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-11-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_connector.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 4b6b1117c7d0..3bdcf904a11b 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1049,6 +1049,11 @@ struct drm_connector_state { * @output_format: Pixel format to output in. */ enum hdmi_colorspace output_format; + + /** + * @tmds_char_rate: TMDS Character Rate, in Hz. + */ + unsigned long long tmds_char_rate; } hdmi; }; -- cgit v1.2.3 From e5030a74f976b4e808e28e78805c87203ac1a48d Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:58:02 +0200 Subject: drm/connector: hdmi: Add custom hook to filter TMDS character rate Most of the HDMI controllers have an upper TMDS character rate limit they can't exceed. On "embedded"-grade display controllers, it will typically be lower than what high-grade monitors can provide these days, so drivers will filter the TMDS character rate based on the controller capabilities. To make that easier to handle for drivers, let's provide an optional hook to be implemented by drivers so they can tell the HDMI controller helpers if a given TMDS character rate is reachable for them or not. This will then be useful to figure out the best format and bpc count for a given mode. Reviewed-by: Dave Stevenson Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-13-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_connector.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 3bdcf904a11b..ffef967869d9 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -38,6 +38,7 @@ struct drm_connector_helper_funcs; struct drm_modeset_acquire_ctx; struct drm_device; struct drm_crtc; +struct drm_display_mode; struct drm_encoder; struct drm_panel; struct drm_property; @@ -1057,6 +1058,30 @@ struct drm_connector_state { } hdmi; }; +/** + * struct drm_connector_hdmi_funcs - drm_hdmi_connector control functions + */ +struct drm_connector_hdmi_funcs { + /** + * @tmds_char_rate_valid: + * + * This callback is invoked at atomic_check time to figure out + * whether a particular TMDS character rate is supported by the + * driver. + * + * The @tmds_char_rate_valid callback is optional. + * + * Returns: + * + * Either &drm_mode_status.MODE_OK or one of the failure reasons + * in &enum drm_mode_status. + */ + enum drm_mode_status + (*tmds_char_rate_valid)(const struct drm_connector *connector, + const struct drm_display_mode *mode, + unsigned long long tmds_rate); +}; + /** * struct drm_connector_funcs - control connectors on a given device * @@ -1929,6 +1954,11 @@ struct drm_connector { * supported by the controller. */ unsigned long supported_formats; + + /** + * @funcs: HDMI connector Control Functions + */ + const struct drm_connector_hdmi_funcs *funcs; } hdmi; }; @@ -1951,6 +1981,7 @@ int drmm_connector_init(struct drm_device *dev, int drmm_connector_hdmi_init(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, + const struct drm_connector_hdmi_funcs *hdmi_funcs, int connector_type, struct i2c_adapter *ddc, unsigned long supported_formats, -- cgit v1.2.3 From ab52af4ba7c7dc2e226ede5935a0587743b747d3 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:58:07 +0200 Subject: drm/connector: hdmi: Add Broadcast RGB property The i915 driver has a property to force the RGB range of an HDMI output. The vc4 driver then implemented the same property with the same semantics. KWin has support for it, and a PR for mutter is also there to support it. Both drivers implementing the same property with the same semantics, plus the userspace having support for it, is proof enough that it's pretty much a de-facto standard now and we can provide helpers for it. Let's plumb it into the newly created HDMI connector. Reviewed-by: Dave Stevenson Acked-by: Pekka Paalanen Reviewed-by: Sebastian Wick Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-18-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_connector.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index ffef967869d9..207635715466 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -369,6 +369,29 @@ enum drm_panel_orientation { DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; +/** + * enum drm_hdmi_broadcast_rgb - Broadcast RGB Selection for an HDMI @drm_connector + */ +enum drm_hdmi_broadcast_rgb { + /** + * @DRM_HDMI_BROADCAST_RGB_AUTO: The RGB range is selected + * automatically based on the mode. + */ + DRM_HDMI_BROADCAST_RGB_AUTO, + + /** + * @DRM_HDMI_BROADCAST_RGB_FULL: Full range RGB is forced. + */ + DRM_HDMI_BROADCAST_RGB_FULL, + + /** + * @DRM_HDMI_BROADCAST_RGB_LIMITED: Limited range RGB is forced. + */ + DRM_HDMI_BROADCAST_RGB_LIMITED, +}; + +const char * +drm_hdmi_connector_get_broadcast_rgb_name(enum drm_hdmi_broadcast_rgb broadcast_rgb); const char * drm_hdmi_connector_get_output_format_name(enum hdmi_colorspace fmt); @@ -1041,6 +1064,12 @@ struct drm_connector_state { * @drm_atomic_helper_connector_hdmi_check(). */ struct { + /** + * @broadcast_rgb: Connector property to pass the + * Broadcast RGB selection value. + */ + enum drm_hdmi_broadcast_rgb broadcast_rgb; + /** * @output_bpc: Bits per color channel to output. */ @@ -1757,6 +1786,12 @@ struct drm_connector { */ struct drm_property *privacy_screen_hw_state_property; + /** + * @broadcast_rgb_property: Connector property to set the + * Broadcast RGB selection to output with. + */ + struct drm_property *broadcast_rgb_property; + #define DRM_CONNECTOR_POLL_HPD (1 << 0) #define DRM_CONNECTOR_POLL_CONNECT (1 << 1) #define DRM_CONNECTOR_POLL_DISCONNECT (1 << 2) @@ -2096,6 +2131,7 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, u32 scaling_mode_mask); int drm_connector_attach_vrr_capable_property( struct drm_connector *connector); +int drm_connector_attach_broadcast_rgb_property(struct drm_connector *connector); int drm_connector_attach_colorspace_property(struct drm_connector *connector); int drm_connector_attach_hdr_output_metadata_property(struct drm_connector *connector); bool drm_connector_atomic_hdr_metadata_equal(struct drm_connector_state *old_state, -- cgit v1.2.3 From 027d435906490812d4568ff371a8b63c24a36bcd Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:58:09 +0200 Subject: drm/connector: hdmi: Add RGB Quantization Range to the connector state HDMI controller drivers will need to figure out the RGB range they need to configure based on a mode and property values. Let's expose that in the HDMI connector state so drivers can just use that value. Reviewed-by: Dave Stevenson Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-20-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- include/drm/drm_connector.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 207635715466..dcad2d46fabe 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1070,6 +1070,12 @@ struct drm_connector_state { */ enum drm_hdmi_broadcast_rgb broadcast_rgb; + /** + * @is_full_range: Is the output supposed to use a full + * RGB Quantization Range or not? + */ + bool is_limited_range; + /** * @output_bpc: Bits per color channel to output. */ -- cgit v1.2.3 From f378b77227bc4732922c57f92be89438bb1018a1 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:58:11 +0200 Subject: drm/connector: hdmi: Add Infoframes generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Infoframes in KMS is usually handled by a bunch of low-level helpers that require quite some boilerplate for drivers. This leads to discrepancies with how drivers generate them, and which are actually sent. Now that we have everything needed to generate them in the HDMI connector state, we can generate them in our common logic so that drivers can simply reuse what we precomputed. Cc: Ville Syrjälä Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-22-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- include/drm/display/drm_hdmi_state_helper.h | 6 ++ include/drm/drm_connector.h | 111 +++++++++++++++++++++++++++- 2 files changed, 116 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/display/drm_hdmi_state_helper.h b/include/drm/display/drm_hdmi_state_helper.h index 837899db315a..eb162ff24de0 100644 --- a/include/drm/display/drm_hdmi_state_helper.h +++ b/include/drm/display/drm_hdmi_state_helper.h @@ -6,6 +6,7 @@ struct drm_atomic_state; struct drm_connector; struct drm_connector_state; +struct hdmi_audio_infoframe; void __drm_atomic_helper_connector_hdmi_reset(struct drm_connector *connector, struct drm_connector_state *new_conn_state); @@ -13,4 +14,9 @@ void __drm_atomic_helper_connector_hdmi_reset(struct drm_connector *connector, int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, struct drm_atomic_state *state); +int drm_atomic_helper_connector_hdmi_update_audio_infoframe(struct drm_connector *connector, + struct hdmi_audio_infoframe *frame); +int drm_atomic_helper_connector_hdmi_update_infoframes(struct drm_connector *connector, + struct drm_atomic_state *state); + #endif // DRM_HDMI_STATE_HELPER_H_ diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index dcad2d46fabe..e04a8a0d1bbd 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -914,6 +914,21 @@ struct drm_tv_connector_state { unsigned int hue; }; +/** + * struct drm_connector_hdmi_infoframe - HDMI Infoframe container + */ +struct drm_connector_hdmi_infoframe { + /** + * @data: HDMI Infoframe structure + */ + union hdmi_infoframe data; + + /** + * @set: Is the content of @data valid? + */ + bool set; +}; + /** * struct drm_connector_state - mutable connector state */ @@ -1071,7 +1086,36 @@ struct drm_connector_state { enum drm_hdmi_broadcast_rgb broadcast_rgb; /** - * @is_full_range: Is the output supposed to use a full + * @infoframes: HDMI Infoframes matching that state + */ + struct { + /** + * @avi: AVI Infoframes structure matching our + * state. + */ + struct drm_connector_hdmi_infoframe avi; + + /** + * @hdr_drm: DRM (Dynamic Range and Mastering) + * Infoframes structure matching our state. + */ + struct drm_connector_hdmi_infoframe hdr_drm; + + /** + * @spd: SPD Infoframes structure matching our + * state. + */ + struct drm_connector_hdmi_infoframe spd; + + /** + * @vendor: HDMI Vendor Infoframes structure + * matching our state. + */ + struct drm_connector_hdmi_infoframe hdmi; + } infoframes; + + /** + * @is_limited_range: Is the output supposed to use a limited * RGB Quantization Range or not? */ bool is_limited_range; @@ -1115,6 +1159,41 @@ struct drm_connector_hdmi_funcs { (*tmds_char_rate_valid)(const struct drm_connector *connector, const struct drm_display_mode *mode, unsigned long long tmds_rate); + + /** + * @clear_infoframe: + * + * This callback is invoked through + * @drm_atomic_helper_connector_hdmi_update_infoframes during a + * commit to clear the infoframes into the hardware. It will be + * called multiple times, once for every disabled infoframe + * type. + * + * The @clear_infoframe callback is optional. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + int (*clear_infoframe)(struct drm_connector *connector, + enum hdmi_infoframe_type type); + + /** + * @write_infoframe: + * + * This callback is invoked through + * @drm_atomic_helper_connector_hdmi_update_infoframes during a + * commit to program the infoframes into the hardware. It will + * be called multiple times, once for every updated infoframe + * type. + * + * The @write_infoframe callback is mandatory. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + int (*write_infoframe)(struct drm_connector *connector, + enum hdmi_infoframe_type type, + const u8 *buffer, size_t len); }; /** @@ -1990,6 +2069,18 @@ struct drm_connector { * @hdmi: HDMI-related variable and properties. */ struct { +#define DRM_CONNECTOR_HDMI_VENDOR_LEN 8 + /** + * @vendor: HDMI Controller Vendor Name + */ + unsigned char vendor[DRM_CONNECTOR_HDMI_VENDOR_LEN] __nonstring; + +#define DRM_CONNECTOR_HDMI_PRODUCT_LEN 16 + /** + * @product: HDMI Controller Product Name + */ + unsigned char product[DRM_CONNECTOR_HDMI_PRODUCT_LEN] __nonstring; + /** * @supported_formats: Bitmask of @hdmi_colorspace * supported by the controller. @@ -2000,6 +2091,23 @@ struct drm_connector { * @funcs: HDMI connector Control Functions */ const struct drm_connector_hdmi_funcs *funcs; + + /** + * @infoframes: Current Infoframes output by the connector + */ + struct { + /** + * @lock: Mutex protecting against concurrent access to + * the infoframes, most notably between KMS and ALSA. + */ + struct mutex lock; + + /** + * @audio: Current Audio Infoframes structure. Protected + * by @lock. + */ + struct drm_connector_hdmi_infoframe audio; + } infoframes; } hdmi; }; @@ -2021,6 +2129,7 @@ int drmm_connector_init(struct drm_device *dev, struct i2c_adapter *ddc); int drmm_connector_hdmi_init(struct drm_device *dev, struct drm_connector *connector, + const char *vendor, const char *product, const struct drm_connector_funcs *funcs, const struct drm_connector_hdmi_funcs *hdmi_funcs, int connector_type, -- cgit v1.2.3 From db003a28e03f95f2bcb63f037a2078b8870b1ecd Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 May 2024 14:33:20 +0200 Subject: netfs: fix kernel doc for nets_wait_for_outstanding_io() The @inode parameter wasn't documented leading to new doc build warnings. Fixes: f89ea63f1c65 ("netfs, 9p: Fix race between umount and async request completion") Link: https://lore.kernel.org/r/20240528133050.7e09d78e@canb.auug.org.au Signed-off-by: Christian Brauner --- include/linux/netfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 3ca3906bb8da..5d0288938cc2 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -521,7 +521,7 @@ static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx) /** * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete - * @ctx: The netfs inode to wait on + * @inode: The netfs inode to wait on * * Wait for outstanding I/O requests of any type to complete. This is intended * to be called from inode eviction routines. This makes sure that any -- cgit v1.2.3 From 620c266f394932e5decc4b34683a75dfc59dc2f4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 24 May 2024 12:19:39 +0200 Subject: fhandle: relax open_by_handle_at() permission checks A current limitation of open_by_handle_at() is that it's currently not possible to use it from within containers at all because we require CAP_DAC_READ_SEARCH in the initial namespace. That's unfortunate because there are scenarios where using open_by_handle_at() from within containers. Two examples: (1) cgroupfs allows to encode cgroups to file handles and reopen them with open_by_handle_at(). (2) Fanotify allows placing filesystem watches they currently aren't usable in containers because the returned file handles cannot be used. Here's a proposal for relaxing the permission check for open_by_handle_at(). (1) Opening file handles when the caller has privileges over the filesystem (1.1) The caller has an unobstructed view of the filesystem. (1.2) The caller has permissions to follow a path to the file handle. This doesn't address the problem of opening a file handle when only a portion of a filesystem is exposed as is common in containers by e.g., bind-mounting a subtree. The proposal to solve this use-case is: (2) Opening file handles when the caller has privileges over a subtree (2.1) The caller is able to reach the file from the provided mount fd. (2.2) The caller has permissions to construct an unobstructed path to the file handle. (2.3) The caller has permissions to follow a path to the file handle. The relaxed permission checks are currently restricted to directory file handles which are what both cgroupfs and fanotify need. Handling disconnected non-directory file handles would lead to a potentially non-deterministic api. If a disconnected non-directory file handle is provided we may fail to decode a valid path that we could use for permission checking. That in itself isn't a problem as we would just return EACCES in that case. However, confusion may arise if a non-disconnected dentry ends up in the cache later and those opening the file handle would suddenly succeed. * It's potentially possible to use timing information (side-channel) to infer whether a given inode exists. I don't think that's particularly problematic. Thanks to Jann for bringing this to my attention. * An unrelated note (IOW, these are thoughts that apply to open_by_handle_at() generically and are unrelated to the changes here): Jann pointed out that we should verify whether deleted files could potentially be reopened through open_by_handle_at(). I don't think that's possible though. Another potential thing to check is whether open_by_handle_at() could be abused to open internal stuff like memfds or gpu stuff. I don't think so but I haven't had the time to completely verify this. This dates back to discussions Amir and I had quite some time ago and thanks to him for providing a lot of details around the export code and related patches! Link: https://lore.kernel.org/r/20240524-vfs-open_by_handle_at-v1-1-3d4b7d22736b@kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- include/linux/exportfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index bb37ad5cc954..893a1d21dc1c 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -158,6 +158,7 @@ struct fid { #define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */ #define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */ +#define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */ /** * struct export_operations - for nfsd to communicate with file systems @@ -305,6 +306,7 @@ static inline int exportfs_encode_fid(struct inode *inode, struct fid *fid, extern struct dentry *exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, int fileid_type, + unsigned int flags, int (*acceptable)(void *, struct dentry *), void *context); extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, -- cgit v1.2.3 From 1866407831deb945a16c60c38246f28c2475b28a Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Thu, 23 May 2024 21:20:25 -0400 Subject: dt-bindings: arm: qcom,ids: Add SoC ID for SDM670 The socinfo driver uses SoC IDs from the device tree bindings. Add the SoC ID for SDM670. Signed-off-by: Richard Acayan Reviewed-by: Konrad Dybcio Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240524012023.318965-6-mailingradian@gmail.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index a2958952a9ec..d6c9e9472121 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -175,6 +175,7 @@ #define QCOM_ID_SDA630 327 #define QCOM_ID_MSM8905 331 #define QCOM_ID_SDX202 333 +#define QCOM_ID_SDM670 336 #define QCOM_ID_SDM450 338 #define QCOM_ID_SM8150 339 #define QCOM_ID_SDA845 341 -- cgit v1.2.3 From 14a1d1dc35d346a1523f38f6517c349dfa447a58 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Thu, 9 May 2024 16:06:48 +0200 Subject: dt-bindings: clock: rk3128: Add PCLK_MIPIPHY The DPHY's APB clock is required to be exposed in order to be able to enable it and access the phy's registers. Signed-off-by: Alex Bee Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240509140653.168591-3-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rk3128-cru.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/rk3128-cru.h b/include/dt-bindings/clock/rk3128-cru.h index 6a47825dac5d..1be455ba4985 100644 --- a/include/dt-bindings/clock/rk3128-cru.h +++ b/include/dt-bindings/clock/rk3128-cru.h @@ -116,6 +116,7 @@ #define PCLK_GMAC 367 #define PCLK_PMU_PRE 368 #define PCLK_SIM_CARD 369 +#define PCLK_MIPIPHY 370 /* hclk gates */ #define HCLK_SPDIF 440 -- cgit v1.2.3 From 4edd7dc82bd6be6989c034ccbbbccdc61034e33b Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 30 Apr 2024 11:43:43 +0530 Subject: PCI: endpoint: Rename core_init() callback in 'struct pci_epc_event_ops' to epc_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit core_init() callback is used to notify the EPC initialization event to the EPF drivers. The 'core' prefix was used indicate that the controller IP core has completed initialization. But it serves no purpose as the EPF driver will only care about the EPC initialization as a whole and there is no real benefit to distinguish the IP core part. Rename the core_init() callback in 'struct pci_epc_event_ops' to epc_init() to make it more clear. Link: https://lore.kernel.org/linux-pci/20240430-pci-epf-rework-v4-2-22832d0d456f@linaro.org Tested-by: Niklas Cassel Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Niklas Cassel --- include/linux/pci-epf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index adee6a1b35db..afe3bfd88742 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -70,13 +70,13 @@ struct pci_epf_ops { /** * struct pci_epc_event_ops - Callbacks for capturing the EPC events - * @core_init: Callback for the EPC initialization complete event + * @epc_init: Callback for the EPC initialization complete event * @link_up: Callback for the EPC link up event * @link_down: Callback for the EPC link down event * @bme: Callback for the EPC BME (Bus Master Enable) event */ struct pci_epc_event_ops { - int (*core_init)(struct pci_epf *epf); + int (*epc_init)(struct pci_epf *epf); int (*link_up)(struct pci_epf *epf); int (*link_down)(struct pci_epf *epf); int (*bme)(struct pci_epf *epf); -- cgit v1.2.3 From f58838d7feb04809257f54a8b256786d2f9dfe01 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 30 Apr 2024 11:43:44 +0530 Subject: PCI: endpoint: Rename BME to Bus Master Enable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BME which stands for 'Bus Master Enable' is not defined in the PCIe base spec even though it is commonly referred in many places (vendor docs). To align with the spec, rename it to its expansion 'Bus Master Enable'. Suggested-by: Damien Le Moal Link: https://lore.kernel.org/linux-pci/20240430-pci-epf-rework-v4-3-22832d0d456f@linaro.org Link: https://lore.kernel.org/linux-pci/20240430-pci-epf-rework-v4-4-22832d0d456f@linaro.org Tested-by: Niklas Cassel Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński [bhelgaas: squash removal of irrelevant 'Link is enabled'] Signed-off-by: Bjorn Helgaas Reviewed-by: Niklas Cassel --- include/linux/pci-epc.h | 2 +- include/linux/pci-epf.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index acc5f96161fe..11115cd0fe5b 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -226,7 +226,7 @@ void pci_epc_linkup(struct pci_epc *epc); void pci_epc_linkdown(struct pci_epc *epc); void pci_epc_init_notify(struct pci_epc *epc); void pci_epc_notify_pending_init(struct pci_epc *epc, struct pci_epf *epf); -void pci_epc_bme_notify(struct pci_epc *epc); +void pci_epc_bus_master_enable_notify(struct pci_epc *epc); void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf, enum pci_epc_interface_type type); int pci_epc_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no, diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index afe3bfd88742..dc759eb7157c 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -73,13 +73,13 @@ struct pci_epf_ops { * @epc_init: Callback for the EPC initialization complete event * @link_up: Callback for the EPC link up event * @link_down: Callback for the EPC link down event - * @bme: Callback for the EPC BME (Bus Master Enable) event + * @bus_master_enable: Callback for the EPC Bus Master Enable event */ struct pci_epc_event_ops { int (*epc_init)(struct pci_epf *epf); int (*link_up)(struct pci_epf *epf); int (*link_down)(struct pci_epf *epf); - int (*bme)(struct pci_epf *epf); + int (*bus_master_enable)(struct pci_epf *epf); }; /** -- cgit v1.2.3 From 4eed3dd7116814c82630b0f1b0f73fa96707134f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 7 May 2024 13:25:19 +0300 Subject: resource: Use typedef for alignf callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make it simpler to declare resource constraint alignf callbacks, add typedef for it and document it. Suggested-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240507102523.57320-5-ilpo.jarvinen@linux.intel.com Tested-by: Lidong Wang Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg --- include/linux/ioport.h | 22 ++++++++++++++++++---- include/linux/pci.h | 5 +---- 2 files changed, 19 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index db7fe25f3370..28266426e5bf 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -188,6 +188,23 @@ enum { #define DEFINE_RES_DMA(_dma) \ DEFINE_RES_DMA_NAMED((_dma), NULL) +/** + * typedef resource_alignf - Resource alignment callback + * @data: Private data used by the callback + * @res: Resource candidate range (an empty resource space) + * @size: The minimum size of the empty space + * @align: Alignment from the constraints + * + * Callback allows calculating resource placement and alignment beyond min, + * max, and align fields in the struct resource_constraint. + * + * Return: Start address for the resource. + */ +typedef resource_size_t (*resource_alignf)(void *data, + const struct resource *res, + resource_size_t size, + resource_size_t align); + /* PC/ISA/whatever - the normal PC address spaces: IO and memory */ extern struct resource ioport_resource; extern struct resource iomem_resource; @@ -207,10 +224,7 @@ extern void arch_remove_reservations(struct resource *avail); extern int allocate_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, resource_size_t max, resource_size_t align, - resource_size_t (*alignf)(void *, - const struct resource *, - resource_size_t, - resource_size_t), + resource_alignf alignf, void *alignf_data); struct resource *lookup_resource(struct resource *root, resource_size_t start); int adjust_resource(struct resource *res, resource_size_t start, diff --git a/include/linux/pci.h b/include/linux/pci.h index fb004fd4e889..760fdc4b37cc 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1551,10 +1551,7 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, resource_size_t size, resource_size_t align, resource_size_t min, unsigned long type_mask, - resource_size_t (*alignf)(void *, - const struct resource *, - resource_size_t, - resource_size_t), + resource_alignf alignf, void *alignf_data); -- cgit v1.2.3 From 2700225304e8e4f0f29f2bbb8ad0c112c9666d90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 7 May 2024 13:25:21 +0300 Subject: resource: Export find_resource_space() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCI bridge window logic needs to find out in advance to the actual allocation if there is an empty space big enough to fit the window. Export find_resource_space() for the purpose. Also move the struct resource_constraint into generic header to be able to use the new interface. Link: https://lore.kernel.org/r/20240507102523.57320-7-ilpo.jarvinen@linux.intel.com Tested-by: Lidong Wang Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg --- include/linux/ioport.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 28266426e5bf..6e9fb667a1c5 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -205,6 +205,25 @@ typedef resource_size_t (*resource_alignf)(void *data, resource_size_t size, resource_size_t align); +/** + * struct resource_constraint - constraints to be met while searching empty + * resource space + * @min: The minimum address for the memory range + * @max: The maximum address for the memory range + * @align: Alignment for the start address of the empty space + * @alignf: Additional alignment constraints callback + * @alignf_data: Data provided for @alignf callback + * + * Contains the range and alignment constraints that have to be met during + * find_resource_space(). @alignf can be NULL indicating no alignment beyond + * @align is necessary. + */ +struct resource_constraint { + resource_size_t min, max, align; + resource_alignf alignf; + void *alignf_data; +}; + /* PC/ISA/whatever - the normal PC address spaces: IO and memory */ extern struct resource ioport_resource; extern struct resource iomem_resource; @@ -278,6 +297,9 @@ static inline bool resource_union(const struct resource *r1, const struct resour return true; } +int find_resource_space(struct resource *root, struct resource *new, + resource_size_t size, struct resource_constraint *constraint); + /* Convenience shorthand with allocation */ #define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) #define request_muxed_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED) -- cgit v1.2.3 From c7ce956bb6d0f32ab921b6ffba1a6a834df96f21 Mon Sep 17 00:00:00 2001 From: MarileneGarcia Date: Sun, 19 May 2024 00:10:27 -0300 Subject: drm/dp: Fix documentation warning It fixes the following warnings when the kernel documentation is generated: ./include/drm/display/drm_dp_helper.h:126: warning: Function parameter or struct member 'mode' not described in 'drm_dp_as_sdp' ./include/drm/display/drm_dp_helper.h:126: warning: Excess struct member 'operation_mode' description in 'drm_dp_as_sdp' Signed-off-by: MarileneGarcia Fixes: 0bbb8f594e33 ("drm/dp: Add Adaptive Sync SDP logging") Reviewed-by: Dmitry Baryshkov Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/r/20240405141640.09b0bdbf@canb.auug.org.au Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240519031027.433751-1-marilene.agarcia@gmail.com --- include/drm/display/drm_dp_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 8bed890eec2c..8defcc399f42 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -112,7 +112,7 @@ struct drm_dp_vsc_sdp { * @target_rr: Target Refresh * @duration_incr_ms: Successive frame duration increase * @duration_decr_ms: Successive frame duration decrease - * @operation_mode: Adaptive Sync Operation Mode + * @mode: Adaptive Sync Operation Mode */ struct drm_dp_as_sdp { unsigned char sdp_type; -- cgit v1.2.3 From 475beea0b9f631656b5cc39429a39696876af613 Mon Sep 17 00:00:00 2001 From: Alexandru Gagniuc Date: Tue, 30 Apr 2024 23:07:43 -0500 Subject: dt-bindings: clock: Add PCIe pipe related clocks for IPQ9574 Add defines for the missing PCIe PIPE clocks. Signed-off-by: Alexandru Gagniuc Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240501040800.1542805-2-mr.nuke.me@gmail.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,ipq9574-gcc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,ipq9574-gcc.h b/include/dt-bindings/clock/qcom,ipq9574-gcc.h index 08fd3a37acaa..52123c5a09fa 100644 --- a/include/dt-bindings/clock/qcom,ipq9574-gcc.h +++ b/include/dt-bindings/clock/qcom,ipq9574-gcc.h @@ -216,4 +216,8 @@ #define GCC_CRYPTO_AHB_CLK 207 #define GCC_USB0_PIPE_CLK 208 #define GCC_USB0_SLEEP_CLK 209 +#define GCC_PCIE0_PIPE_CLK 210 +#define GCC_PCIE1_PIPE_CLK 211 +#define GCC_PCIE2_PIPE_CLK 212 +#define GCC_PCIE3_PIPE_CLK 213 #endif -- cgit v1.2.3 From 8526f8c877baf3f9e678b31fd7d1066b776775cc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:02:13 +0200 Subject: wifi: nl80211: clean up coalescing rule handling There's no need to allocate a tiny struct and then an array again, just allocate the two together and use __counted_by(). Also unify the freeing. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240523120213.48a40cfb96f9.Ia02bf8f8fefbf533c64c5fa26175848d4a3a7899@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d79180bec7a1..5da9bb0ac6a4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3566,8 +3566,8 @@ struct cfg80211_coalesce_rules { * @n_rules: number of rules */ struct cfg80211_coalesce { - struct cfg80211_coalesce_rules *rules; int n_rules; + struct cfg80211_coalesce_rules rules[] __counted_by(n_rules); }; /** -- cgit v1.2.3 From ed7ee6a69f9289337af4835a908aa782263d4852 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 29 May 2024 10:39:01 +0200 Subject: statx: Update offset commentary for struct statx In commit 2a82bb02941f ("statx: stx_subvol"), a new member was added to struct statx, but the offset comment was not correct. Update it. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240529081725.3769290-1-john.g.garry@oracle.com Signed-off-by: Christian Brauner --- include/uapi/linux/stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 67626d535316..95770941ee2c 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -126,8 +126,8 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ + __u64 stx_subvol; /* Subvolume identifier */ __u64 __spare3[11]; /* Spare space for future expansion */ /* 0x100 */ }; -- cgit v1.2.3 From 1deba6e24c221c61c6eab8656a53f8c17035932b Mon Sep 17 00:00:00 2001 From: Brent Lu Date: Mon, 27 May 2024 14:35:47 -0500 Subject: ASoC: SOF: sof-audio: add sof_dai_get_tdm_slots function An new interface, sof_dai_get_tdm_slots(), is added for machine driver to get tdm slot number from topology. The dai_get_param() callback needs to support new parameter type SOF_DAI_PARAM_INTEL_SSP_TDM_SLOTS by returning the tdm slot number of specific SSP port. Reviewed-by: Bard Liao Signed-off-by: Brent Lu Signed-off-by: Pierre-Louis Bossart Link: https://msgid.link/r/20240527193552.165567-14-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/sof.h b/include/sound/sof.h index ec6c30d54592..64fd5504cb2b 100644 --- a/include/sound/sof.h +++ b/include/sound/sof.h @@ -173,5 +173,6 @@ struct sof_dev_desc { int sof_dai_get_mclk(struct snd_soc_pcm_runtime *rtd); int sof_dai_get_bclk(struct snd_soc_pcm_runtime *rtd); +int sof_dai_get_tdm_slots(struct snd_soc_pcm_runtime *rtd); #endif -- cgit v1.2.3 From cc5ac966f26193ab185cc43d64d9f1ae998ccb6e Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:42:57 +0800 Subject: cachefiles: add output string to cachefiles_obj_[get|put]_ondemand_fd This lets us see the correct trace output. Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-2-libaokun@huaweicloud.com Acked-by: Jeff Layton Reviewed-by: Jingbo Xu Signed-off-by: Christian Brauner --- include/trace/events/cachefiles.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index cf4b98b9a9ed..e3213af847cd 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -127,7 +127,9 @@ enum cachefiles_error_trace { EM(cachefiles_obj_see_lookup_cookie, "SEE lookup_cookie") \ EM(cachefiles_obj_see_lookup_failed, "SEE lookup_failed") \ EM(cachefiles_obj_see_withdraw_cookie, "SEE withdraw_cookie") \ - E_(cachefiles_obj_see_withdrawal, "SEE withdrawal") + EM(cachefiles_obj_see_withdrawal, "SEE withdrawal") \ + EM(cachefiles_obj_get_ondemand_fd, "GET ondemand_fd") \ + E_(cachefiles_obj_put_ondemand_fd, "PUT ondemand_fd") #define cachefiles_coherency_traces \ EM(cachefiles_coherency_check_aux, "BAD aux ") \ -- cgit v1.2.3 From da4a827416066191aafeeccee50a8836a826ba10 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:43:00 +0800 Subject: cachefiles: fix slab-use-after-free in cachefiles_ondemand_daemon_read() We got the following issue in a fuzz test of randomly issuing the restore command: ================================================================== BUG: KASAN: slab-use-after-free in cachefiles_ondemand_daemon_read+0xb41/0xb60 Read of size 8 at addr ffff888122e84088 by task ondemand-04-dae/963 CPU: 13 PID: 963 Comm: ondemand-04-dae Not tainted 6.8.0-dirty #564 Call Trace: kasan_report+0x93/0xc0 cachefiles_ondemand_daemon_read+0xb41/0xb60 vfs_read+0x169/0xb50 ksys_read+0xf5/0x1e0 Allocated by task 116: kmem_cache_alloc+0x140/0x3a0 cachefiles_lookup_cookie+0x140/0xcd0 fscache_cookie_state_machine+0x43c/0x1230 [...] Freed by task 792: kmem_cache_free+0xfe/0x390 cachefiles_put_object+0x241/0x480 fscache_cookie_state_machine+0x5c8/0x1230 [...] ================================================================== Following is the process that triggers the issue: mount | daemon_thread1 | daemon_thread2 ------------------------------------------------------------ cachefiles_withdraw_cookie cachefiles_ondemand_clean_object(object) cachefiles_ondemand_send_req REQ_A = kzalloc(sizeof(*req) + data_len) wait_for_completion(&REQ_A->done) cachefiles_daemon_read cachefiles_ondemand_daemon_read REQ_A = cachefiles_ondemand_select_req msg->object_id = req->object->ondemand->ondemand_id ------ restore ------ cachefiles_ondemand_restore xas_for_each(&xas, req, ULONG_MAX) xas_set_mark(&xas, CACHEFILES_REQ_NEW) cachefiles_daemon_read cachefiles_ondemand_daemon_read REQ_A = cachefiles_ondemand_select_req copy_to_user(_buffer, msg, n) xa_erase(&cache->reqs, id) complete(&REQ_A->done) ------ close(fd) ------ cachefiles_ondemand_fd_release cachefiles_put_object cachefiles_put_object kmem_cache_free(cachefiles_object_jar, object) REQ_A->object->ondemand->ondemand_id // object UAF !!! When we see the request within xa_lock, req->object must not have been freed yet, so grab the reference count of object before xa_unlock to avoid the above issue. Fixes: 0a7e54c1959c ("cachefiles: resend an open request if the read request's object is closed") Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-5-libaokun@huaweicloud.com Acked-by: Jeff Layton Reviewed-by: Jia Zhu Reviewed-by: Jingbo Xu Signed-off-by: Christian Brauner --- include/trace/events/cachefiles.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index e3213af847cd..7d931db02b93 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -33,6 +33,8 @@ enum cachefiles_obj_ref_trace { cachefiles_obj_see_withdrawal, cachefiles_obj_get_ondemand_fd, cachefiles_obj_put_ondemand_fd, + cachefiles_obj_get_read_req, + cachefiles_obj_put_read_req, }; enum fscache_why_object_killed { @@ -129,7 +131,9 @@ enum cachefiles_error_trace { EM(cachefiles_obj_see_withdraw_cookie, "SEE withdraw_cookie") \ EM(cachefiles_obj_see_withdrawal, "SEE withdrawal") \ EM(cachefiles_obj_get_ondemand_fd, "GET ondemand_fd") \ - E_(cachefiles_obj_put_ondemand_fd, "PUT ondemand_fd") + EM(cachefiles_obj_put_ondemand_fd, "PUT ondemand_fd") \ + EM(cachefiles_obj_get_read_req, "GET read_req") \ + E_(cachefiles_obj_put_read_req, "PUT read_req") #define cachefiles_coherency_traces \ EM(cachefiles_coherency_check_aux, "BAD aux ") \ -- cgit v1.2.3 From 7c327d56597d8de1680cf24e956b704270d3d84a Mon Sep 17 00:00:00 2001 From: Richard Maina Date: Wed, 29 May 2024 11:09:55 -0700 Subject: hwspinlock: Introduce hwspin_lock_bust() When a remoteproc crashes or goes down unexpectedly this can result in a state where locks held by the remoteproc will remain locked possibly resulting in deadlock. This new API hwspin_lock_bust() allows hwspinlock implementers to define a bust operation for freeing previously acquired hwspinlocks after verifying ownership of the acquired lock. Signed-off-by: Richard Maina Reviewed-by: Bjorn Andersson Signed-off-by: Chris Lew Link: https://lore.kernel.org/r/20240529-hwspinlock-bust-v3-1-c8b924ffa5a2@quicinc.com Signed-off-by: Bjorn Andersson --- include/linux/hwspinlock.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h index bfe7c1f1ac6d..f0231dbc4777 100644 --- a/include/linux/hwspinlock.h +++ b/include/linux/hwspinlock.h @@ -68,6 +68,7 @@ int __hwspin_lock_timeout(struct hwspinlock *, unsigned int, int, int __hwspin_trylock(struct hwspinlock *, int, unsigned long *); void __hwspin_unlock(struct hwspinlock *, int, unsigned long *); int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name); +int hwspin_lock_bust(struct hwspinlock *hwlock, unsigned int id); int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock); struct hwspinlock *devm_hwspin_lock_request(struct device *dev); struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev, @@ -127,6 +128,11 @@ void __hwspin_unlock(struct hwspinlock *hwlock, int mode, unsigned long *flags) { } +static inline int hwspin_lock_bust(struct hwspinlock *hwlock, unsigned int id) +{ + return 0; +} + static inline int of_hwspin_lock_get_id(struct device_node *np, int index) { return 0; -- cgit v1.2.3 From 2e3f0d693875db698891ffe89a18121bda5b95b8 Mon Sep 17 00:00:00 2001 From: Chris Lew Date: Wed, 29 May 2024 11:09:57 -0700 Subject: soc: qcom: smem: Add qcom_smem_bust_hwspin_lock_by_host() Add qcom_smem_bust_hwspin_lock_by_host to enable remoteproc to bust the hwspin_lock owned by smem. In the event the remoteproc crashes unexpectedly, the remoteproc driver can invoke this API to try and bust the hwspin_lock and release the lock if still held by the remoteproc device. Signed-off-by: Chris Lew Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240529-hwspinlock-bust-v3-3-c8b924ffa5a2@quicinc.com Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/smem.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/soc/qcom/smem.h b/include/linux/soc/qcom/smem.h index a36a3b9d4929..03187bc95851 100644 --- a/include/linux/soc/qcom/smem.h +++ b/include/linux/soc/qcom/smem.h @@ -14,4 +14,6 @@ phys_addr_t qcom_smem_virt_to_phys(void *p); int qcom_smem_get_soc_id(u32 *id); +int qcom_smem_bust_hwspin_lock_by_host(unsigned int host); + #endif -- cgit v1.2.3 From 5e514f1cba090e1c8fff03e92a175eccfe46305f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 May 2024 12:52:50 +0000 Subject: tcp: add tcp_done_with_error() helper tcp_reset() ends with a sequence that is carefuly ordered. We need to fix [e]poll bugs in the following patches, it makes sense to use a common helper. Suggested-by: Neal Cardwell Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Link: https://lore.kernel.org/r/20240528125253.1966136-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 060e95b331a2..32815a40dea1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -677,6 +677,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb, /* tcp_input.c */ void tcp_rearm_rto(struct sock *sk); void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); +void tcp_done_with_error(struct sock *sk, int err); void tcp_reset(struct sock *sk, struct sk_buff *skb); void tcp_fin(struct sock *sk); void tcp_check_space(struct sock *sk); -- cgit v1.2.3 From bbb31b7ae14594aa2a7e74923ee38f312404ad66 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 28 May 2024 16:05:09 +0100 Subject: net: dsa: remove mac_prepare()/mac_finish() shims No DSA driver makes use of the mac_prepare()/mac_finish() shimmed operations anymore, so we can remove these. Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/E1sByNx-00ELW1-Vp@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index b60e7e410aba..f9ae3ca66b6f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -882,15 +882,9 @@ struct dsa_switch_ops { struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds, int port, phy_interface_t iface); - int (*phylink_mac_prepare)(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface); void (*phylink_mac_config)(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state); - int (*phylink_mac_finish)(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface); void (*phylink_mac_link_down)(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface); -- cgit v1.2.3 From 6f6bfbc595fbae95f4c1ff80c87d089604e5e6a1 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 27 May 2024 23:11:37 -0700 Subject: RDMA/bnxt_re: Expose the MSN table capability for user library BNXT_RE_COMP_MASK_UCNTX_HW_RETX_ENABLED was introduced to share the HW retransmit capability between driver and lib. The main difference in implementation for HW Retransmit support is the usage of MSN table or PSN table . When HW retrans is enabled, HW expects MSN table to be allocated by driver/lib, else PSN table (for older adapters). FW expose a new field which gives MSN capability. Drivers and libs can depend on the new field instead of HW Retrasns capability. For adapters which support HW_RETX feature, MSN table capability will be set. For older adapters, this value will be 0(to maintain backward compatibility with older FW). Rename UAPI just to capture the correct name of the HW capability that driver/library is interested in. No functional impact even if older rdma-core is used. Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1716876697-25970-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- include/uapi/rdma/bnxt_re-abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index c0c34aca90ec..e61104f35d73 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -55,7 +55,7 @@ enum { BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED = 0x04ULL, BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED = 0x08ULL, BNXT_RE_UCNTX_CMASK_POW2_DISABLED = 0x10ULL, - BNXT_RE_COMP_MASK_UCNTX_HW_RETX_ENABLED = 0x40, + BNXT_RE_UCNTX_CMASK_MSN_TABLE_ENABLED = 0x40, }; enum bnxt_re_wqe_mode { -- cgit v1.2.3 From fdefb918496235a11d6c5477c34c81aab2c1343b Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Wed, 22 May 2024 01:24:01 -0700 Subject: RDMA/mana_ib: Implement uapi to create and destroy RC QP Implement user requests to create and destroy an RC QP. As the user does not have an FMR queue, it is skipped and NO_FMR flag is used. Signed-off-by: Konstantin Taranov Link: https://lore.kernel.org/r/1716366242-558-3-git-send-email-kotaranov@linux.microsoft.com Reviewed-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- include/uapi/rdma/mana-abi.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mana-abi.h b/include/uapi/rdma/mana-abi.h index 2c41cc315218..45c2df619f07 100644 --- a/include/uapi/rdma/mana-abi.h +++ b/include/uapi/rdma/mana-abi.h @@ -45,6 +45,15 @@ struct mana_ib_create_qp_resp { __u32 reserved; }; +struct mana_ib_create_rc_qp { + __aligned_u64 queue_buf[4]; + __u32 queue_size[4]; +}; + +struct mana_ib_create_rc_qp_resp { + __u32 queue_id[4]; +}; + struct mana_ib_create_wq { __aligned_u64 wq_buf_addr; __u32 wq_buf_size; -- cgit v1.2.3 From 73fc975318e0ab3385c5b3372c7b296ae58c8d6b Mon Sep 17 00:00:00 2001 From: Durai Manickam KR Date: Wed, 24 Apr 2024 11:03:45 +0530 Subject: drm: atmel-hlcdc: Define XLCDC specific registers The register address of the XLCDC IP used in SAM9X7 SoC family are different from the previous HLCDC. Defining those address space with valid macros. Signed-off-by: Durai Manickam KR [manikandan.m@microchip.com: Remove unused macro definitions] Signed-off-by: Manikandan Muralidharan Acked-by: Lee Jones Acked-by: Sam Ravnborg Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20240424053351.589830-3-manikandan.m@microchip.com --- include/linux/mfd/atmel-hlcdc.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/atmel-hlcdc.h b/include/linux/mfd/atmel-hlcdc.h index a186119a49b5..80d675a03b39 100644 --- a/include/linux/mfd/atmel-hlcdc.h +++ b/include/linux/mfd/atmel-hlcdc.h @@ -22,6 +22,8 @@ #define ATMEL_HLCDC_DITHER BIT(6) #define ATMEL_HLCDC_DISPDLY BIT(7) #define ATMEL_HLCDC_MODE_MASK GENMASK(9, 8) +#define ATMEL_XLCDC_MODE_MASK GENMASK(10, 8) +#define ATMEL_XLCDC_DPI BIT(11) #define ATMEL_HLCDC_PP BIT(10) #define ATMEL_HLCDC_VSPSU BIT(12) #define ATMEL_HLCDC_VSPHO BIT(13) @@ -34,6 +36,12 @@ #define ATMEL_HLCDC_IDR 0x30 #define ATMEL_HLCDC_IMR 0x34 #define ATMEL_HLCDC_ISR 0x38 +#define ATMEL_XLCDC_ATTRE 0x3c + +#define ATMEL_XLCDC_BASE_UPDATE BIT(0) +#define ATMEL_XLCDC_OVR1_UPDATE BIT(1) +#define ATMEL_XLCDC_OVR3_UPDATE BIT(2) +#define ATMEL_XLCDC_HEO_UPDATE BIT(3) #define ATMEL_HLCDC_CLKPOL BIT(0) #define ATMEL_HLCDC_CLKSEL BIT(2) @@ -48,6 +56,8 @@ #define ATMEL_HLCDC_DISP BIT(2) #define ATMEL_HLCDC_PWM BIT(3) #define ATMEL_HLCDC_SIP BIT(4) +#define ATMEL_XLCDC_SD BIT(5) +#define ATMEL_XLCDC_CM BIT(6) #define ATMEL_HLCDC_SOF BIT(0) #define ATMEL_HLCDC_SYNCDIS BIT(1) -- cgit v1.2.3 From 2c7afc2a880cd4899f9dd6bfa62f10f84773148b Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Thu, 16 May 2024 22:17:31 +0100 Subject: kunit: Cover 'assert.c' with tests There are multiple assertion formatting functions in the `assert.c` file, which are not covered with tests yet. Implement the KUnit test for these functions. The test consists of 11 test cases for the following functions: 1) 'is_literal' 2) 'is_str_literal' 3) 'kunit_assert_prologue', test case for multiple assert types 4) 'kunit_assert_print_msg' 5) 'kunit_unary_assert_format' 6) 'kunit_ptr_not_err_assert_format' 7) 'kunit_binary_assert_format' 8) 'kunit_binary_ptr_assert_format' 9) 'kunit_binary_str_assert_format' 10) 'kunit_assert_hexdump' 11) 'kunit_mem_assert_format' The test aims at maximizing the branch coverage for the assertion formatting functions. As you can see, it covers some of the static helper functions as well, so mark the static functions in `assert.c` as 'VISIBLE_IF_KUNIT' and conditionally export them with EXPORT_SYMBOL_IF_KUNIT. Add the corresponding definitions to `assert.h`. Build the assert test when CONFIG_KUNIT_TEST is enabled, similar to how it is done for the string stream test. Signed-off-by: Ivan Orlov Reviewed-by: Rae Moar Acked-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/assert.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/kunit/assert.h b/include/kunit/assert.h index 24c2b9fa61e8..7e7490a74b13 100644 --- a/include/kunit/assert.h +++ b/include/kunit/assert.h @@ -218,4 +218,15 @@ void kunit_mem_assert_format(const struct kunit_assert *assert, const struct va_format *message, struct string_stream *stream); +#if IS_ENABLED(CONFIG_KUNIT) +void kunit_assert_print_msg(const struct va_format *message, + struct string_stream *stream); +bool is_literal(const char *text, long long value); +bool is_str_literal(const char *text, const char *value); +void kunit_assert_hexdump(struct string_stream *stream, + const void *buf, + const void *compared_buf, + const size_t len); +#endif + #endif /* _KUNIT_ASSERT_H */ -- cgit v1.2.3 From 020e6c22bd6e67592f38b47d0f1926a831482560 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 10 May 2024 15:36:57 -0700 Subject: kcsan: Add example to data_race() kerneldoc header Although the data_race() kerneldoc header accurately states what it does, some of the implications and usage patterns are non-obvious. Therefore, add a brief locking example and also state how to have KCSAN ignore accesses while also preventing the compiler from folding, spindling, or otherwise mutilating the access. [ paulmck: Apply Bart Van Assche feedback. ] [ paulmck: Apply feedback from Marco Elver. ] Reported-by: Bart Van Assche Signed-off-by: Paul E. McKenney Cc: Marco Elver Cc: Breno Leitao Cc: Jens Axboe --- include/linux/compiler.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 8c252e073bd8..68a24a3a6979 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -194,9 +194,17 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, * This data_race() macro is useful for situations in which data races * should be forgiven. One example is diagnostic code that accesses * shared variables but is not a part of the core synchronization design. + * For example, if accesses to a given variable are protected by a lock, + * except for diagnostic code, then the accesses under the lock should + * be plain C-language accesses and those in the diagnostic code should + * use data_race(). This way, KCSAN will complain if buggy lockless + * accesses to that variable are introduced, even if the buggy accesses + * are protected by READ_ONCE() or WRITE_ONCE(). * * This macro *does not* affect normal code generation, but is a hint - * to tooling that data races here are to be ignored. + * to tooling that data races here are to be ignored. If the access must + * be atomic *and* KCSAN should ignore the access, use both data_race() + * and READ_ONCE(), for example, data_race(READ_ONCE(x)). */ #define data_race(expr) \ ({ \ -- cgit v1.2.3 From 73287fe228721b05690e671adbcccc6cf5435be6 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 29 May 2024 23:59:39 -0700 Subject: bpf: pass bpf_struct_ops_link to callbacks in bpf_struct_ops. Pass an additional pointer of bpf_struct_ops_link to callback function reg, unreg, and update provided by subsystems defined in bpf_struct_ops. A bpf_struct_ops_map can be registered for multiple links. Passing a pointer of bpf_struct_ops_link helps subsystems to distinguish them. This pointer will be used in the later patches to let the subsystem initiate a detachment on a link that was registered to it previously. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240530065946.979330-2-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5e694a308081..19f8836382fc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1730,9 +1730,9 @@ struct bpf_struct_ops { int (*init_member)(const struct btf_type *t, const struct btf_member *member, void *kdata, const void *udata); - int (*reg)(void *kdata); - void (*unreg)(void *kdata); - int (*update)(void *kdata, void *old_kdata); + int (*reg)(void *kdata, struct bpf_link *link); + void (*unreg)(void *kdata, struct bpf_link *link); + int (*update)(void *kdata, void *old_kdata, struct bpf_link *link); int (*validate)(void *kdata); void *cfi_stubs; struct module *owner; -- cgit v1.2.3 From 1adddc97aa44c8783f9f0276ea70854d56f9f6df Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 29 May 2024 23:59:41 -0700 Subject: bpf: support epoll from bpf struct_ops links. Add epoll support to bpf struct_ops links to trigger EPOLLHUP event upon detachment. This patch implements the "poll" of the "struct file_operations" for BPF links and introduces a new "poll" operator in the "struct bpf_link_ops". By implementing "poll" of "struct bpf_link_ops" for the links of struct_ops, the file descriptor of a struct_ops link can be added to an epoll file descriptor to receive EPOLLHUP events. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240530065946.979330-4-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 19f8836382fc..5eb61120e4f5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1612,6 +1612,7 @@ struct bpf_link_ops { struct bpf_link_info *info); int (*update_map)(struct bpf_link *link, struct bpf_map *new_map, struct bpf_map *old_map); + __poll_t (*poll)(struct file *file, struct poll_table_struct *pts); }; struct bpf_tramp_link { -- cgit v1.2.3 From 67c3e8353f45c27800eecc46e00e8272f063f7d1 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 29 May 2024 23:59:42 -0700 Subject: bpf: export bpf_link_inc_not_zero. bpf_link_inc_not_zero() will be used by kernel modules. We will use it in bpf_testmod.c later. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240530065946.979330-5-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5eb61120e4f5..a834f4b761bc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2334,6 +2334,7 @@ int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); int bpf_link_settle(struct bpf_link_primer *primer); void bpf_link_cleanup(struct bpf_link_primer *primer); void bpf_link_inc(struct bpf_link *link); +struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); @@ -2705,6 +2706,11 @@ static inline void bpf_link_inc(struct bpf_link *link) { } +static inline struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) +{ + return NULL; +} + static inline void bpf_link_put(struct bpf_link *link) { } -- cgit v1.2.3 From 9ec54934ce857065e38523a2010e20182e76f515 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Thu, 30 May 2024 17:25:07 +0300 Subject: scsi: ufs: core: Allow RTT negotiation The rtt-upiu packets precede any data-out upiu packets, thus synchronizing the data input to the device: this mostly applies to write operations, but there are other operations that requires rtt as well. There are several rules binding this rtt - data-out dialog, specifically There can be at most outstanding bMaxNumOfRTT such packets. This might have an effect on write performance (sequential write in particular), as each data-out upiu must wait for its rtt sibling. UFSHCI expects bMaxNumOfRTT to be min(bDeviceRTTCap, NORTT). However, as of today, there does not appears to be no-one who sets it: not the host controller nor the driver. It wasn't an issue up to now: bMaxNumOfRTT is set to 2 after manufacturing, and wasn't limiting the write performance. UFS4.0, and specifically gear 5 changes this, and requires the device to be more attentive. This doesn't come free - the device has to allocate more resources to that end, but the sequential write performance improvement is significant. Early measurements shows 25% gain when moving from rtt 2 to 9. Therefore, set bMaxNumOfRTT to be min(bDeviceRTTCap, NORTT) as UFSHCI expects. Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20240530142510.734-2-avri.altman@wdc.com Reviewed-by: Bean Huo Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufs.h | 2 ++ include/ufs/ufshcd.h | 2 ++ include/ufs/ufshci.h | 1 + 3 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index b6003749bc83..853e95957c31 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -592,6 +592,8 @@ struct ufs_dev_info { enum ufs_rtc_time rtc_type; time64_t rtc_time_baseline; u32 rtc_update_period; + + u8 rtt_cap; /* bDeviceRTTCap */ }; /* diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index bad88bd91995..d74bd2d67b06 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -819,6 +819,7 @@ enum ufshcd_mcq_opr { * @capabilities: UFS Controller Capabilities * @mcq_capabilities: UFS Multi Circular Queue capabilities * @nutrs: Transfer Request Queue depth supported by controller + * @nortt - Max outstanding RTTs supported by controller * @nutmrs: Task Management Queue depth supported by controller * @reserved_slot: Used to submit device commands. Protected by @dev_cmd.lock. * @ufs_version: UFS Version to which controller complies @@ -957,6 +958,7 @@ struct ufs_hba { u32 capabilities; int nutrs; + int nortt; u32 mcq_capabilities; int nutmrs; u32 reserved_slot; diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 385e1c6b8d60..c50f92bf2e1d 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -68,6 +68,7 @@ enum { /* Controller capability masks */ enum { MASK_TRANSFER_REQUESTS_SLOTS = 0x0000001F, + MASK_NUMBER_OUTSTANDING_RTT = 0x0000FF00, MASK_TASK_MANAGEMENT_REQUEST_SLOTS = 0x00070000, MASK_EHSLUTRD_SUPPORTED = 0x00400000, MASK_AUTO_HIBERN8_SUPPORT = 0x00800000, -- cgit v1.2.3 From e75ff63300c5e8fac31649f438ebad6af88e0032 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Thu, 30 May 2024 17:25:08 +0300 Subject: scsi: ufs: core: Maximum RTT supported by the host driver Allow platform vendors to take precedence having their own max rtt support. This makes sense because the host controller's nortt characteristic may vary among vendors. while at it, set this value for Mediatek, as requested by Peter - https://lore.kernel.org/all/0a57d6bab739d6a10584f2baba115d00dfc9c94c.camel@mediatek.com/ Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20240530142510.734-3-avri.altman@wdc.com Reviewed-by: Peter Wang Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index d74bd2d67b06..ef04ec8aad69 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -295,6 +295,7 @@ struct ufs_pwr_mode_info { /** * struct ufs_hba_variant_ops - variant specific callbacks * @name: variant name + * @max_num_rtt: maximum RTT supported by the host * @init: called when the driver is initialized * @exit: called to cleanup everything done in init * @get_ufs_hci_version: called to get UFS HCI version @@ -332,6 +333,7 @@ struct ufs_pwr_mode_info { */ struct ufs_hba_variant_ops { const char *name; + int max_num_rtt; int (*init)(struct ufs_hba *); void (*exit)(struct ufs_hba *); u32 (*get_ufs_hci_version)(struct ufs_hba *); -- cgit v1.2.3 From 2fc39848952dfb91a9233563cc1444669b8e79c3 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Mon, 20 May 2024 07:14:56 +0900 Subject: scsi: ufs: mcq: Fix missing argument 'hba' in MCQ_OPR_OFFSET_n The MCQ_OPR_OFFSET_n macro takes 'hba' in the caller context without receiving 'hba' instance as an argument. To prevent potential bugs in future use cases, add an argument 'hba'. Fixes: 2468da61ea09 ("scsi: ufs: core: mcq: Configure operation and runtime interface") Cc: Asutosh Das Signed-off-by: Minwoo Im Link: https://lore.kernel.org/r/20240519221457.772346-2-minwoo.im@samsung.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index ef04ec8aad69..1150b86e9355 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1135,6 +1135,12 @@ static inline bool is_mcq_enabled(struct ufs_hba *hba) return hba->mcq_enabled; } +static inline unsigned int ufshcd_mcq_opr_offset(struct ufs_hba *hba, + enum ufshcd_mcq_opr opr, int idx) +{ + return hba->mcq_opr[opr].offset + hba->mcq_opr[opr].stride * idx; +} + #ifdef CONFIG_SCSI_UFS_VARIABLE_SG_ENTRY_SIZE static inline size_t ufshcd_sg_entry_size(const struct ufs_hba *hba) { -- cgit v1.2.3 From e8a1d87b7983b461d1d625e2973cdaadc0bd8ff5 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Mon, 20 May 2024 07:14:57 +0900 Subject: scsi: ufs: mcq: Convert MCQ_CFG_n to an inline function Inline functions are preferred over macros. Convert the MCQ_CFG_n macro to an inline function. Signed-off-by: Minwoo Im Link: https://lore.kernel.org/r/20240519221457.772346-3-minwoo.im@samsung.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 1150b86e9355..df68fb1d4f3f 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1130,6 +1130,8 @@ struct ufs_hw_queue { struct mutex sq_mutex; }; +#define MCQ_QCFG_SIZE 0x40 + static inline bool is_mcq_enabled(struct ufs_hba *hba) { return hba->mcq_enabled; @@ -1141,6 +1143,11 @@ static inline unsigned int ufshcd_mcq_opr_offset(struct ufs_hba *hba, return hba->mcq_opr[opr].offset + hba->mcq_opr[opr].stride * idx; } +static inline unsigned int ufshcd_mcq_cfg_offset(unsigned int reg, int idx) +{ + return reg + MCQ_QCFG_SIZE * idx; +} + #ifdef CONFIG_SCSI_UFS_VARIABLE_SG_ENTRY_SIZE static inline size_t ufshcd_sg_entry_size(const struct ufs_hba *hba) { -- cgit v1.2.3 From a79d8fe2ff8e78e549dc86cc853a61b029404871 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 29 May 2024 12:09:08 +0800 Subject: ipv6: sr: restruct ifdefines There are too many ifdef in IPv6 segment routing code that may cause logic problems. like commit 160e9d275218 ("ipv6: sr: fix invalid unregister error path"). To avoid this, the init functions are redefined for both cases. The code could be more clear after all fidefs are removed. Suggested-by: Simon Horman Suggested-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Sabrina Dubroca Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240529040908.3472952-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- include/net/seg6.h | 7 +++++++ include/net/seg6_hmac.h | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/net/seg6.h b/include/net/seg6.h index af668f17b398..82b3fbbcbb93 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -52,10 +52,17 @@ static inline struct seg6_pernet_data *seg6_pernet(struct net *net) extern int seg6_init(void); extern void seg6_exit(void); +#ifdef CONFIG_IPV6_SEG6_LWTUNNEL extern int seg6_iptunnel_init(void); extern void seg6_iptunnel_exit(void); extern int seg6_local_init(void); extern void seg6_local_exit(void); +#else +static inline int seg6_iptunnel_init(void) { return 0; } +static inline void seg6_iptunnel_exit(void) {} +static inline int seg6_local_init(void) { return 0; } +static inline void seg6_local_exit(void) {} +#endif extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced); extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags); diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h index 2b5d2ee5613e..24f733b3e3fe 100644 --- a/include/net/seg6_hmac.h +++ b/include/net/seg6_hmac.h @@ -49,9 +49,16 @@ extern int seg6_hmac_info_del(struct net *net, u32 key); extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr, struct ipv6_sr_hdr *srh); extern bool seg6_hmac_validate_skb(struct sk_buff *skb); +#ifdef CONFIG_IPV6_SEG6_HMAC extern int seg6_hmac_init(void); extern void seg6_hmac_exit(void); extern int seg6_hmac_net_init(struct net *net); extern void seg6_hmac_net_exit(struct net *net); +#else +static inline int seg6_hmac_init(void) { return 0; } +static inline void seg6_hmac_exit(void) {} +static inline int seg6_hmac_net_init(struct net *net) { return 0; } +static inline void seg6_hmac_net_exit(struct net *net) {} +#endif #endif -- cgit v1.2.3 From 02d00dc73d8d0613f82063ed53d67912a422c21e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 May 2024 14:29:29 +0100 Subject: net: phylink: rename ovr_an_inband to default_an_inband Since ovr_an_inband no longer overrides every MLO_AN_xxx mode, rename it to reflect what it now does - it changes the default mode from MLO_AN_PHY to MLO_AN_INBAND. Fix up the two users of this. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/E1sCJMv-00Ecr1-Sk@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/phylink.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 5ea6b2ad2396..a30a692acc32 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -141,7 +141,8 @@ enum phylink_op_type { * @mac_requires_rxc: if true, the MAC always requires a receive clock from PHY. * The PHY driver should start the clock signal as soon as * possible and avoid stopping it during suspend events. - * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND + * @default_an_inband: if true, defaults to MLO_AN_INBAND rather than + * MLO_AN_PHY. A fixed-link specification will override. * @get_fixed_state: callback to execute to determine the fixed link state, * if MAC link is at %MLO_AN_FIXED mode. * @supported_interfaces: bitmap describing which PHY_INTERFACE_MODE_xxx @@ -154,7 +155,7 @@ struct phylink_config { bool poll_fixed_state; bool mac_managed_pm; bool mac_requires_rxc; - bool ovr_an_inband; + bool default_an_inband; void (*get_fixed_state)(struct phylink_config *config, struct phylink_link_state *state); DECLARE_PHY_INTERFACE_MASK(supported_interfaces); -- cgit v1.2.3 From 83f55b01dd903040d6ab64f4f9d78a27391a5916 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 May 2024 14:29:40 +0100 Subject: net: stmmac: rename xpcs_an_inband to default_an_inband Rename xpcs_an_inband to default_an_inband to reflect the change in phylink and its changed functionality. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/E1sCJN6-00EcrD-43@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index f92c195c76ed..8f0f156d50d3 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -83,7 +83,7 @@ struct stmmac_priv; struct stmmac_mdio_bus_data { unsigned int phy_mask; unsigned int has_xpcs; - unsigned int xpcs_an_inband; + unsigned int default_an_inband; int *irqs; int probed_phy_irq; bool needs_reset; -- cgit v1.2.3 From 5607ca92e6274dfb85d0ff7c4e91e6c4ddb6d25c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 31 May 2024 13:41:22 +0200 Subject: leds: core: Add led_mc_set_brightness() function Add a new led_mc_set_brightness() function for in kernel color/brightness changing of multi-color LEDs. led-class-multicolor can be build as a module and led_mc_set_brightness() will have the builtin callers, so put led_mc_set_brightness() inside led-core instead, just like how led_set_brightness() is part of the core and not of the led-class object. This also adds a new LED_MULTI_COLOR led_classdev flag to allow led_mc_set_brightness() to verify that it is operating on a multi-color LED classdev, avoiding casting the passed in LED classdev to a multi-color LED classdev, when it actually is not a multi-color LED. Signed-off-by: Hans de Goede Reviewed-by: Jacek Anaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240531114124.45346-5-hdegoede@redhat.com Signed-off-by: Lee Jones --- include/linux/leds.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index 6300313c46b7..ae07e44f1dcb 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -107,6 +107,7 @@ struct led_classdev { #define LED_BRIGHT_HW_CHANGED BIT(21) #define LED_RETAIN_AT_SHUTDOWN BIT(22) #define LED_INIT_DEFAULT_TRIGGER BIT(23) +#define LED_MULTI_COLOR BIT(24) /* set_brightness_work / blink_timer flags, atomic, private. */ unsigned long work_flags; @@ -373,6 +374,25 @@ void led_set_brightness(struct led_classdev *led_cdev, unsigned int brightness); */ int led_set_brightness_sync(struct led_classdev *led_cdev, unsigned int value); +/** + * led_mc_set_brightness - set mc LED color intensity values and brightness + * @led_cdev: the LED to set + * @intensity_value: array of per color intensity values to set + * @num_colors: amount of entries in intensity_value array + * @brightness: the brightness to set the LED to + * + * Set a multi-color LED's per color intensity values and brightness. + * If necessary, this cancels the software blink timer. This function is + * guaranteed not to sleep. + * + * Calling this function on a non multi-color led_classdev or with the wrong + * num_colors value is an error. In this case an error will be logged once + * and the call will do nothing. + */ +void led_mc_set_brightness(struct led_classdev *led_cdev, + unsigned int *intensity_value, unsigned int num_colors, + unsigned int brightness); + /** * led_update_brightness - update LED brightness * @led_cdev: the LED to query -- cgit v1.2.3 From 0921a57c91648b08857b47a2f26fa7942f06120f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 31 May 2024 13:41:23 +0200 Subject: leds: trigger: Add led_mc_trigger_event() function Add a new led_mc_trigger_event() function for triggers which want to change the color of a multi-color LED based on their trigger conditions. Signed-off-by: Hans de Goede Reviewed-by: Jacek Anaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240531114124.45346-6-hdegoede@redhat.com Signed-off-by: Lee Jones --- include/linux/leds.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index ae07e44f1dcb..517b6198df07 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -510,6 +510,9 @@ void led_trigger_register_simple(const char *name, struct led_trigger **trigger); void led_trigger_unregister_simple(struct led_trigger *trigger); void led_trigger_event(struct led_trigger *trigger, enum led_brightness event); +void led_mc_trigger_event(struct led_trigger *trig, + unsigned int *intensity_value, unsigned int num_colors, + enum led_brightness brightness); void led_trigger_blink(struct led_trigger *trigger, unsigned long delay_on, unsigned long delay_off); void led_trigger_blink_oneshot(struct led_trigger *trigger, @@ -552,6 +555,9 @@ static inline void led_trigger_register_simple(const char *name, static inline void led_trigger_unregister_simple(struct led_trigger *trigger) {} static inline void led_trigger_event(struct led_trigger *trigger, enum led_brightness event) {} +static inline void led_mc_trigger_event(struct led_trigger *trig, + unsigned int *intensity_value, unsigned int num_colors, + enum led_brightness brightness) {} static inline void led_trigger_blink(struct led_trigger *trigger, unsigned long delay_on, unsigned long delay_off) {} -- cgit v1.2.3 From 9af12f57f1f9785f231d31a7365ad244c656b7ff Mon Sep 17 00:00:00 2001 From: Kate Hsuan Date: Fri, 31 May 2024 13:41:24 +0200 Subject: power: supply: power-supply-leds: Add charging_orange_full_green trigger for RGB LED Add a charging_orange_full_green LED trigger and the trigger is based on led_mc_trigger_event() which can set an RGB LED when the trigger is triggered. The LED will show orange when the battery status is charging. The LED will show green when the battery status is full. Link: https://lore.kernel.org/linux-leds/f40a0b1a-ceac-e269-c2dd-0158c5b4a1ad@gmail.com/ Signed-off-by: Kate Hsuan Acked-by: Sebastian Reichel Reviewed-by: Andy Shevchenko [hdegoede@redhat.com change color order to RGB] Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240531114124.45346-7-hdegoede@redhat.com Signed-off-by: Lee Jones --- include/linux/power_supply.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 8e5705a56b85..c852cc882501 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -319,6 +319,8 @@ struct power_supply { char *online_trig_name; struct led_trigger *charging_blink_full_solid_trig; char *charging_blink_full_solid_trig_name; + struct led_trigger *charging_orange_full_green_trig; + char *charging_orange_full_green_trig_name; #endif }; -- cgit v1.2.3 From b44d79d6bad16c30978c2cee3421133d3f181494 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 27 May 2024 16:29:33 +0300 Subject: platform/x86/intel/tpmi: Add support for performance limit reasons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TPMI ID 0x0C (Perf Limit Reasons) to the list of supported TPMI IDs. Reviewed-by: Andy Shevchenko Reviewed-by: Ilpo Järvinen Signed-off-by: Tero Kristo Link: https://lore.kernel.org/r/20240527133400.483634-2-tero.kristo@linux.intel.com Signed-off-by: Ilpo Järvinen --- include/linux/intel_tpmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h index 1e880cb0f454..a88ac937d2c2 100644 --- a/include/linux/intel_tpmi.h +++ b/include/linux/intel_tpmi.h @@ -21,6 +21,7 @@ enum intel_tpmi_id { TPMI_ID_PEM = 1, /* Power and Perf excursion Monitor */ TPMI_ID_UNCORE = 2, /* Uncore Frequency Scaling */ TPMI_ID_SST = 5, /* Speed Select Technology */ + TPMI_ID_PLR = 0xc, /* Performance Limit Reasons */ TPMI_CONTROL_ID = 0x80, /* Special ID for getting feature status */ TPMI_INFO_ID = 0x81, /* Special ID for PCI BDF and Package ID information */ }; -- cgit v1.2.3 From d36842bacf8e3491f555059f27de57b3436cc3ff Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 27 May 2024 16:29:34 +0300 Subject: platform/x86/intel/tpmi: Add API to get debugfs root MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add new API to get the debugfs root directory for TPMI. This allows any TPMI devices to add their own debugfs items under the same directory structure. Reviewed-by: Andy Shevchenko Reviewed-by: Ilpo Järvinen Signed-off-by: Tero Kristo Link: https://lore.kernel.org/r/20240527133400.483634-3-tero.kristo@linux.intel.com Signed-off-by: Ilpo Järvinen --- include/linux/intel_tpmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h index a88ac937d2c2..ff480b47ae64 100644 --- a/include/linux/intel_tpmi.h +++ b/include/linux/intel_tpmi.h @@ -54,4 +54,5 @@ struct resource *tpmi_get_resource_at_index(struct auxiliary_device *auxdev, int int tpmi_get_resource_count(struct auxiliary_device *auxdev); int tpmi_get_feature_status(struct auxiliary_device *auxdev, int feature_id, bool *read_blocked, bool *write_blocked); +struct dentry *tpmi_get_debugfs_dir(struct auxiliary_device *auxdev); #endif -- cgit v1.2.3 From 4d2bcefa965b06a1f2be6912456bcfa86a34f184 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 31 May 2024 13:29:02 +0100 Subject: mm: Reduce the number of slab->folio casts Mark a few more folio functions as taking a const folio pointer, which allows us to remove a few places in slab which cast away the const. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Vlastimil Babka --- include/linux/mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9849dfda44d4..a983d371fafa 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1105,7 +1105,7 @@ static inline unsigned int compound_order(struct page *page) * * Return: The order of the folio. */ -static inline unsigned int folio_order(struct folio *folio) +static inline unsigned int folio_order(const struct folio *folio) { if (!folio_test_large(folio)) return 0; @@ -2145,7 +2145,7 @@ static inline struct folio *folio_next(struct folio *folio) * it from being split. It is not necessary for the folio to be locked. * Return: The base-2 logarithm of the size of this folio. */ -static inline unsigned int folio_shift(struct folio *folio) +static inline unsigned int folio_shift(const struct folio *folio) { return PAGE_SHIFT + folio_order(folio); } @@ -2158,7 +2158,7 @@ static inline unsigned int folio_shift(struct folio *folio) * it from being split. It is not necessary for the folio to be locked. * Return: The number of bytes in this folio. */ -static inline size_t folio_size(struct folio *folio) +static inline size_t folio_size(const struct folio *folio) { return PAGE_SIZE << folio_order(folio); } -- cgit v1.2.3 From b7c5e64fecfa88764791679cca4786ac65de739e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 29 May 2024 22:52:30 -0600 Subject: vfio: Create vfio_fs_type with inode per device By linking all the device fds we provide to userspace to an address space through a new pseudo fs, we can use tools like unmap_mapping_range() to zap all vmas associated with a device. Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240530045236.1005864-2-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- include/linux/vfio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 8b1a29820409..000a6cab2d31 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -64,6 +64,7 @@ struct vfio_device { struct completion comp; struct iommufd_access *iommufd_access; void (*put_kvm)(struct kvm *kvm); + struct inode *inode; #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; u8 iommufd_attached:1; -- cgit v1.2.3 From aac6db75a9fc2c7a6f73e152df8f15101dda38e6 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 29 May 2024 22:52:31 -0600 Subject: vfio/pci: Use unmap_mapping_range() With the vfio device fd tied to the address space of the pseudo fs inode, we can use the mm to track all vmas that might be mmap'ing device BARs, which removes our vma_list and all the complicated lock ordering necessary to manually zap each related vma. Note that we can no longer store the pfn in vm_pgoff if we want to use unmap_mapping_range() to zap a selective portion of the device fd corresponding to BAR mappings. This also converts our mmap fault handler to use vmf_insert_pfn() because we no longer have a vma_list to avoid the concurrency problem with io_remap_pfn_range(). The goal is to eventually use the vm_ops huge_fault handler to avoid the additional faulting overhead, but vmf_insert_pfn_{pmd,pud}() need to learn about pfnmaps first. Also, Jason notes that a race exists between unmap_mapping_range() and the fops mmap callback if we were to call io_remap_pfn_range() to populate the vma on mmap. Specifically, mmap_region() does call_mmap() before it does vma_link_file() which gives a window where the vma is populated but invisible to unmap_mapping_range(). Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240530045236.1005864-3-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- include/linux/vfio_pci_core.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index a2c8b8bba711..f87067438ed4 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -93,8 +93,6 @@ struct vfio_pci_core_device { struct list_head sriov_pfs_item; struct vfio_pci_core_device *sriov_pf_core_dev; struct notifier_block nb; - struct mutex vma_lock; - struct list_head vma_list; struct rw_semaphore memory_lock; }; -- cgit v1.2.3 From 055afc34fd219c8e2290d736ad186edd58870a9e Mon Sep 17 00:00:00 2001 From: Unnathi Chalicheemala Date: Fri, 31 May 2024 09:45:25 -0700 Subject: soc: qcom: llcc: Add regmap for Broadcast_AND region Until SM8450, there was only one broadcast region (Broadcast_OR) used to broadcast write and check for status bit 0. >From SM8450 onwards another broadcast region (Broadcast_AND) has been added which checks for status bit 1. This hasn't been updated and Broadcast_OR region was wrongly being used to check for status bit 1 all along. Hence define new regmap structure for Broadcast_AND region and initialize this regmap when HW block version is greater than 4.1, otherwise initialize as a NULL pointer for backwards compatibility. Switch from broadcast_OR to broadcast_AND region (when defined in DT) for checking status bit 1 as Broadcast_OR region checks only for bit 0. Signed-off-by: Unnathi Chalicheemala Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/9cf19928a67eaa577ae0f02de5bf86276be34ea2.1717014052.git.quic_uchalich@quicinc.com Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/llcc-qcom.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 1a886666bbb6..9e9f528b1370 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -115,7 +115,8 @@ struct llcc_edac_reg_offset { /** * struct llcc_drv_data - Data associated with the llcc driver * @regmaps: regmaps associated with the llcc device - * @bcast_regmap: regmap associated with llcc broadcast offset + * @bcast_regmap: regmap associated with llcc broadcast OR offset + * @bcast_and_regmap: regmap associated with llcc broadcast AND offset * @cfg: pointer to the data structure for slice configuration * @edac_reg_offset: Offset of the LLCC EDAC registers * @lock: mutex associated with each slice @@ -129,6 +130,7 @@ struct llcc_edac_reg_offset { struct llcc_drv_data { struct regmap **regmaps; struct regmap *bcast_regmap; + struct regmap *bcast_and_regmap; const struct llcc_slice_config *cfg; const struct llcc_edac_reg_offset *edac_reg_offset; struct mutex lock; -- cgit v1.2.3 From 69c8b998717c03adeada070882512ebeae11d71f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 May 2024 09:29:22 -0700 Subject: net: qstat: extend kdoc about get_base_stats mlx5 has a dedicated queue for PTP packets. Clarify that this sort of queues can also be accounted towards the base. Reviewed-by: Joe Damato Link: https://lore.kernel.org/r/20240529162922.3690698-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netdev_queues.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index a8a7e48dfa6c..5ca019d294ca 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -62,6 +62,8 @@ struct netdev_queue_stats_tx { * statistics will not generally add up to the total number of events for * the device. The @get_base_stats callback allows filling in the delta * between events for currently live queues and overall device history. + * @get_base_stats can also be used to report any miscellaneous packets + * transferred outside of the main set of queues used by the networking stack. * When the statistics for the entire device are queried, first @get_base_stats * is issued to collect the delta, and then a series of per-queue callbacks. * Only statistics which are set in @get_base_stats will be reported -- cgit v1.2.3 From 964a504b1261b641797d93cdbf79b68ff7d85720 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 31 May 2024 15:47:00 +0200 Subject: power: supply: leds: Add power_supply_[un]register_led_trigger() Add power_supply_[un]register_led_trigger() helper functions. The primary goal of this is as a preparation patch for adding an activate callback to the power-supply LED triggers to ensure that power-supply LEDs get the correct initial value when the LED gets registered after the power_supply has been registered (this will use the psy back pointer). There also is quite a lot of code duplication in the existing LED trigger registration in the form of the kasprintf() for the name-template for each trigger + related error handling. This duplication is removed by these new helpers. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240531134702.166145-2-hdegoede@redhat.com Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index c852cc882501..e218a8ab72ee 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -310,17 +310,11 @@ struct power_supply { #ifdef CONFIG_LEDS_TRIGGERS struct led_trigger *charging_full_trig; - char *charging_full_trig_name; struct led_trigger *charging_trig; - char *charging_trig_name; struct led_trigger *full_trig; - char *full_trig_name; struct led_trigger *online_trig; - char *online_trig_name; struct led_trigger *charging_blink_full_solid_trig; - char *charging_blink_full_solid_trig_name; struct led_trigger *charging_orange_full_green_trig; - char *charging_orange_full_green_trig_name; #endif }; -- cgit v1.2.3 From 6c951a84dab9c0e051aec54d7497f25e910a4953 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 31 May 2024 15:47:01 +0200 Subject: power: supply: leds: Share trig pointer for online and charging_full Either 5 different LED triggers are registered for battery power-supply devices or a single online LED trigger is used for non battery power-supply devices. These 5 / 1 LED trigger(s) are never used at the same time. So there is no need for a separate LED trigger pointer for the online trigger. Rename the first battery trigger from charging_full_trig to just trig and use this for the online trigger too. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240531134702.166145-3-hdegoede@redhat.com Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index e218a8ab72ee..65082ef75692 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -309,10 +309,9 @@ struct power_supply { #endif #ifdef CONFIG_LEDS_TRIGGERS - struct led_trigger *charging_full_trig; + struct led_trigger *trig; struct led_trigger *charging_trig; struct led_trigger *full_trig; - struct led_trigger *online_trig; struct led_trigger *charging_blink_full_solid_trig; struct led_trigger *charging_orange_full_green_trig; #endif -- cgit v1.2.3 From a14a569a9918a0c7e340257a17dbc088bb27db72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 29 May 2024 08:27:11 +0200 Subject: platform/chrome: cros_ec_proto: Introduce cros_ec_cmd_readmem() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To read from the EC memory different mechanism are possible. ECs connected via LPC expose their memory via a ->cmd_readmem operation. Other protocols require the usage of EC_CMD_READ_MEMMAP, which on the other hand is not implemented by LPC ECs. Provide a helper that automatically selects the correct mechanism. Signed-off-by: Thomas Weißschuh Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240529-cros_ec-hwmon-v4-1-5cdf0c5db50a@weissschuh.net Signed-off-by: Tzung-Bi Shih --- include/linux/platform_data/cros_ec_proto.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 8865e350c12a..1ddc52603f9a 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -261,6 +261,8 @@ int cros_ec_get_sensor_count(struct cros_ec_dev *ec); int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, const void *outdata, size_t outsize, void *indata, size_t insize); +int cros_ec_cmd_readmem(struct cros_ec_device *ec_dev, u8 offset, u8 size, void *dest); + /** * cros_ec_get_time_ns() - Return time in ns. * -- cgit v1.2.3 From 6b2e29977518ec13ef3022f234ff8f3014c243da Mon Sep 17 00:00:00 2001 From: Lakshmi Sowjanya D Date: Mon, 13 May 2024 16:08:02 +0530 Subject: timekeeping: Provide infrastructure for converting to/from a base clock Hardware time stamps like provided by PTP clock implementations are based on a clock which feeds both the PCIe device and the system clock. For further processing the underlying hardwarre clock timestamp must be converted to the system clock. Right now this requires drivers to invoke an architecture specific conversion function, e.g. to convert the ART (Always Running Timer) timestamp to a TSC timestamp. As the system clock is aware of the underlying base clock, this can be moved to the core code by providing a base clock property for the system clock which contains the conversion factors and assigning a clocksource ID to the base clock. Add the required data structures and the conversion infrastructure in the core code to prepare for converting X86 and the related PTP drivers over. [ tglx: Added a missing READ_ONCE(). Massaged change log ] Co-developed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Co-developed-by: Christopher S. Hall Signed-off-by: Christopher S. Hall Signed-off-by: Lakshmi Sowjanya D Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240513103813.5666-2-lakshmi.sowjanya.d@intel.com --- include/linux/clocksource.h | 27 +++++++++++++++++++++++++++ include/linux/timekeeping.h | 2 ++ 2 files changed, 29 insertions(+) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 0ad8b550bb4b..d35b677b08fe 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -21,6 +21,7 @@ #include #include +struct clocksource_base; struct clocksource; struct module; @@ -50,6 +51,7 @@ struct module; * multiplication * @name: Pointer to clocksource name * @list: List head for registration (internal) + * @freq_khz: Clocksource frequency in khz. * @rating: Rating value for selection (higher is better) * To avoid rating inflation the following * list should give you a guide as to how @@ -70,6 +72,8 @@ struct module; * validate the clocksource from which the snapshot was * taken. * @flags: Flags describing special properties + * @base: Hardware abstraction for clock on which a clocksource + * is based * @enable: Optional function to enable the clocksource * @disable: Optional function to disable the clocksource * @suspend: Optional suspend function for the clocksource @@ -107,10 +111,12 @@ struct clocksource { u64 max_cycles; const char *name; struct list_head list; + u32 freq_khz; int rating; enum clocksource_ids id; enum vdso_clock_mode vdso_clock_mode; unsigned long flags; + struct clocksource_base *base; int (*enable)(struct clocksource *cs); void (*disable)(struct clocksource *cs); @@ -306,4 +312,25 @@ static inline unsigned int clocksource_get_max_watchdog_retry(void) void clocksource_verify_percpu(struct clocksource *cs); +/** + * struct clocksource_base - hardware abstraction for clock on which a clocksource + * is based + * @id: Defaults to CSID_GENERIC. The id value is used for conversion + * functions which require that the current clocksource is based + * on a clocksource_base with a particular ID in certain snapshot + * functions to allow callers to validate the clocksource from + * which the snapshot was taken. + * @freq_khz: Nominal frequency of the base clock in kHz + * @offset: Offset between the base clock and the clocksource + * @numerator: Numerator of the clock ratio between base clock and the clocksource + * @denominator: Denominator of the clock ratio between base clock and the clocksource + */ +struct clocksource_base { + enum clocksource_ids id; + u32 freq_khz; + u64 offset; + u32 numerator; + u32 denominator; +}; + #endif /* _LINUX_CLOCKSOURCE_H */ diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 0ea7823b7f31..b2ee182d891e 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -310,10 +310,12 @@ struct system_device_crosststamp { * timekeeping code to verify comparability of two cycle values. * The default ID, CSID_GENERIC, does not identify a specific * clocksource. + * @use_nsecs: @cycles is in nanoseconds. */ struct system_counterval_t { u64 cycles; enum clocksource_ids cs_id; + bool use_nsecs; }; /* -- cgit v1.2.3 From 3a52886c8f972c3a5b70bfec330c71817cd7fc63 Mon Sep 17 00:00:00 2001 From: Lakshmi Sowjanya D Date: Mon, 13 May 2024 16:08:03 +0530 Subject: x86/tsc: Provide ART base clock information for TSC The core code provides a new mechanism to allow conversion between ART and TSC. This allows to replace the x86 specific ART/TSC conversion functions. Prepare for removal by filling in the base clock conversion information for ART and associating the base clock to the TSC clocksource. The existing conversion functions will be removed once the usage sites are converted over to the new model. [ tglx: Massaged change log ] Co-developed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Co-developed-by: Christopher S. Hall Signed-off-by: Christopher S. Hall Signed-off-by: Lakshmi Sowjanya D Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240513103813.5666-3-lakshmi.sowjanya.d@intel.com --- include/linux/clocksource_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h index a4fa3436940c..2bb4d8c2f1b0 100644 --- a/include/linux/clocksource_ids.h +++ b/include/linux/clocksource_ids.h @@ -9,6 +9,7 @@ enum clocksource_ids { CSID_X86_TSC_EARLY, CSID_X86_TSC, CSID_X86_KVM_CLK, + CSID_X86_ART, CSID_MAX, }; -- cgit v1.2.3 From 02ecee07ca30f76f2a0f1381661a688b8e501ab0 Mon Sep 17 00:00:00 2001 From: Lakshmi Sowjanya D Date: Mon, 13 May 2024 16:08:10 +0530 Subject: timekeeping: Add function to convert realtime to base clock PPS (Pulse Per Second) generates a hardware pulse every second based on CLOCK_REALTIME. This works fine when the pulse is generated in software from a hrtimer callback function. For hardware which generates the pulse by programming a timer it is required to convert CLOCK_REALTIME to the underlying hardware clock. The X86 Timed IO device is based on the Always Running Timer (ART), which is the base clock of the TSC, which is usually the system clocksource on X86. The core code already has functionality to convert base clock timestamps to system clocksource timestamps, but there is no support for converting the other way around. Provide the required functionality to support such devices in a generic way to avoid code duplication in drivers: 1) ktime_real_to_base_clock() to convert a CLOCK_REALTIME timestamp to a base clock timestamp 2) timekeeping_clocksource_has_base() to allow drivers to validate that the system clocksource is based on a particular clocksource ID. [ tglx: Simplify timekeeping_clocksource_has_base() and add missing READ_ONCE() ] Co-developed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Co-developed-by: Christopher S. Hall Signed-off-by: Christopher S. Hall Signed-off-by: Lakshmi Sowjanya D Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240513103813.5666-10-lakshmi.sowjanya.d@intel.com --- include/linux/timekeeping.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index b2ee182d891e..fc12a9ba2c88 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -318,6 +318,10 @@ struct system_counterval_t { bool use_nsecs; }; +extern bool ktime_real_to_base_clock(ktime_t treal, + enum clocksource_ids base_id, u64 *cycles); +extern bool timekeeping_clocksource_has_base(enum clocksource_ids id); + /* * Get cross timestamp between system clock and device clock */ -- cgit v1.2.3 From 56f45266df67aa0f5b2a6881c8c4d16dbfff6b7d Mon Sep 17 00:00:00 2001 From: Ken Sloat Date: Thu, 15 Dec 2022 16:07:15 +0000 Subject: pwm: xilinx: Fix u32 overflow issue in 32-bit width PWM mode. This timer HW supports 8, 16 and 32-bit timer widths. This driver currently uses a u32 to store the max possible value of the timer. However, statements perform addition of 2 in xilinx_pwm_apply() when calculating the period_cycles and duty_cycles values. Since priv->max is a u32, this will result in an overflow to 1 which will not only be incorrect but fail on range comparison. This results in making it impossible to set the PWM in this timer mode. There are two obvious solutions to the current problem: 1. Cast each instance where overflow occurs to u64. 2. Change priv->max from a u32 to a u64. Solution #1 requires more code modifications, and leaves opportunity to introduce similar overflows if other math statements are added in the future. These may also go undetected if running in non 32-bit timer modes. Solution #2 is the much smaller and cleaner approach and thus the chosen method in this patch. This was tested on a Zynq UltraScale+ with multiple instances of the PWM IP. Signed-off-by: Ken Sloat Reviewed-by: Michal Simek Reviewed-by: Sean Anderson Link: https://lore.kernel.org/r/SJ0P222MB0107490C5371B848EF04351CA1E19@SJ0P222MB0107.NAMP222.PROD.OUTLOOK.COM Signed-off-by: Michal Simek --- include/clocksource/timer-xilinx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/clocksource/timer-xilinx.h b/include/clocksource/timer-xilinx.h index c0f56fe6d22a..d116f18de899 100644 --- a/include/clocksource/timer-xilinx.h +++ b/include/clocksource/timer-xilinx.h @@ -41,7 +41,7 @@ struct regmap; struct xilinx_timer_priv { struct regmap *map; struct clk *clk; - u32 max; + u64 max; }; /** -- cgit v1.2.3 From fcf544ac64397776a18246eba5a8ca72a47c4405 Mon Sep 17 00:00:00 2001 From: Jay Buddhabhatti Date: Wed, 24 Apr 2024 05:49:00 -0700 Subject: soc: xilinx: Add cb event for subsystem restart Add support to register subsystem restart events from firmware for Versal and Versal NET platforms. This event is received when firmware requests for subsystem restart. After receiving this event, the kernel needs to be restarted. Signed-off-by: Jay Buddhabhatti Link: https://lore.kernel.org/r/20240424124900.29287-1-jay.buddhabhatti@amd.com Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-event-manager.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/xlnx-event-manager.h b/include/linux/firmware/xlnx-event-manager.h index 82e8254b0f80..645dd34155e6 100644 --- a/include/linux/firmware/xlnx-event-manager.h +++ b/include/linux/firmware/xlnx-event-manager.h @@ -1,4 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Xilinx Event Management Driver + * + * Copyright (C) 2024, Advanced Micro Devices, Inc. + */ #ifndef _FIRMWARE_XLNX_EVENT_MANAGER_H_ #define _FIRMWARE_XLNX_EVENT_MANAGER_H_ @@ -7,6 +12,11 @@ #define CB_MAX_PAYLOAD_SIZE (4U) /*In payload maximum 32bytes */ +#define EVENT_SUBSYSTEM_RESTART (4U) + +#define PM_DEV_ACPU_0_0 (0x1810c0afU) +#define PM_DEV_ACPU_0 (0x1810c003U) + /************************** Exported Function *****************************/ typedef void (*event_cb_func_t)(const u32 *payload, void *data); -- cgit v1.2.3 From 494c55a1ec0ab40198cf43f5a41c7c5e0b70e7fc Mon Sep 17 00:00:00 2001 From: Ronak Jain Date: Thu, 25 Apr 2024 02:59:13 -0700 Subject: firmware: xilinx: Move FIRMWARE_VERSION_MASK to xlnx-zynqmp.h Move FIRMWARE_VERSION_MASK macro to xlnx-zynqmp.h so that other drivers can use it for verifying the supported firmware version. Signed-off-by: Ronak Jain Signed-off-by: Anand Ashok Dumbre Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/20240425095913.919390-1-ronak.jain@amd.com Signed-off-by: Michal Simek --- include/linux/firmware/xlnx-zynqmp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 1a069a56c961..d7d07afc0532 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -52,6 +52,9 @@ #define API_ID_MASK GENMASK(7, 0) #define MODULE_ID_MASK GENMASK(11, 8) +/* Firmware feature check version mask */ +#define FIRMWARE_VERSION_MASK 0xFFFFU + /* ATF only commands */ #define TF_A_PM_REGISTER_SGI 0xa04 #define PM_GET_TRUSTZONE_VERSION 0xa03 -- cgit v1.2.3 From a5b7365f28c191df6b93f60942d2b9a9fe71746c Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 3 Jun 2024 14:58:41 +0800 Subject: soundwire: bus: add stream refcount The notion of stream is by construction based on a multi-bus capability, to allow for aggregation of Peripheral devices or functions located on different segments. We currently count how many master_rt contexts are used by a stream, but we don't have the dual refcount of how many streams are allocated on a given bus. This refcount will be useful to check if BTP/BRA streams can be allocated. Note that the stream_refcount is modified in sdw_master_rt_alloc() and sdw_master_rt_free() which are both called with the bus_lock mutex held, so there's no need for refcount_ primitives for additional protection. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20240603065841.4860-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 13e96d8b7423..94fc1b57c57b 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -903,6 +903,7 @@ struct sdw_master_ops { * meaningful if multi_link is set. If set to 1, hardware-based * synchronization will be used even if a stream only uses a single * SoundWire segment. + * @stream_refcount: number of streams currently using this bus */ struct sdw_bus { struct device *dev; @@ -933,6 +934,7 @@ struct sdw_bus { u32 bank_switch_timeout; bool multi_link; int hw_sync_min_links; + int stream_refcount; }; int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, -- cgit v1.2.3 From 9b5fd115e7d5a98b82054cff5c96f6768ee06845 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 3 Jun 2024 15:02:40 +0800 Subject: soundwire: intel_ace2.x: add AC timing extensions for PantherLake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ACE3 IP used in PantherLake exposes new bitfields in the ACTMCTL register to better control clocks/delays. These bitfields were reserved/zero in the ACE2.x IP, to simplify the integration the new bifields are added unconditionally. The behavior will only be impacted when the firmware exposes DSD properties to set non-zero values. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20240603070240.5165-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- include/linux/soundwire/sdw_intel.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 8e78417156e3..d537587b4499 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -182,6 +182,11 @@ #define SDW_SHIM2_INTEL_VS_ACTMCTL_DODSE BIT(2) #define SDW_SHIM2_INTEL_VS_ACTMCTL_DOAIS GENMASK(4, 3) #define SDW_SHIM2_INTEL_VS_ACTMCTL_DOAISE BIT(5) +#define SDW_SHIM3_INTEL_VS_ACTMCTL_CLSS BIT(6) +#define SDW_SHIM3_INTEL_VS_ACTMCTL_CLDS GENMASK(11, 7) +#define SDW_SHIM3_INTEL_VS_ACTMCTL_DODSE2 GENMASK(13, 12) +#define SDW_SHIM3_INTEL_VS_ACTMCTL_DOAISE2 BIT(14) +#define SDW_SHIM3_INTEL_VS_ACTMCTL_CLDE BIT(15) /** * struct sdw_intel_stream_params_data: configuration passed during -- cgit v1.2.3 From 32fe91524e1651b9a0e11ddfbc63de9aa485cf48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 13 May 2024 11:25:18 +0200 Subject: sysctl: constify ctl_table arguments of utility function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a future commit the proc_handlers themselves will change to "const struct ctl_table". As a preparation for that adapt the internal helper. Signed-off-by: Thomas Weißschuh Reviewed-by: Kees Cook Signed-off-by: Joel Granados --- include/linux/sysctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 09db2f2e6488..54fbec062772 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -237,7 +237,7 @@ extern struct ctl_table_header *register_sysctl_mount_point(const char *path); void do_sysctl_args(void); bool sysctl_is_alias(char *param); -int do_proc_douintvec(struct ctl_table *table, int write, +int do_proc_douintvec(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(unsigned long *lvalp, unsigned int *valp, -- cgit v1.2.3 From 45919c28134519080a85a5fb66d0f65955ef7572 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 May 2024 05:05:14 +0000 Subject: ASoC: simple-card-utils: remove both playback/capture_only check soc-pcm.c :: soc_get_playback_capture() will indicate error if both playback_only / capture_only were true. Thus, graph_util_parse_link_direction() which setup playback_only / capture_only don't need to check it. And, its return value is not used on existing driver. Let's remove it. Signed-off-by: Kuninori Morimoto Link: https://msgid.link/r/87a5kah6gm.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/simple_card_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h index 2c2279d082ec..0a6435ac5c5f 100644 --- a/include/sound/simple_card_utils.h +++ b/include/sound/simple_card_utils.h @@ -197,7 +197,7 @@ int graph_util_is_ports0(struct device_node *port); int graph_util_parse_dai(struct device *dev, struct device_node *ep, struct snd_soc_dai_link_component *dlc, int *is_single_link); -int graph_util_parse_link_direction(struct device_node *np, +void graph_util_parse_link_direction(struct device_node *np, bool *is_playback_only, bool *is_capture_only); #ifdef DEBUG -- cgit v1.2.3 From 3d9a0a25336433d48ddca1e1f4fea25fd4a3b1d8 Mon Sep 17 00:00:00 2001 From: Sreenath Vijayan Date: Thu, 30 May 2024 13:15:47 +0530 Subject: printk: Rename console_replay_all() and update context Rename console_replay_all() to console_try_replay_all() to make clear that the implementation is best effort. Also, the function should not be called in NMI context as it takes locks, so update the comment in code. Fixes: 693f75b91a91 ("printk: Add function to replay kernel log on consoles") Fixes: 1b743485e27f ("tty/sysrq: Replay kernel log messages on consoles via sysrq") Suggested-by: Petr Mladek Signed-off-by: Shimoyashiki Taichi Signed-off-by: Sreenath Vijayan Link: https://lore.kernel.org/r/Zlguq/wU21Z8MqI4@sreenath.vijayan@sony.com Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek --- include/linux/printk.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index 40afab23881a..ca4c9271daf6 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -195,7 +195,7 @@ void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); -void console_replay_all(void); +void console_try_replay_all(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -275,7 +275,7 @@ static inline void dump_stack(void) static inline void printk_trigger_flush(void) { } -static inline void console_replay_all(void) +static inline void console_try_replay_all(void) { } #endif -- cgit v1.2.3 From 96a02b9fa95108269cd0cd012f091f86bd6f41a4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 15 May 2024 17:08:04 +0200 Subject: KVM: Unexport kvm_debugfs_dir After faf01aef0570 ("KVM: PPC: Merge powerpc's debugfs entry content into generic entry") kvm_debugfs_dir is not used anywhere else outside of kvm_main.c Unexport it and make it static. Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240515150804.9354-1-bp@kernel.org Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 692c01e41a18..c80fe03a1fa4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1955,8 +1955,6 @@ struct _kvm_stats_desc { HALT_POLL_HIST_COUNT), \ STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking) -extern struct dentry *kvm_debugfs_dir; - ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header, const struct _kvm_stats_desc *desc, void *stats, size_t size_stats, -- cgit v1.2.3 From 61df7b82820494368bd46071ca97e43a3dfc3b11 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 2 May 2024 17:57:51 -0400 Subject: lsm: fixup the inode xattr capability handling The current security_inode_setxattr() and security_inode_removexattr() hooks rely on individual LSMs to either call into the associated capability hooks (cap_inode_setxattr() or cap_inode_removexattr()), or return a magic value of 1 to indicate that the LSM layer itself should perform the capability checks. Unfortunately, with the default return value for these LSM hooks being 0, an individual LSM hook returning a 1 will cause the LSM hook processing to exit early, potentially skipping a LSM. Thankfully, with the exception of the BPF LSM, none of the LSMs which currently register inode xattr hooks should end up returning a value of 1, and in the BPF LSM case, with the BPF LSM hooks executing last there should be no real harm in stopping processing of the LSM hooks. However, the reliance on the individual LSMs to either call the capability hooks themselves, or signal the LSM with a return value of 1, is fragile and relies on a specific set of LSMs being enabled. This patch is an effort to resolve, or minimize, these issues. Before we discuss the solution, there are a few observations and considerations that we need to take into account: * BPF LSM registers an implementation for every LSM hook, and that implementation simply returns the hook's default return value, a 0 in this case. We want to ensure that the default BPF LSM behavior results in the capability checks being called. * SELinux and Smack do not expect the traditional capability checks to be applied to the xattrs that they "own". * SELinux and Smack are currently written in such a way that the xattr capability checks happen before any additional LSM specific access control checks. SELinux does apply SELinux specific access controls to all xattrs, even those not "owned" by SELinux. * IMA and EVM also register xattr hooks but assume that the LSM layer and specific LSMs have already authorized the basic xattr operation. In order to ensure we perform the capability based access controls before the individual LSM access controls, perform only one capability access control check for each operation, and clarify the logic around applying the capability controls, we need a mechanism to determine if any of the enabled LSMs "own" a particular xattr and want to take responsibility for controlling access to that xattr. The solution in this patch is to create a new LSM hook, 'inode_xattr_skipcap', that is not exported to the rest of the kernel via a security_XXX() function, but is used by the LSM layer to determine if a LSM wants to control access to a given xattr and avoid the traditional capability controls. Registering an inode_xattr_skipcap hook is optional, if a LSM declines to register an implementation, or uses an implementation that simply returns the default value (0), there is no effect as the LSM continues to enforce the capability based controls (unless another LSM takes ownership of the xattr). If none of the LSMs signal that the capability checks should be skipped, the capability check is performed and if access is granted the individual LSM xattr access control hooks are executed, keeping with the DAC-before-LSM convention. Cc: stable@vger.kernel.org Acked-by: Casey Schaufler Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index f804b76cde44..8fd87f823d3a 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -144,6 +144,7 @@ LSM_HOOK(int, 0, inode_setattr, struct mnt_idmap *idmap, struct dentry *dentry, LSM_HOOK(void, LSM_RET_VOID, inode_post_setattr, struct mnt_idmap *idmap, struct dentry *dentry, int ia_valid) LSM_HOOK(int, 0, inode_getattr, const struct path *path) +LSM_HOOK(int, 0, inode_xattr_skipcap, const char *name) LSM_HOOK(int, 0, inode_setxattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) -- cgit v1.2.3 From 32d99593bdc91442fe6183d97b060210cc9c8193 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 16 Mar 2024 00:10:15 -0700 Subject: rcu: Add lockdep_assert_in_rcu_read_lock() and friends There is no direct RCU counterpart to lockdep_assert_irqs_disabled() and friends. Although it is possible to construct them, it would be more convenient to have the following lockdep assertions: lockdep_assert_in_rcu_read_lock() lockdep_assert_in_rcu_read_lock_bh() lockdep_assert_in_rcu_read_lock_sched() lockdep_assert_in_rcu_reader() This commit therefore creates them. Reported-by: Jens Axboe Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index dfd2399f2cde..8470a85f6563 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -421,11 +421,71 @@ static inline void rcu_preempt_sleep_check(void) { } "Illegal context switch in RCU-sched read-side critical section"); \ } while (0) +// See RCU_LOCKDEP_WARN() for an explanation of the double call to +// debug_lockdep_rcu_enabled(). +static inline bool lockdep_assert_rcu_helper(bool c) +{ + return debug_lockdep_rcu_enabled() && + (c || !rcu_is_watching() || !rcu_lockdep_current_cpu_online()) && + debug_lockdep_rcu_enabled(); +} + +/** + * lockdep_assert_in_rcu_read_lock - WARN if not protected by rcu_read_lock() + * + * Splats if lockdep is enabled and there is no rcu_read_lock() in effect. + */ +#define lockdep_assert_in_rcu_read_lock() \ + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map))) + +/** + * lockdep_assert_in_rcu_read_lock_bh - WARN if not protected by rcu_read_lock_bh() + * + * Splats if lockdep is enabled and there is no rcu_read_lock_bh() in effect. + * Note that local_bh_disable() and friends do not suffice here, instead an + * actual rcu_read_lock_bh() is required. + */ +#define lockdep_assert_in_rcu_read_lock_bh() \ + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_bh_lock_map))) + +/** + * lockdep_assert_in_rcu_read_lock_sched - WARN if not protected by rcu_read_lock_sched() + * + * Splats if lockdep is enabled and there is no rcu_read_lock_sched() + * in effect. Note that preempt_disable() and friends do not suffice here, + * instead an actual rcu_read_lock_sched() is required. + */ +#define lockdep_assert_in_rcu_read_lock_sched() \ + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_sched_lock_map))) + +/** + * lockdep_assert_in_rcu_reader - WARN if not within some type of RCU reader + * + * Splats if lockdep is enabled and there is no RCU reader of any + * type in effect. Note that regions of code protected by things like + * preempt_disable, local_bh_disable(), and local_irq_disable() all qualify + * as RCU readers. + * + * Note that this will never trigger in PREEMPT_NONE or PREEMPT_VOLUNTARY + * kernels that are not also built with PREEMPT_COUNT. But if you have + * lockdep enabled, you might as well also enable PREEMPT_COUNT. + */ +#define lockdep_assert_in_rcu_reader() \ + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map) && \ + !lock_is_held(&rcu_bh_lock_map) && \ + !lock_is_held(&rcu_sched_lock_map) && \ + preemptible())) + #else /* #ifdef CONFIG_PROVE_RCU */ #define RCU_LOCKDEP_WARN(c, s) do { } while (0 && (c)) #define rcu_sleep_check() do { } while (0) +#define lockdep_assert_in_rcu_read_lock() do { } while (0) +#define lockdep_assert_in_rcu_read_lock_bh() do { } while (0) +#define lockdep_assert_in_rcu_read_lock_sched() do { } while (0) +#define lockdep_assert_in_rcu_reader() do { } while (0) + #endif /* #else #ifdef CONFIG_PROVE_RCU */ /* -- cgit v1.2.3 From ce418966a83320837f59f04a646fa4716e132dc8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 25 Apr 2024 16:18:32 +0200 Subject: rcu/nocb: Fix segcblist state machine comments about bypass The parts explaining the bypass lifecycle in (de-)offloading are out of date and/or wrong. Bypass is simply enabled whenever SEGCBLIST_RCU_CORE flag is off. Fix the comments accordingly. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/rcu_segcblist.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 659d13a7ddaa..1e5b4ef167ca 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -141,7 +141,7 @@ struct rcu_cblist { * | SEGCBLIST_KTHREAD_GP | * | | * | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() | - * | handles callbacks concurrently. Bypass enqueue is enabled. | + * | handles callbacks concurrently. Bypass enqueue is disabled. | * | Invoke RCU core so we make sure not to preempt it in the middle with | * | leaving some urgent work unattended within a jiffy. | * ---------------------------------------------------------------------------- @@ -154,8 +154,7 @@ struct rcu_cblist { * | SEGCBLIST_KTHREAD_GP | * | | * | CB/GP kthreads and local rcu_core() handle callbacks concurrently | - * | holding nocb_lock. Wake up CB and GP kthreads if necessary. Disable | - * | bypass enqueue. | + * | holding nocb_lock. Wake up CB and GP kthreads if necessary. | * ---------------------------------------------------------------------------- * | * v @@ -185,7 +184,7 @@ struct rcu_cblist { * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, while holding nocb_lock. Forbid nocb_timer to be armed. | - * | Flush pending nocb_timer. Flush nocb bypass callbacks. | + * | Flush pending nocb_timer. | * ---------------------------------------------------------------------------- * | * v -- cgit v1.2.3 From aa97b9a56906f5965a7c5752790d174cadc8b820 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 25 Apr 2024 16:18:33 +0200 Subject: rcu/nocb: Fix segcblist state machine stale comments about timers The (de-)offloading process used to take care about the NOCB timer when it depended on the per-rdp NOCB locking. However this isn't the case anymore for a long while. It can now safely be armed and run during the (de-)offloading process, which doesn't care about it anymore. Update the comments accordingly. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/rcu_segcblist.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 1e5b4ef167ca..8018045989af 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -80,8 +80,7 @@ struct rcu_cblist { * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED | * | | * | Callbacks processed by rcu_core() from softirqs or local | - * | rcuc kthread, while holding nocb_lock. Waking up CB and GP kthreads, | - * | allowing nocb_timer to be armed. | + * | rcuc kthread, while holding nocb_lock. Waking up CB and GP kthreads. | * ---------------------------------------------------------------------------- * | * v @@ -183,8 +182,7 @@ struct rcu_cblist { * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | * | | * | Callbacks processed by rcu_core() from softirqs or local | - * | rcuc kthread, while holding nocb_lock. Forbid nocb_timer to be armed. | - * | Flush pending nocb_timer. | + * | rcuc kthread, while holding nocb_lock. | * ---------------------------------------------------------------------------- * | * v -- cgit v1.2.3 From 483d5bf23125a9127ffcb3f7a3b3539b34df67d4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 25 Apr 2024 16:18:34 +0200 Subject: rcu/nocb: Use kthread parking instead of ad-hoc implementation Upon NOCB deoffloading, the rcuo kthread must be forced to sleep until the corresponding rdp is ever offloaded again. The deoffloader clears the SEGCBLIST_OFFLOADED flag, wakes up the rcuo kthread which then notices that change and clears in turn its SEGCBLIST_KTHREAD_CB flag before going to sleep, until it ever sees the SEGCBLIST_OFFLOADED flag again, should a re-offloading happen. Upon NOCB offloading, the rcuo kthread must be forced to wake up and handle callbacks until the corresponding rdp is ever deoffloaded again. The offloader sets the SEGCBLIST_OFFLOADED flag, wakes up the rcuo kthread which then notices that change and sets in turn its SEGCBLIST_KTHREAD_CB flag before going to check callbacks, until it ever sees the SEGCBLIST_OFFLOADED flag cleared again, should a de-offloading happen again. This is all a crude ad-hoc and error-prone kthread (un-)parking re-implementation. Consolidate the behaviour with the appropriate API instead. [ paulmck: Apply Qiang Zhang feedback provided in Link: below. ] Link: https://lore.kernel.org/all/20240509074046.15629-1-qiang.zhang1211@gmail.com/ Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/rcu_segcblist.h | 81 +++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 8018045989af..ba95c06675e1 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -84,31 +84,31 @@ struct rcu_cblist { * ---------------------------------------------------------------------------- * | * v - * ----------------------------------- - * | | - * v v - * --------------------------------------- ----------------------------------| - * | SEGCBLIST_RCU_CORE | | | SEGCBLIST_RCU_CORE | | - * | SEGCBLIST_LOCKING | | | SEGCBLIST_LOCKING | | - * | SEGCBLIST_OFFLOADED | | | SEGCBLIST_OFFLOADED | | - * | SEGCBLIST_KTHREAD_CB | | SEGCBLIST_KTHREAD_GP | - * | | | | - * | | | | - * | CB kthread woke up and | | GP kthread woke up and | - * | acknowledged SEGCBLIST_OFFLOADED. | | acknowledged SEGCBLIST_OFFLOADED| - * | Processes callbacks concurrently | | | - * | with rcu_core(), holding | | | - * | nocb_lock. | | | - * --------------------------------------- ----------------------------------- - * | | - * ----------------------------------- + * ---------------------------------------------------------------------------- + * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED | + * | + unparked CB kthread | + * | | + * | CB kthread got unparked and processes callbacks concurrently with | + * | rcu_core(), holding nocb_lock. | + * --------------------------------------------------------------------------- + * | + * v + * ---------------------------------------------------------------------------| + * | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_KTHREAD_GP | + * | + unparked CB kthread | + * | | + * | GP kthread woke up and acknowledged nocb_lock. | + * ---------------------------------------- ----------------------------------- * | * v * |--------------------------------------------------------------------------| - * | SEGCBLIST_LOCKING | | - * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | * | SEGCBLIST_KTHREAD_GP | | - * | SEGCBLIST_KTHREAD_CB | + * | + unparked CB kthread | * | | * | Kthreads handle callbacks holding nocb_lock, local rcu_core() stops | * | handling callbacks. Enable bypass queueing. | @@ -124,8 +124,8 @@ struct rcu_cblist { * |--------------------------------------------------------------------------| * | SEGCBLIST_LOCKING | | * | SEGCBLIST_OFFLOADED | | - * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | + * | + unparked CB kthread | * | | * | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() | * | ignores callbacks. Bypass enqueue is enabled. | @@ -136,8 +136,8 @@ struct rcu_cblist { * | SEGCBLIST_RCU_CORE | | * | SEGCBLIST_LOCKING | | * | SEGCBLIST_OFFLOADED | | - * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | + * | + unparked CB kthread | * | | * | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() | * | handles callbacks concurrently. Bypass enqueue is disabled. | @@ -149,40 +149,31 @@ struct rcu_cblist { * |--------------------------------------------------------------------------| * | SEGCBLIST_RCU_CORE | | * | SEGCBLIST_LOCKING | | - * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | + * | + unparked CB kthread | * | | * | CB/GP kthreads and local rcu_core() handle callbacks concurrently | - * | holding nocb_lock. Wake up CB and GP kthreads if necessary. | + * | holding nocb_lock. Wake up GP kthread if necessary. | * ---------------------------------------------------------------------------- * | * v - * ----------------------------------- - * | | - * v v - * ---------------------------------------------------------------------------| - * | | | - * | SEGCBLIST_RCU_CORE | | SEGCBLIST_RCU_CORE | | - * | SEGCBLIST_LOCKING | | SEGCBLIST_LOCKING | | - * | SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | - * | | | - * | GP kthread woke up and | CB kthread woke up and | - * | acknowledged the fact that | acknowledged the fact that | - * | SEGCBLIST_OFFLOADED got cleared. | SEGCBLIST_OFFLOADED got cleared. | - * | | The CB kthread goes to sleep | - * | The callbacks from the target CPU | until it ever gets re-offloaded. | - * | will be ignored from the GP kthread | | - * | loop. | | + * |--------------------------------------------------------------------------| + * | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | + * | + unparked CB kthread | + * | | + * | GP kthread woke up and acknowledged the fact that SEGCBLIST_OFFLOADED | + * | got cleared. The callbacks from the target CPU will be ignored from the| + * | GP kthread loop. | * ---------------------------------------------------------------------------- - * | | - * ----------------------------------- * | * v * ---------------------------------------------------------------------------- * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | + * | + parked CB kthread | * | | - * | Callbacks processed by rcu_core() from softirqs or local | - * | rcuc kthread, while holding nocb_lock. | + * | CB kthread is parked. Callbacks processed by rcu_core() from softirqs or | + * | local rcuc kthread, while holding nocb_lock. | * ---------------------------------------------------------------------------- * | * v -- cgit v1.2.3 From 9855c37edf0009cc276cecfee09f7e76e2380212 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 25 Apr 2024 16:24:04 +0200 Subject: Revert "rcu-tasks: Fix synchronize_rcu_tasks() VS zap_pid_ns_processes()" This reverts commit 28319d6dc5e2ffefa452c2377dd0f71621b5bff0. The race it fixed was subject to conditions that don't exist anymore since: 1612160b9127 ("rcu-tasks: Eliminate deadlocks involving do_exit() and RCU tasks") This latter commit removes the use of SRCU that used to cover the RCU-tasks blind spot on exit between the tasklist's removal and the final preemption disabling. The task is now placed instead into a temporary list inside which voluntary sleeps are accounted as RCU-tasks quiescent states. This would disarm the deadlock initially reported against PID namespace exit. Signed-off-by: Frederic Weisbecker Reviewed-by: Oleg Nesterov Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index dfd2399f2cde..61cb3de236af 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -209,7 +209,6 @@ void synchronize_rcu_tasks_rude(void); #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false) void exit_tasks_rcu_start(void); -void exit_tasks_rcu_stop(void); void exit_tasks_rcu_finish(void); #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ #define rcu_tasks_classic_qs(t, preempt) do { } while (0) @@ -218,7 +217,6 @@ void exit_tasks_rcu_finish(void); #define call_rcu_tasks call_rcu #define synchronize_rcu_tasks synchronize_rcu static inline void exit_tasks_rcu_start(void) { } -static inline void exit_tasks_rcu_stop(void) { } static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ -- cgit v1.2.3 From 4ce6e8a859f0503d97aac6869bc3b1a24b15601d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 28 May 2024 13:33:29 -0700 Subject: hwmon: Add PEC attribute support to hardware monitoring core Several hardware monitoring chips optionally support Packet Error Checking (PEC). For some chips, PEC support can be enabled simply by setting I2C_CLIENT_PEC in the i2c client data structure. Others require chip specific code to enable or disable PEC support. Introduce hwmon_chip_pec and HWMON_C_PEC to simplify adding configurable PEC support for hardware monitoring drivers. A driver can set HWMON_C_PEC in its chip information data to indicate PEC support. If a chip requires chip specific code to enable or disable PEC support, the driver only needs to implement support for the hwmon_chip_pec attribute to its write function. Packet Error Checking is only supported for SMBus devices. HWMON_C_PEC must therefore only be set by a driver if the parent device is an I2C device. Attempts to set HWMON_C_PEC on any other device type is not supported and rejected. The code calls i2c_check_functionality() to check if PEC is supported by the I2C/SMBus controller. This function is only available if CONFIG_I2C is enabled and reachable. For this reason, the added code needs to depend on reachability of CONFIG_I2C. Cc: Radu Sabau Acked-by: Nuno Sa Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index edf96f249eb5..e94314760aab 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -45,6 +45,7 @@ enum hwmon_chip_attributes { hwmon_chip_power_samples, hwmon_chip_temp_samples, hwmon_chip_beep_enable, + hwmon_chip_pec, }; #define HWMON_C_TEMP_RESET_HISTORY BIT(hwmon_chip_temp_reset_history) @@ -60,6 +61,7 @@ enum hwmon_chip_attributes { #define HWMON_C_POWER_SAMPLES BIT(hwmon_chip_power_samples) #define HWMON_C_TEMP_SAMPLES BIT(hwmon_chip_temp_samples) #define HWMON_C_BEEP_ENABLE BIT(hwmon_chip_beep_enable) +#define HWMON_C_PEC BIT(hwmon_chip_pec) enum hwmon_temp_attributes { hwmon_temp_enable, -- cgit v1.2.3 From d0edc54455398248154062664f7d1b35e6ec6e01 Mon Sep 17 00:00:00 2001 From: Bingbu Cao Date: Fri, 10 May 2024 14:27:47 +0800 Subject: media: ipu-bridge: add mod_devicetable.h header inclusion ACPI_ID_LEN is defined in mod_devicetable.h, so the header should be guaranteed to included in ipu-bridge.h instead of the source files which include ipu-bridge.h. Signed-off-by: Bingbu Cao Reviewed-by: Andy Shevchenko Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/ipu-bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/media/ipu-bridge.h b/include/media/ipu-bridge.h index 783bda6d5cc3..16fac765456e 100644 --- a/include/media/ipu-bridge.h +++ b/include/media/ipu-bridge.h @@ -3,6 +3,7 @@ #ifndef __IPU_BRIDGE_H #define __IPU_BRIDGE_H +#include #include #include #include -- cgit v1.2.3 From 1d7804281df3f09f0a109d00406e859a00bae7ae Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 24 Apr 2024 18:39:07 +0300 Subject: media: subdev: Fix use of sd->enabled_streams in call_s_stream() call_s_stream() uses sd->enabled_streams to track whether streaming has already been enabled. However, v4l2_subdev_enable/disable_streams_fallback(), which was the original user of this field, already uses it, and v4l2_subdev_enable/disable_streams_fallback() will call call_s_stream(). This leads to a conflict as both functions set the field. Afaics, both functions set the field to the same value, so it won't cause a runtime bug, but it's still wrong and if we, e.g., change how v4l2_subdev_enable/disable_streams_fallback() operates we might easily cause bugs. Fix this by adding a new field, 's_stream_enabled', for call_s_stream(). Reviewed-by: Umang Jain Reviewed-by: Laurent Pinchart Tested-by: Umang Jain Signed-off-by: Tomi Valkeinen Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-subdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index e30c463d90e5..d3f15882927b 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1045,6 +1045,8 @@ struct v4l2_subdev_platform_data { * v4l2_subdev_enable_streams() and * v4l2_subdev_disable_streams() helper functions for fallback * cases. + * @s_stream_enabled: Tracks whether streaming has been enabled with s_stream. + * This is only for call_s_stream() internal use. * * Each instance of a subdev driver should create this struct, either * stand-alone or embedded in a larger struct. @@ -1093,6 +1095,7 @@ struct v4l2_subdev { */ struct v4l2_subdev_state *active_state; u64 enabled_streams; + bool s_stream_enabled; }; -- cgit v1.2.3 From 61d6c8c896c1ccde350c281817847a32b0c6b83b Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 24 Apr 2024 18:39:08 +0300 Subject: media: subdev: Improve v4l2_subdev_enable/disable_streams_fallback v4l2_subdev_enable/disable_streams_fallback() supports falling back to .s_stream() for subdevs with a single source pad. It also tracks the enabled streams for that one pad in the sd->enabled_streams field. Tracking the enabled streams with sd->enabled_streams does not make sense, as with .s_stream() there can only be a single stream per pad. Thus, as the v4l2_subdev_enable/disable_streams_fallback() only supports a single source pad, all we really need is a boolean which tells whether streaming has been enabled on this pad or not. However, as we only need a true/false state for a pad (instead of tracking which streams have been enabled for a pad), we can easily extend the fallback mechanism to support multiple source pads as we only need to keep track of which pads have been enabled. Change the sd->enabled_streams field to sd->enabled_pads, which is a 64-bit bitmask tracking the enabled source pads. With this change we can remove the restriction that v4l2_subdev_enable/disable_streams_fallback() only supports a single source pad. Reviewed-by: Laurent Pinchart Tested-by: Umang Jain Signed-off-by: Tomi Valkeinen Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-subdev.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index d3f15882927b..f191cf00c840 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1041,10 +1041,9 @@ struct v4l2_subdev_platform_data { * @active_state: Active state for the subdev (NULL for subdevs tracking the * state internally). Initialized by calling * v4l2_subdev_init_finalize(). - * @enabled_streams: Bitmask of enabled streams used by - * v4l2_subdev_enable_streams() and - * v4l2_subdev_disable_streams() helper functions for fallback - * cases. + * @enabled_pads: Bitmask of enabled pads used by v4l2_subdev_enable_streams() + * and v4l2_subdev_disable_streams() helper functions for + * fallback cases. * @s_stream_enabled: Tracks whether streaming has been enabled with s_stream. * This is only for call_s_stream() internal use. * @@ -1094,7 +1093,7 @@ struct v4l2_subdev { * doesn't support it. */ struct v4l2_subdev_state *active_state; - u64 enabled_streams; + u64 enabled_pads; bool s_stream_enabled; }; -- cgit v1.2.3 From 5f3ce14fae742d1d23061c3122d93edb879ebf53 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 24 Apr 2024 18:39:09 +0300 Subject: media: subdev: Add v4l2_subdev_is_streaming() Add a helper function which returns whether the subdevice is streaming, i.e. if .s_stream or .enable_streams has been called successfully. Reviewed-by: Umang Jain Reviewed-by: Laurent Pinchart Tested-by: Umang Jain Signed-off-by: Tomi Valkeinen Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-subdev.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index f191cf00c840..23c970d09870 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1956,4 +1956,17 @@ extern const struct v4l2_subdev_ops v4l2_subdev_call_wrappers; void v4l2_subdev_notify_event(struct v4l2_subdev *sd, const struct v4l2_event *ev); +/** + * v4l2_subdev_is_streaming() - Returns if the subdevice is streaming + * @sd: The subdevice + * + * v4l2_subdev_is_streaming() tells if the subdevice is currently streaming. + * "Streaming" here means whether .s_stream() or .enable_streams() has been + * successfully called, and the streaming has not yet been disabled. + * + * If the subdevice implements .enable_streams() this function must be called + * while holding the active state lock. + */ +bool v4l2_subdev_is_streaming(struct v4l2_subdev *sd); + #endif /* _V4L2_SUBDEV_H */ -- cgit v1.2.3 From f8e9662e4da6d4e394a3d9cf0d335863650d712d Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 24 Apr 2024 18:39:14 +0300 Subject: media: subdev: Improve s_stream documentation Now that enable/disable_streams operations are available for single-stream subdevices too, there's no reason to use the old s_stream operation on new drivers. Extend the documentation reflecting this. Signed-off-by: Tomi Valkeinen Reviewed-by: Umang Jain Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-subdev.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 23c970d09870..df66365576dd 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -450,6 +450,15 @@ enum v4l2_subdev_pre_streamon_flags { * already started or stopped subdev. Also see call_s_stream wrapper in * v4l2-subdev.c. * + * New drivers should instead implement &v4l2_subdev_pad_ops.enable_streams + * and &v4l2_subdev_pad_ops.disable_streams operations, and use + * v4l2_subdev_s_stream_helper for the &v4l2_subdev_video_ops.s_stream + * operation to support legacy users. + * + * Drivers should also not call the .s_stream() subdev operation directly, + * but use the v4l2_subdev_enable_streams() and + * v4l2_subdev_disable_streams() helpers. + * * @g_pixelaspect: callback to return the pixelaspect ratio. * * @s_rx_buffer: set a host allocated memory buffer for the subdev. The subdev -- cgit v1.2.3 From e73412fdeb541e777b3fd50b665781c1856422b5 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 9 May 2024 00:40:43 +0300 Subject: media: v4l2-subdev: Fix v4l2_subdev_state_get_format() documentation The documentation of the v4l2_subdev_state_get_format() macro incorrectly references __v4l2_subdev_state_get_format() instead of __v4l2_subdev_state_gen_call(). Fix it, and also update the list of similar macros to add the missing v4l2_subdev_state_get_interval(). Suggested-by: Sakari Ailus Signed-off-by: Laurent Pinchart Reviewed-by: Sakari Ailus Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-subdev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index df66365576dd..ed339f0116bf 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1351,12 +1351,12 @@ void v4l2_subdev_cleanup(struct v4l2_subdev *sd); */ /* * Wrap v4l2_subdev_state_get_format(), allowing the function to be called with - * two or three arguments. The purpose of the __v4l2_subdev_state_get_format() + * two or three arguments. The purpose of the __v4l2_subdev_state_gen_call() * macro below is to come up with the name of the function or macro to call, * using the last two arguments (_stream and _pad). The selected function or * macro is then called using the arguments specified by the caller. A similar - * arrangement is used for v4l2_subdev_state_crop() and - * v4l2_subdev_state_compose() below. + * arrangement is used for v4l2_subdev_state_crop(), v4l2_subdev_state_compose() + * and v4l2_subdev_state_get_interval() below. */ #define v4l2_subdev_state_get_format(state, pad, ...) \ __v4l2_subdev_state_gen_call(format, ##__VA_ARGS__, , _pad) \ -- cgit v1.2.3 From 85af84852f115d3c9d4b45b0500315e630baa82c Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 9 May 2024 00:40:43 +0300 Subject: media: v4l2-subdev: Provide const-aware subdev state accessors It would be useful to mark instances of v4l2_subdev_state structures as const when code needs to access them read-only. This isn't currently possible, as the v4l2_subdev_state_get_*() accessor functions take a non-const pointer to the state. Use _Generic() to provide two different versions of the accessors, for const and non-const states respectively. The former returns a const pointer to the requested format, rectangle or interval, implementing const-correctness. The latter returns a non-const pointer, preserving the current behaviour for drivers. Signed-off-by: Laurent Pinchart Reviewed-by: Sakari Ailus Reviewed-by: Tomi Valkeinen [Sakari Ailus: Drop the word "below" from the text.] Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-subdev.h | 54 ++++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index ed339f0116bf..1b74e037e440 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1337,6 +1337,16 @@ void v4l2_subdev_cleanup(struct v4l2_subdev *sd); #define __v4l2_subdev_state_gen_call(NAME, _1, ARG, ...) \ __v4l2_subdev_state_get_ ## NAME ## ARG +/* + * A macro to constify the return value of the state accessors when the state + * parameter is const. + */ +#define __v4l2_subdev_state_constify_ret(state, value) \ + _Generic(state, \ + const struct v4l2_subdev_state *: (const typeof(*(value)) *)(value), \ + struct v4l2_subdev_state *: (value) \ + ) + /** * v4l2_subdev_state_get_format() - Get pointer to a stream format * @state: subdevice state @@ -1352,15 +1362,20 @@ void v4l2_subdev_cleanup(struct v4l2_subdev *sd); /* * Wrap v4l2_subdev_state_get_format(), allowing the function to be called with * two or three arguments. The purpose of the __v4l2_subdev_state_gen_call() - * macro below is to come up with the name of the function or macro to call, - * using the last two arguments (_stream and _pad). The selected function or - * macro is then called using the arguments specified by the caller. A similar - * arrangement is used for v4l2_subdev_state_crop(), v4l2_subdev_state_compose() - * and v4l2_subdev_state_get_interval() below. - */ -#define v4l2_subdev_state_get_format(state, pad, ...) \ - __v4l2_subdev_state_gen_call(format, ##__VA_ARGS__, , _pad) \ - (state, pad, ##__VA_ARGS__) + * macro is to come up with the name of the function or macro to call, using + * the last two arguments (_stream and _pad). The selected function or macro is + * then called using the arguments specified by the caller. The + * __v4l2_subdev_state_constify_ret() macro constifies the returned pointer + * when the state is const, allowing the state accessors to guarantee + * const-correctness in all cases. + * + * A similar arrangement is used for v4l2_subdev_state_crop(), + * v4l2_subdev_state_compose() and v4l2_subdev_state_get_interval() below. + */ +#define v4l2_subdev_state_get_format(state, pad, ...) \ + __v4l2_subdev_state_constify_ret(state, \ + __v4l2_subdev_state_gen_call(format, ##__VA_ARGS__, , _pad) \ + ((struct v4l2_subdev_state *)state, pad, ##__VA_ARGS__)) #define __v4l2_subdev_state_get_format_pad(state, pad) \ __v4l2_subdev_state_get_format(state, pad, 0) struct v4l2_mbus_framefmt * @@ -1379,9 +1394,10 @@ __v4l2_subdev_state_get_format(struct v4l2_subdev_state *state, * For stream-unaware drivers the crop rectangle for the corresponding pad is * returned. If the pad does not exist, NULL is returned. */ -#define v4l2_subdev_state_get_crop(state, pad, ...) \ - __v4l2_subdev_state_gen_call(crop, ##__VA_ARGS__, , _pad) \ - (state, pad, ##__VA_ARGS__) +#define v4l2_subdev_state_get_crop(state, pad, ...) \ + __v4l2_subdev_state_constify_ret(state, \ + __v4l2_subdev_state_gen_call(crop, ##__VA_ARGS__, , _pad) \ + ((struct v4l2_subdev_state *)state, pad, ##__VA_ARGS__)) #define __v4l2_subdev_state_get_crop_pad(state, pad) \ __v4l2_subdev_state_get_crop(state, pad, 0) struct v4l2_rect * @@ -1400,9 +1416,10 @@ __v4l2_subdev_state_get_crop(struct v4l2_subdev_state *state, unsigned int pad, * For stream-unaware drivers the compose rectangle for the corresponding pad is * returned. If the pad does not exist, NULL is returned. */ -#define v4l2_subdev_state_get_compose(state, pad, ...) \ - __v4l2_subdev_state_gen_call(compose, ##__VA_ARGS__, , _pad) \ - (state, pad, ##__VA_ARGS__) +#define v4l2_subdev_state_get_compose(state, pad, ...) \ + __v4l2_subdev_state_constify_ret(state, \ + __v4l2_subdev_state_gen_call(compose, ##__VA_ARGS__, , _pad) \ + ((struct v4l2_subdev_state *)state, pad, ##__VA_ARGS__)) #define __v4l2_subdev_state_get_compose_pad(state, pad) \ __v4l2_subdev_state_get_compose(state, pad, 0) struct v4l2_rect * @@ -1421,9 +1438,10 @@ __v4l2_subdev_state_get_compose(struct v4l2_subdev_state *state, * For stream-unaware drivers the frame interval for the corresponding pad is * returned. If the pad does not exist, NULL is returned. */ -#define v4l2_subdev_state_get_interval(state, pad, ...) \ - __v4l2_subdev_state_gen_call(interval, ##__VA_ARGS__, , _pad) \ - (state, pad, ##__VA_ARGS__) +#define v4l2_subdev_state_get_interval(state, pad, ...) \ + __v4l2_subdev_state_constify_ret(state, \ + __v4l2_subdev_state_gen_call(interval, ##__VA_ARGS__, , _pad) \ + ((struct v4l2_subdev_state *)state, pad, ##__VA_ARGS__)) #define __v4l2_subdev_state_get_interval_pad(state, pad) \ __v4l2_subdev_state_get_interval(state, pad, 0) struct v4l2_fract * -- cgit v1.2.3 From 1d8491d3e726984343dd8c3cdbe2f2b47cfdd928 Mon Sep 17 00:00:00 2001 From: Paolo Pisati Date: Sat, 1 Jun 2024 17:32:54 +0200 Subject: m68k: amiga: Turn off Warp1260 interrupts during boot On an Amiga 1200 equipped with a Warp1260 accelerator, an interrupt storm coming from the accelerator board causes the machine to crash in local_irq_enable() or auto_irq_enable(). Disabling interrupts for the Warp1260 in amiga_parse_bootinfo() fixes the problem. Link: https://lore.kernel.org/r/ZkjwzVwYeQtyAPrL@amaterasu.local Cc: stable Signed-off-by: Paolo Pisati Reviewed-by: Michael Schmitz Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240601153254.186225-1-p.pisati@gmail.com Signed-off-by: Geert Uytterhoeven --- include/uapi/linux/zorro_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/zorro_ids.h b/include/uapi/linux/zorro_ids.h index 6e574d7b7d79..393f2ee9c042 100644 --- a/include/uapi/linux/zorro_ids.h +++ b/include/uapi/linux/zorro_ids.h @@ -449,6 +449,9 @@ #define ZORRO_PROD_VMC_ISDN_BLASTER_Z2 ZORRO_ID(VMC, 0x01, 0) #define ZORRO_PROD_VMC_HYPERCOM_4 ZORRO_ID(VMC, 0x02, 0) +#define ZORRO_MANUF_CSLAB 0x1400 +#define ZORRO_PROD_CSLAB_WARP_1260 ZORRO_ID(CSLAB, 0x65, 0) + #define ZORRO_MANUF_INFORMATION 0x157C #define ZORRO_PROD_INFORMATION_ISDN_ENGINE_I ZORRO_ID(INFORMATION, 0x64, 0) -- cgit v1.2.3 From 0e6be855a96dd44effce97b6b8d0cf0d0d0b7303 Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Wed, 22 May 2024 16:27:23 +0800 Subject: dt-bindings: clock: add Amlogic C3 PLL clock controller Add the PLL clock controller dt-bindings for Amlogic C3 SoC family. Reviewed-by: Krzysztof Kozlowski Co-developed-by: Chuan Liu Signed-off-by: Chuan Liu Signed-off-by: Xianwei Zhao Link: https://lore.kernel.org/r/20240522082727.3029656-2-xianwei.zhao@amlogic.com Signed-off-by: Jerome Brunet --- include/dt-bindings/clock/amlogic,c3-pll-clkc.h | 40 +++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 include/dt-bindings/clock/amlogic,c3-pll-clkc.h (limited to 'include') diff --git a/include/dt-bindings/clock/amlogic,c3-pll-clkc.h b/include/dt-bindings/clock/amlogic,c3-pll-clkc.h new file mode 100644 index 000000000000..fcdc558715e8 --- /dev/null +++ b/include/dt-bindings/clock/amlogic,c3-pll-clkc.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */ +/* + * Copyright (c) 2023 Amlogic, Inc. All rights reserved. + * Author: Chuan Liu + */ + +#ifndef _DT_BINDINGS_CLOCK_AMLOGIC_C3_PLL_CLKC_H +#define _DT_BINDINGS_CLOCK_AMLOGIC_C3_PLL_CLKC_H + +#define CLKID_FCLK_50M_EN 0 +#define CLKID_FCLK_50M 1 +#define CLKID_FCLK_DIV2_DIV 2 +#define CLKID_FCLK_DIV2 3 +#define CLKID_FCLK_DIV2P5_DIV 4 +#define CLKID_FCLK_DIV2P5 5 +#define CLKID_FCLK_DIV3_DIV 6 +#define CLKID_FCLK_DIV3 7 +#define CLKID_FCLK_DIV4_DIV 8 +#define CLKID_FCLK_DIV4 9 +#define CLKID_FCLK_DIV5_DIV 10 +#define CLKID_FCLK_DIV5 11 +#define CLKID_FCLK_DIV7_DIV 12 +#define CLKID_FCLK_DIV7 13 +#define CLKID_GP0_PLL_DCO 14 +#define CLKID_GP0_PLL 15 +#define CLKID_HIFI_PLL_DCO 16 +#define CLKID_HIFI_PLL 17 +#define CLKID_MCLK_PLL_DCO 18 +#define CLKID_MCLK_PLL_OD 19 +#define CLKID_MCLK_PLL 20 +#define CLKID_MCLK0_SEL 21 +#define CLKID_MCLK0_SEL_EN 22 +#define CLKID_MCLK0_DIV 23 +#define CLKID_MCLK0 24 +#define CLKID_MCLK1_SEL 25 +#define CLKID_MCLK1_SEL_EN 26 +#define CLKID_MCLK1_DIV 27 +#define CLKID_MCLK1 28 + +#endif /* _DT_BINDINGS_CLOCK_AMLOGIC_C3_PLL_CLKC_H */ -- cgit v1.2.3 From d309989a0a0aff3b8ad5259aa11f119b15336c1a Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Wed, 22 May 2024 16:27:24 +0800 Subject: dt-bindings: clock: add Amlogic C3 SCMI clock controller support Add the SCMI clock controller dt-bindings for Amlogic C3 SoC family Acked-by: Rob Herring (Arm) Co-developed-by: Chuan Liu Signed-off-by: Chuan Liu Signed-off-by: Xianwei Zhao Link: https://lore.kernel.org/r/20240522082727.3029656-3-xianwei.zhao@amlogic.com Signed-off-by: Jerome Brunet --- include/dt-bindings/clock/amlogic,c3-scmi-clkc.h | 27 ++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/dt-bindings/clock/amlogic,c3-scmi-clkc.h (limited to 'include') diff --git a/include/dt-bindings/clock/amlogic,c3-scmi-clkc.h b/include/dt-bindings/clock/amlogic,c3-scmi-clkc.h new file mode 100644 index 000000000000..663c9b349275 --- /dev/null +++ b/include/dt-bindings/clock/amlogic,c3-scmi-clkc.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */ +/* + * Copyright (c) 2023 Amlogic, Inc. All rights reserved. + * Author: Chuan Liu + */ + +#ifndef __AMLOGIC_C3_SCMI_CLKC_H +#define __AMLOGIC_C3_SCMI_CLKC_H + +#define CLKID_DDR_PLL_OSC 0 +#define CLKID_DDR_PHY 1 +#define CLKID_TOP_PLL_OSC 2 +#define CLKID_USB_PLL_OSC 3 +#define CLKID_MIPIISP_VOUT 4 +#define CLKID_MCLK_PLL_OSC 5 +#define CLKID_USB_CTRL 6 +#define CLKID_ETH_PLL_OSC 7 +#define CLKID_OSC 8 +#define CLKID_SYS_CLK 9 +#define CLKID_AXI_CLK 10 +#define CLKID_CPU_CLK 11 +#define CLKID_FIXED_PLL_OSC 12 +#define CLKID_GP1_PLL_OSC 13 +#define CLKID_SYS_PLL_DIV16 14 +#define CLKID_CPU_CLK_DIV16 15 + +#endif /* __AMLOGIC_C3_SCMI_CLKC_H */ -- cgit v1.2.3 From fc1c7f941c71460a730a449f76764d883e270cba Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Wed, 22 May 2024 16:27:25 +0800 Subject: dt-bindings: clock: add Amlogic C3 peripherals clock controller Add the peripherals clock controller dt-bindings for Amlogic C3 SoC family Reviewed-by: Rob Herring (Arm) Co-developed-by: Chuan Liu Signed-off-by: Chuan Liu Signed-off-by: Xianwei Zhao Link: https://lore.kernel.org/r/20240522082727.3029656-4-xianwei.zhao@amlogic.com Signed-off-by: Jerome Brunet --- .../clock/amlogic,c3-peripherals-clkc.h | 212 +++++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 include/dt-bindings/clock/amlogic,c3-peripherals-clkc.h (limited to 'include') diff --git a/include/dt-bindings/clock/amlogic,c3-peripherals-clkc.h b/include/dt-bindings/clock/amlogic,c3-peripherals-clkc.h new file mode 100644 index 000000000000..d115c741c255 --- /dev/null +++ b/include/dt-bindings/clock/amlogic,c3-peripherals-clkc.h @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */ +/* + * Copyright (c) 2023 Amlogic, Inc. All rights reserved. + * Author: Chuan Liu + */ + +#ifndef _DT_BINDINGS_CLOCK_AMLOGIC_C3_PERIPHERALS_CLKC_H +#define _DT_BINDINGS_CLOCK_AMLOGIC_C3_PERIPHERALS_CLKC_H + +#define CLKID_RTC_XTAL_CLKIN 0 +#define CLKID_RTC_32K_DIV 1 +#define CLKID_RTC_32K_MUX 2 +#define CLKID_RTC_32K 3 +#define CLKID_RTC_CLK 4 +#define CLKID_SYS_RESET_CTRL 5 +#define CLKID_SYS_PWR_CTRL 6 +#define CLKID_SYS_PAD_CTRL 7 +#define CLKID_SYS_CTRL 8 +#define CLKID_SYS_TS_PLL 9 +#define CLKID_SYS_DEV_ARB 10 +#define CLKID_SYS_MMC_PCLK 11 +#define CLKID_SYS_CPU_CTRL 12 +#define CLKID_SYS_JTAG_CTRL 13 +#define CLKID_SYS_IR_CTRL 14 +#define CLKID_SYS_IRQ_CTRL 15 +#define CLKID_SYS_MSR_CLK 16 +#define CLKID_SYS_ROM 17 +#define CLKID_SYS_UART_F 18 +#define CLKID_SYS_CPU_ARB 19 +#define CLKID_SYS_RSA 20 +#define CLKID_SYS_SAR_ADC 21 +#define CLKID_SYS_STARTUP 22 +#define CLKID_SYS_SECURE 23 +#define CLKID_SYS_SPIFC 24 +#define CLKID_SYS_NNA 25 +#define CLKID_SYS_ETH_MAC 26 +#define CLKID_SYS_GIC 27 +#define CLKID_SYS_RAMA 28 +#define CLKID_SYS_BIG_NIC 29 +#define CLKID_SYS_RAMB 30 +#define CLKID_SYS_AUDIO_PCLK 31 +#define CLKID_SYS_PWM_KL 32 +#define CLKID_SYS_PWM_IJ 33 +#define CLKID_SYS_USB 34 +#define CLKID_SYS_SD_EMMC_A 35 +#define CLKID_SYS_SD_EMMC_C 36 +#define CLKID_SYS_PWM_AB 37 +#define CLKID_SYS_PWM_CD 38 +#define CLKID_SYS_PWM_EF 39 +#define CLKID_SYS_PWM_GH 40 +#define CLKID_SYS_SPICC_1 41 +#define CLKID_SYS_SPICC_0 42 +#define CLKID_SYS_UART_A 43 +#define CLKID_SYS_UART_B 44 +#define CLKID_SYS_UART_C 45 +#define CLKID_SYS_UART_D 46 +#define CLKID_SYS_UART_E 47 +#define CLKID_SYS_I2C_M_A 48 +#define CLKID_SYS_I2C_M_B 49 +#define CLKID_SYS_I2C_M_C 50 +#define CLKID_SYS_I2C_M_D 51 +#define CLKID_SYS_I2S_S_A 52 +#define CLKID_SYS_RTC 53 +#define CLKID_SYS_GE2D 54 +#define CLKID_SYS_ISP 55 +#define CLKID_SYS_GPV_ISP_NIC 56 +#define CLKID_SYS_GPV_CVE_NIC 57 +#define CLKID_SYS_MIPI_DSI_HOST 58 +#define CLKID_SYS_MIPI_DSI_PHY 59 +#define CLKID_SYS_ETH_PHY 60 +#define CLKID_SYS_ACODEC 61 +#define CLKID_SYS_DWAP 62 +#define CLKID_SYS_DOS 63 +#define CLKID_SYS_CVE 64 +#define CLKID_SYS_VOUT 65 +#define CLKID_SYS_VC9000E 66 +#define CLKID_SYS_PWM_MN 67 +#define CLKID_SYS_SD_EMMC_B 68 +#define CLKID_AXI_SYS_NIC 69 +#define CLKID_AXI_ISP_NIC 70 +#define CLKID_AXI_CVE_NIC 71 +#define CLKID_AXI_RAMB 72 +#define CLKID_AXI_RAMA 73 +#define CLKID_AXI_CPU_DMC 74 +#define CLKID_AXI_NIC 75 +#define CLKID_AXI_DMA 76 +#define CLKID_AXI_MUX_NIC 77 +#define CLKID_AXI_CVE 78 +#define CLKID_AXI_DEV1_DMC 79 +#define CLKID_AXI_DEV0_DMC 80 +#define CLKID_AXI_DSP_DMC 81 +#define CLKID_12_24M_IN 82 +#define CLKID_12M_24M 83 +#define CLKID_FCLK_25M_DIV 84 +#define CLKID_FCLK_25M 85 +#define CLKID_GEN_SEL 86 +#define CLKID_GEN_DIV 87 +#define CLKID_GEN 88 +#define CLKID_SARADC_SEL 89 +#define CLKID_SARADC_DIV 90 +#define CLKID_SARADC 91 +#define CLKID_PWM_A_SEL 92 +#define CLKID_PWM_A_DIV 93 +#define CLKID_PWM_A 94 +#define CLKID_PWM_B_SEL 95 +#define CLKID_PWM_B_DIV 96 +#define CLKID_PWM_B 97 +#define CLKID_PWM_C_SEL 98 +#define CLKID_PWM_C_DIV 99 +#define CLKID_PWM_C 100 +#define CLKID_PWM_D_SEL 101 +#define CLKID_PWM_D_DIV 102 +#define CLKID_PWM_D 103 +#define CLKID_PWM_E_SEL 104 +#define CLKID_PWM_E_DIV 105 +#define CLKID_PWM_E 106 +#define CLKID_PWM_F_SEL 107 +#define CLKID_PWM_F_DIV 108 +#define CLKID_PWM_F 109 +#define CLKID_PWM_G_SEL 110 +#define CLKID_PWM_G_DIV 111 +#define CLKID_PWM_G 112 +#define CLKID_PWM_H_SEL 113 +#define CLKID_PWM_H_DIV 114 +#define CLKID_PWM_H 115 +#define CLKID_PWM_I_SEL 116 +#define CLKID_PWM_I_DIV 117 +#define CLKID_PWM_I 118 +#define CLKID_PWM_J_SEL 119 +#define CLKID_PWM_J_DIV 120 +#define CLKID_PWM_J 121 +#define CLKID_PWM_K_SEL 122 +#define CLKID_PWM_K_DIV 123 +#define CLKID_PWM_K 124 +#define CLKID_PWM_L_SEL 125 +#define CLKID_PWM_L_DIV 126 +#define CLKID_PWM_L 127 +#define CLKID_PWM_M_SEL 128 +#define CLKID_PWM_M_DIV 129 +#define CLKID_PWM_M 130 +#define CLKID_PWM_N_SEL 131 +#define CLKID_PWM_N_DIV 132 +#define CLKID_PWM_N 133 +#define CLKID_SPICC_A_SEL 134 +#define CLKID_SPICC_A_DIV 135 +#define CLKID_SPICC_A 136 +#define CLKID_SPICC_B_SEL 137 +#define CLKID_SPICC_B_DIV 138 +#define CLKID_SPICC_B 139 +#define CLKID_SPIFC_SEL 140 +#define CLKID_SPIFC_DIV 141 +#define CLKID_SPIFC 142 +#define CLKID_SD_EMMC_A_SEL 143 +#define CLKID_SD_EMMC_A_DIV 144 +#define CLKID_SD_EMMC_A 145 +#define CLKID_SD_EMMC_B_SEL 146 +#define CLKID_SD_EMMC_B_DIV 147 +#define CLKID_SD_EMMC_B 148 +#define CLKID_SD_EMMC_C_SEL 149 +#define CLKID_SD_EMMC_C_DIV 150 +#define CLKID_SD_EMMC_C 151 +#define CLKID_TS_DIV 152 +#define CLKID_TS 153 +#define CLKID_ETH_125M_DIV 154 +#define CLKID_ETH_125M 155 +#define CLKID_ETH_RMII_DIV 156 +#define CLKID_ETH_RMII 157 +#define CLKID_MIPI_DSI_MEAS_SEL 158 +#define CLKID_MIPI_DSI_MEAS_DIV 159 +#define CLKID_MIPI_DSI_MEAS 160 +#define CLKID_DSI_PHY_SEL 161 +#define CLKID_DSI_PHY_DIV 162 +#define CLKID_DSI_PHY 163 +#define CLKID_VOUT_MCLK_SEL 164 +#define CLKID_VOUT_MCLK_DIV 165 +#define CLKID_VOUT_MCLK 166 +#define CLKID_VOUT_ENC_SEL 167 +#define CLKID_VOUT_ENC_DIV 168 +#define CLKID_VOUT_ENC 169 +#define CLKID_HCODEC_0_SEL 170 +#define CLKID_HCODEC_0_DIV 171 +#define CLKID_HCODEC_0 172 +#define CLKID_HCODEC_1_SEL 173 +#define CLKID_HCODEC_1_DIV 174 +#define CLKID_HCODEC_1 175 +#define CLKID_HCODEC 176 +#define CLKID_VC9000E_ACLK_SEL 177 +#define CLKID_VC9000E_ACLK_DIV 178 +#define CLKID_VC9000E_ACLK 179 +#define CLKID_VC9000E_CORE_SEL 180 +#define CLKID_VC9000E_CORE_DIV 181 +#define CLKID_VC9000E_CORE 182 +#define CLKID_CSI_PHY0_SEL 183 +#define CLKID_CSI_PHY0_DIV 184 +#define CLKID_CSI_PHY0 185 +#define CLKID_DEWARPA_SEL 186 +#define CLKID_DEWARPA_DIV 187 +#define CLKID_DEWARPA 188 +#define CLKID_ISP0_SEL 189 +#define CLKID_ISP0_DIV 190 +#define CLKID_ISP0 191 +#define CLKID_NNA_CORE_SEL 192 +#define CLKID_NNA_CORE_DIV 193 +#define CLKID_NNA_CORE 194 +#define CLKID_GE2D_SEL 195 +#define CLKID_GE2D_DIV 196 +#define CLKID_GE2D 197 +#define CLKID_VAPB_SEL 198 +#define CLKID_VAPB_DIV 199 +#define CLKID_VAPB 200 + +#endif /* _DT_BINDINGS_CLOCK_AMLOGIC_C3_PERIPHERALS_CLKC_H */ -- cgit v1.2.3 From 668b6a2ef832a878494cc1b12a881c8ec0494b25 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 30 May 2024 19:08:34 +0200 Subject: flow_dissector: add support for tunnel control flags Dissect [no]csum, [no]dontfrag, [no]oam, [no]crit flags from skb metadata. This is a prerequisite for matching these control flags using TC flower. Suggested-by: Ilya Maximets Signed-off-by: Davide Caratti Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/net/flow_dissector.h | 9 +++++++++ include/net/ip_tunnels.h | 12 ++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 9ab376d1a677..99626475c3f4 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -329,6 +329,14 @@ struct flow_dissector_key_cfm { #define FLOW_DIS_CFM_MDL_MASK GENMASK(7, 5) #define FLOW_DIS_CFM_MDL_MAX 7 +/** + * struct flow_dissector_key_enc_flags: tunnel metadata control flags + * @flags: tunnel control flags + */ +struct flow_dissector_key_enc_flags { + u32 flags; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -363,6 +371,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */ FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */ FLOW_DISSECTOR_KEY_IPSEC, /* struct flow_dissector_key_ipsec */ + FLOW_DISSECTOR_KEY_ENC_FLAGS, /* struct flow_dissector_key_enc_flags */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9a6a08ec7713..5a530d4fb02c 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -247,6 +247,18 @@ static inline bool ip_tunnel_is_options_present(const unsigned long *flags) return ip_tunnel_flags_intersect(flags, present); } +static inline void ip_tunnel_set_encflags_present(unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_CSUM_BIT, present); + __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, present); + __set_bit(IP_TUNNEL_OAM_BIT, present); + __set_bit(IP_TUNNEL_CRIT_OPT_BIT, present); + + ip_tunnel_flags_or(flags, flags, present); +} + static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(supp) = { }; -- cgit v1.2.3 From 1d17568e74dedbcb54d36af0662a15128297d681 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 30 May 2024 19:08:35 +0200 Subject: net/sched: cls_flower: add support for matching tunnel control flags extend cls_flower to match TUNNEL_FLAGS_PRESENT bits in tunnel metadata. Suggested-by: Ilya Maximets Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/uapi/linux/pkt_cls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 229fc925ec3a..b6d38f5fd7c0 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -554,6 +554,9 @@ enum { TCA_FLOWER_KEY_SPI, /* be32 */ TCA_FLOWER_KEY_SPI_MASK, /* be32 */ + TCA_FLOWER_KEY_ENC_FLAGS, /* u32 */ + TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* u32 */ + __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From 071115301838c6c265065dd5d6bf43a9a987a550 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 30 May 2024 16:36:14 -0700 Subject: tcp: wrap mptcp and decrypted checks into tcp_skb_can_collapse_rx() tcp_skb_can_collapse() checks for conditions which don't make sense on input. Because of this we ended up sprinkling a few pairs of mptcp_skb_can_collapse() and skb_cmp_decrypted() calls on the input path. Group them in a new helper. This should make it less likely that someone will check mptcp and not decrypted or vice versa when adding new code. This implicitly adds a decrypted check early in tcp_collapse(). AFAIU this will very slightly increase our ability to collapse packets under memory pressure, not a real bug. Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/net/tcp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 32815a40dea1..32741856da01 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1071,6 +1071,13 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to, skb_pure_zcopy_same(to, from)); } +static inline bool tcp_skb_can_collapse_rx(const struct sk_buff *to, + const struct sk_buff *from) +{ + return likely(mptcp_skb_can_collapse(to, from) && + !skb_cmp_decrypted(to, from)); +} + /* Events passed to congestion control interface */ enum tcp_ca_event { CA_EVENT_TX_START, /* first transmit when no packets in flight */ -- cgit v1.2.3 From 1be68a87ab333af37b02ad928a724a722a5a8203 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 30 May 2024 16:36:15 -0700 Subject: tcp: add a helper for setting EOR on tail skb TLS (and hopefully soon PSP will) use EOR to prevent skbs with different decrypted state from getting merged, without adding new tests to the skb handling. In both cases once the connection switches to an "encrypted" state, all subsequent skbs will be encrypted, so a single "EOR fence" is sufficient to prevent mixing. Add a helper for setting the EOR bit, to make this arrangement more explicit. Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/net/tcp.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 32741856da01..08c3b99501cf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1066,6 +1066,7 @@ static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb) static inline bool tcp_skb_can_collapse(const struct sk_buff *to, const struct sk_buff *from) { + /* skb_cmp_decrypted() not needed, use tcp_write_collapse_fence() */ return likely(tcp_skb_can_collapse_to(to) && mptcp_skb_can_collapse(to, from) && skb_pure_zcopy_same(to, from)); @@ -2102,6 +2103,14 @@ static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct soc tcp_wmem_free_skb(sk, skb); } +static inline void tcp_write_collapse_fence(struct sock *sk) +{ + struct sk_buff *skb = tcp_write_queue_tail(sk); + + if (skb) + TCP_SKB_CB(skb)->eor = 1; +} + static inline void tcp_push_pending_frames(struct sock *sk) { if (tcp_send_head(sk)) { -- cgit v1.2.3 From f85d39dd7ed89ffdd622bc1de247ffba8d961504 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 27 May 2024 19:35:38 +0200 Subject: kcov, usb: disable interrupts in kcov_remote_start_usb_softirq After commit 8fea0c8fda30 ("usb: core: hcd: Convert from tasklet to BH workqueue"), usb_giveback_urb_bh() runs in the BH workqueue with interrupts enabled. Thus, the remote coverage collection section in usb_giveback_urb_bh()-> __usb_hcd_giveback_urb() might be interrupted, and the interrupt handler might invoke __usb_hcd_giveback_urb() again. This breaks KCOV, as it does not support nested remote coverage collection sections within the same context (neither in task nor in softirq). Update kcov_remote_start/stop_usb_softirq() to disable interrupts for the duration of the coverage collection section to avoid nested sections in the softirq context (in addition to such in the task context, which are already handled). Reported-by: Tetsuo Handa Closes: https://lore.kernel.org/linux-usb/0f4d1964-7397-485b-bc48-11c01e2fcbca@I-love.SAKURA.ne.jp/ Closes: https://syzkaller.appspot.com/bug?extid=0438378d6f157baae1a2 Suggested-by: Alan Stern Fixes: 8fea0c8fda30 ("usb: core: hcd: Convert from tasklet to BH workqueue") Cc: stable@vger.kernel.org Acked-by: Dmitry Vyukov Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/20240527173538.4989-1-andrey.konovalov@linux.dev Signed-off-by: Greg Kroah-Hartman --- include/linux/kcov.h | 47 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/kcov.h b/include/linux/kcov.h index b851ba415e03..1068a7318d89 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -55,21 +55,47 @@ static inline void kcov_remote_start_usb(u64 id) /* * The softirq flavor of kcov_remote_*() functions is introduced as a temporary - * work around for kcov's lack of nested remote coverage sections support in - * task context. Adding support for nested sections is tracked in: - * https://bugzilla.kernel.org/show_bug.cgi?id=210337 + * workaround for KCOV's lack of nested remote coverage sections support. + * + * Adding support is tracked in https://bugzilla.kernel.org/show_bug.cgi?id=210337. + * + * kcov_remote_start_usb_softirq(): + * + * 1. Only collects coverage when called in the softirq context. This allows + * avoiding nested remote coverage collection sections in the task context. + * For example, USB/IP calls usb_hcd_giveback_urb() in the task context + * within an existing remote coverage collection section. Thus, KCOV should + * not attempt to start collecting coverage within the coverage collection + * section in __usb_hcd_giveback_urb() in this case. + * + * 2. Disables interrupts for the duration of the coverage collection section. + * This allows avoiding nested remote coverage collection sections in the + * softirq context (a softirq might occur during the execution of a work in + * the BH workqueue, which runs with in_serving_softirq() > 0). + * For example, usb_giveback_urb_bh() runs in the BH workqueue with + * interrupts enabled, so __usb_hcd_giveback_urb() might be interrupted in + * the middle of its remote coverage collection section, and the interrupt + * handler might invoke __usb_hcd_giveback_urb() again. */ -static inline void kcov_remote_start_usb_softirq(u64 id) +static inline unsigned long kcov_remote_start_usb_softirq(u64 id) { - if (in_serving_softirq()) + unsigned long flags = 0; + + if (in_serving_softirq()) { + local_irq_save(flags); kcov_remote_start_usb(id); + } + + return flags; } -static inline void kcov_remote_stop_softirq(void) +static inline void kcov_remote_stop_softirq(unsigned long flags) { - if (in_serving_softirq()) + if (in_serving_softirq()) { kcov_remote_stop(); + local_irq_restore(flags); + } } #ifdef CONFIG_64BIT @@ -103,8 +129,11 @@ static inline u64 kcov_common_handle(void) } static inline void kcov_remote_start_common(u64 id) {} static inline void kcov_remote_start_usb(u64 id) {} -static inline void kcov_remote_start_usb_softirq(u64 id) {} -static inline void kcov_remote_stop_softirq(void) {} +static inline unsigned long kcov_remote_start_usb_softirq(u64 id) +{ + return 0; +} +static inline void kcov_remote_stop_softirq(unsigned long flags) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ -- cgit v1.2.3 From fe8db0bbe04d31ab17dc60e205c4a3365c92e67c Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Fri, 10 May 2024 20:12:42 +0000 Subject: usb: typec: Update sysfs when setting ops When adding altmode ops, update the sysfs group so that visibility is also recalculated. Reviewed-by: Heikki Krogerus Reviewed-by: Benson Leung Signed-off-by: Abhishek Pandit-Subedi Signed-off-by: Jameson Thies Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240510201244.2968152-3-jthies@google.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index b35b427561ab..549275f8ac1b 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -167,6 +167,9 @@ struct typec_port *typec_altmode2port(struct typec_altmode *alt); void typec_altmode_update_active(struct typec_altmode *alt, bool active); +void typec_altmode_set_ops(struct typec_altmode *alt, + const struct typec_altmode_ops *ops); + enum typec_plug_index { TYPEC_PLUG_SOP_P, TYPEC_PLUG_SOP_PP, -- cgit v1.2.3 From 971187350602d03c4a27c0783ff412502b95720a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 4 Jul 2023 14:17:19 +0100 Subject: driver core: remove devm_device_add_groups() There is no more in-kernel users of this function, and no driver should ever be using it, so remove it from the kernel. Acked-by: Dmitry Torokhov Acked-by: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230704131715.44454-8-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index fc3bd7116ab9..ace039151cb8 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1220,8 +1220,6 @@ static inline void device_remove_group(struct device *dev, return device_remove_groups(dev, groups); } -int __must_check devm_device_add_groups(struct device *dev, - const struct attribute_group **groups); int __must_check devm_device_add_group(struct device *dev, const struct attribute_group *grp); -- cgit v1.2.3 From 44a45be57f85165761fdabf072f9a97aa026ff61 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 23 May 2024 13:00:00 +0200 Subject: sysfs: Unbreak the build around sysfs_bin_attr_simple_read() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Günter reports build breakage for m68k "m5208evb_defconfig" plus CONFIG_BLK_DEV_INITRD=y caused by commit 66bc1a173328 ("treewide: Use sysfs_bin_attr_simple_read() helper"). The defconfig disables CONFIG_SYSFS, so sysfs_bin_attr_simple_read() is not compiled into the kernel. But init/initramfs.c references that function in the initializer of a struct bin_attribute. Add an empty static inline to avoid the build breakage. Fixes: 66bc1a173328 ("treewide: Use sysfs_bin_attr_simple_read() helper") Reported-by: Guenter Roeck Closes: https://lore.kernel.org/r/e12b0027-b199-4de7-b83d-668171447ccc@roeck-us.net Signed-off-by: Lukas Wunner Tested-by: Guenter Roeck Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/05f4290439a58730738a15b0c99cd8576c4aa0d9.1716461752.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index a7d725fbf739..c4e64dc11206 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -750,6 +750,15 @@ static inline int sysfs_emit_at(char *buf, int at, const char *fmt, ...) { return 0; } + +static inline ssize_t sysfs_bin_attr_simple_read(struct file *file, + struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, + size_t count) +{ + return 0; +} #endif /* CONFIG_SYSFS */ static inline int __must_check sysfs_create_file(struct kobject *kobj, -- cgit v1.2.3 From 42675b723b4842bca7bfb0f209aa9a493a10324a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:05 -0400 Subject: function_graph: Convert ret_stack to a series of longs In order to make it possible to have multiple callbacks registered with the function_graph tracer, the retstack needs to be converted from an array of ftrace_ret_stack structures to an array of longs. This will allow to store the list of callbacks on the stack for the return side of the functions. Link: https://lore.kernel.org/linux-trace-kernel/171509092742.162236.4427737821399314856.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190821.073111754@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6..352939dab3a5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1402,7 +1402,7 @@ struct task_struct { int curr_ret_depth; /* Stack of return addresses for return function tracing: */ - struct ftrace_ret_stack *ret_stack; + unsigned long *ret_stack; /* Timestamp for last schedule: */ unsigned long long ftrace_timestamp; -- cgit v1.2.3 From 7aa1eaef9f4282c9acd39588b1fdc9dda7e73f34 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:08 -0400 Subject: function_graph: Allow multiple users to attach to function graph Allow for multiple users to attach to function graph tracer at the same time. Only 16 simultaneous users can attach to the tracer. This is because there's an array that stores the pointers to the attached fgraph_ops. When a function being traced is entered, each of the ftrace_ops entryfunc is called and if it returns non zero, its index into the array will be added to the shadow stack. On exit of the function being traced, the shadow stack will contain the indexes of the ftrace_ops on the array that want their retfunc to be called. Because a function may sleep for a long time (if a task sleeps itself), the return of the function may be literally days later. If the ftrace_ops is removed, its place on the array is replaced with a ftrace_ops that contains the stub functions and that will be called when the function finally returns. If another ftrace_ops is added that happens to get the same index into the array, its return function may be called. But that's actually the way things current work with the old function graph tracer. If one tracer is removed and another is added, the new one will get the return calls of the function traced by the previous one, thus this is not a regression. This can be fixed by adding a counter to each time the array item is updated and save that on the shadow stack as well, such that it won't be called if the index saved does not match the index on the array. Note, being able to filter functions when both are called is not completely handled yet, but that shouldn't be too hard to manage. Co-developed with Masami Hiramatsu: Link: https://lore.kernel.org/linux-trace-kernel/171509096221.162236.8806372072523195752.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190821.555493396@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 800995c425e0..8f00a7415bd5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1038,6 +1038,7 @@ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace); struct fgraph_ops { trace_func_graph_ent_t entryfunc; trace_func_graph_ret_t retfunc; + int idx; }; /* @@ -1072,7 +1073,7 @@ function_graph_enter(unsigned long ret, unsigned long func, unsigned long frame_pointer, unsigned long *retp); struct ftrace_ret_stack * -ftrace_graph_get_ret_stack(struct task_struct *task, int idx); +ftrace_graph_get_ret_stack(struct task_struct *task, int skip); unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp); -- cgit v1.2.3 From 37238abe3cb47b8daaa8706c9949f67b2a705cf1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:11 -0400 Subject: ftrace/function_graph: Pass fgraph_ops to function graph callbacks Pass the fgraph_ops structure to the function graph callbacks. This will allow callbacks to add a descriptor to a fgraph_ops private field that wil be added in the future and use it for the callbacks. This will be useful when more than one callback can be registered to the function graph tracer. Co-developed with Masami Hiramatsu: Link: https://lore.kernel.org/linux-trace-kernel/171509098588.162236.4787930115997357578.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.035147698@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8f00a7415bd5..4d817b85ac0d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1027,11 +1027,15 @@ struct ftrace_graph_ret { unsigned long long rettime; } __packed; +struct fgraph_ops; + /* Type of the callback handlers for tracing function graph*/ -typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ -typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ +typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *, + struct fgraph_ops *); /* return */ +typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *, + struct fgraph_ops *); /* entry */ -extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace); +extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER -- cgit v1.2.3 From 26dda5631d1bb2f254f4c94aa87ee6c92a89cfdb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:12 -0400 Subject: ftrace: Allow function_graph tracer to be enabled in instances Now that function graph tracing can handle more than one user, allow it to be enabled in the ftrace instances. Note, the filtering of the functions is still joined by the top level set_ftrace_filter and friends, as well as the graph and nograph files. Co-developed with Masami Hiramatsu: Link: https://lore.kernel.org/linux-trace-kernel/171509099743.162236.1699959255446248163.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.190630762@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4d817b85ac0d..fd656e6d6b7c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1042,6 +1042,7 @@ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph struct fgraph_ops { trace_func_graph_ent_t entryfunc; trace_func_graph_ret_t retfunc; + void *private; int idx; }; -- cgit v1.2.3 From ab6b84630382914ffcbab59f4913c9a60971d034 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:13 -0400 Subject: ftrace: Allow ftrace startup flags to exist without dynamic ftrace Some of the flags for ftrace_startup() may be exposed even when CONFIG_DYNAMIC_FTRACE is not configured in. This is fine as the difference between dynamic ftrace and static ftrace is done within the internals of ftrace itself. No need to have use cases fail to compile because dynamic ftrace is disabled. This change is needed to move some of the logic of what is passed to ftrace_startup() out of the parameters of ftrace_startup(). Link: https://lore.kernel.org/linux-trace-kernel/171509100890.162236.4362350342549122222.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.350654104@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fd656e6d6b7c..586018744785 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -509,6 +509,15 @@ static inline void stack_tracer_disable(void) { } static inline void stack_tracer_enable(void) { } #endif +enum { + FTRACE_UPDATE_CALLS = (1 << 0), + FTRACE_DISABLE_CALLS = (1 << 1), + FTRACE_UPDATE_TRACE_FUNC = (1 << 2), + FTRACE_START_FUNC_RET = (1 << 3), + FTRACE_STOP_FUNC_RET = (1 << 4), + FTRACE_MAY_SLEEP = (1 << 5), +}; + #ifdef CONFIG_DYNAMIC_FTRACE void ftrace_arch_code_modify_prepare(void); @@ -603,15 +612,6 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); void ftrace_free_filter(struct ftrace_ops *ops); void ftrace_ops_set_global_filter(struct ftrace_ops *ops); -enum { - FTRACE_UPDATE_CALLS = (1 << 0), - FTRACE_DISABLE_CALLS = (1 << 1), - FTRACE_UPDATE_TRACE_FUNC = (1 << 2), - FTRACE_START_FUNC_RET = (1 << 3), - FTRACE_STOP_FUNC_RET = (1 << 4), - FTRACE_MAY_SLEEP = (1 << 5), -}; - /* * The FTRACE_UPDATE_* enum is used to pass information back * from the ftrace_update_record() and ftrace_test_record() -- cgit v1.2.3 From 5fccc7552ccbc521bad61653ee739b1196b1bc53 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 3 Jun 2024 15:07:14 -0400 Subject: ftrace: Add subops logic to allow one ops to manage many There are cases where a single system will use a single function callback to handle multiple users. For example, to allow function_graph tracer to have multiple users where each can trace their own set of functions, it is useful to only have one ftrace_ops registered to ftrace that will call a function by the function_graph tracer to handle the multiplexing with the different registered function_graph tracers. Add a "subop_list" to the ftrace_ops that will hold a list of other ftrace_ops that the top ftrace_ops will manage. The function ftrace_startup_subops() that takes the manager ftrace_ops and a subop ftrace_ops it will manage. If there are no subops with the ftrace_ops yet, it will copy the ftrace_ops subop filters to the manager ftrace_ops and register that with ftrace_startup(), and adds the subop to its subop_list. If the manager ops already has something registered, it will then merge the new subop filters with what it has and enable the new functions that covers all the subops it has. To remove a subop, ftrace_shutdown_subops() is called which will use the subop_list of the manager ops to rebuild all the functions it needs to trace, and update the ftrace records to only call the functions it now has registered. If there are no more functions registered, it will then call ftrace_shutdown() to disable itself completely. Note, it is up to the manager ops callback to always make sure that the subops callbacks are called if its filter matches, as there are times in the update where the callback could be calling more functions than those that are currently registered. This could be updated to handle other systems other than function_graph, for example, fprobes could use this (but will need an interface to call ftrace_startup_subops()). Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.508431129@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 586018744785..978a1d3b270a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -334,6 +334,7 @@ struct ftrace_ops { unsigned long trampoline; unsigned long trampoline_size; struct list_head list; + struct list_head subop_list; ftrace_ops_func_t ops_func; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS unsigned long direct_call; -- cgit v1.2.3 From d9bbfbd14f58d2955cc7a3efa8ae6d4e09ee5995 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 3 Jun 2024 15:07:15 -0400 Subject: ftrace: Allow subops filtering to be modified The subops filters use a "manager" ops to enable and disable its filters. The manager ops can handle more than one subops, and its filter is what controls what functions get set. Add a ftrace_hash_move_and_update_subops() function that will update the manager ops when the subops filters change. Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.673932251@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 978a1d3b270a..63238a9a9270 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -227,6 +227,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * ftrace_enabled. * DIRECT - Used by the direct ftrace_ops helper for direct functions * (internal ftrace only, should not be used by others) + * SUBOP - Is controlled by another op in field managed. */ enum { FTRACE_OPS_FL_ENABLED = BIT(0), @@ -247,6 +248,7 @@ enum { FTRACE_OPS_FL_TRACE_ARRAY = BIT(15), FTRACE_OPS_FL_PERMANENT = BIT(16), FTRACE_OPS_FL_DIRECT = BIT(17), + FTRACE_OPS_FL_SUBOP = BIT(18), }; #ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS @@ -336,6 +338,7 @@ struct ftrace_ops { struct list_head list; struct list_head subop_list; ftrace_ops_func_t ops_func; + struct ftrace_ops *managed; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS unsigned long direct_call; #endif -- cgit v1.2.3 From c132be2c4fcc1150ad0791c2a85dd4c9ad0bd0c8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:16 -0400 Subject: function_graph: Have the instances use their own ftrace_ops for filtering Allow for instances to have their own ftrace_ops part of the fgraph_ops that makes the funtion_graph tracer filter on the set_ftrace_filter file of the instance and not the top instance. This uses the new ftrace_startup_subops(), by using graph_ops as the "manager ops" that defines the callback function and adds the functions defined by the filters of the ops for each trace instance. The callback defined by the manager ops will call the registered fgraph ops that were added to the fgraph_array. Co-developed with Masami Hiramatsu: Link: https://lore.kernel.org/linux-trace-kernel/171509102088.162236.15758883237657317789.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.832946261@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 63238a9a9270..8f865689e868 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1046,6 +1046,7 @@ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph struct fgraph_ops { trace_func_graph_ent_t entryfunc; trace_func_graph_ret_t retfunc; + struct ftrace_ops ops; /* for the hash lists */ void *private; int idx; }; -- cgit v1.2.3 From df3ec5da6a1e7f6e142680d7c5266d3af187170b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 3 Jun 2024 15:07:17 -0400 Subject: function_graph: Add pid tracing back to function graph tracer Now that the function_graph has a main callback that handles the function graph subops tracing, it no longer honors the pid filtering of ftrace. Add back this logic in the function_graph code to update the gops callback for the entry function to test if it should trace the current task or not. Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.991720703@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8f865689e868..e31ec8516de1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1040,6 +1040,7 @@ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *, struct fgraph_ops *); /* entry */ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); +bool ftrace_pids_enabled(struct ftrace_ops *ops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -1048,6 +1049,7 @@ struct fgraph_ops { trace_func_graph_ret_t retfunc; struct ftrace_ops ops; /* for the hash lists */ void *private; + trace_func_graph_ent_t saved_func; int idx; }; -- cgit v1.2.3 From 4497412a1f7b5d9e0849f125652f2cc58cdba562 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:19 -0400 Subject: function_graph: Add "task variables" per task for fgraph_ops Add a "task variables" array on the tasks shadow ret_stack that is the size of longs for each possible registered fgraph_ops. That's a total of 16, taking up 8 * 16 = 128 bytes (out of a page size 4k). This will allow for fgraph_ops to do specific features on a per task basis having a way to maintain state for each task. Co-developed with Masami Hiramatsu: Link: https://lore.kernel.org/linux-trace-kernel/171509104383.162236.12239656156685718550.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.308806126@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e31ec8516de1..82cfc8e09cfd 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1089,6 +1089,7 @@ ftrace_graph_get_ret_stack(struct task_struct *task, int skip); unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp); +unsigned long *fgraph_get_task_var(struct fgraph_ops *gops); /* * Sometimes we don't want to trace a function with the function -- cgit v1.2.3 From 12117f3307b63f287756d7ec8cc4f11b94e1206a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:20 -0400 Subject: function_graph: Move set_graph_function tests to shadow stack global var The use of the task->trace_recursion for the logic used for the set_graph_function was a bit of an abuse of that variable. Now that there exists global vars that are per stack for registered graph traces, use that instead. Link: https://lore.kernel.org/linux-trace-kernel/171509105520.162236.10339831553995971290.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.472955399@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_recursion.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index 24ea8ac049b4..02e6afc6d7fe 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -44,9 +44,6 @@ enum { */ TRACE_IRQ_BIT, - /* Set if the function is in the set_graph_function file */ - TRACE_GRAPH_BIT, - /* * In the very unlikely case that an interrupt came in * at a start of graph tracing, and we want to trace @@ -60,7 +57,7 @@ enum { * that preempted a softirq start of a function that * preempted normal context!!!! Luckily, it can't be * greater than 3, so the next two bits are a mask - * of what the depth is when we set TRACE_GRAPH_BIT + * of what the depth is when we set TRACE_GRAPH_FL */ TRACE_GRAPH_DEPTH_START_BIT, -- cgit v1.2.3 From 068da098eb504469dc195137ae35eeacfe0c8de9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:21 -0400 Subject: function_graph: Move graph depth stored data to shadow stack global var The use of the task->trace_recursion for the logic used for the function graph depth was a bit of an abuse of that variable. Now that there exists global vars that are per stack for registered graph traces, use that instead. Link: https://lore.kernel.org/linux-trace-kernel/171509106728.162236.2398372644430125344.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.634870264@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_recursion.h | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'include') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index 02e6afc6d7fe..fdfb6f66718a 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -44,25 +44,6 @@ enum { */ TRACE_IRQ_BIT, - /* - * In the very unlikely case that an interrupt came in - * at a start of graph tracing, and we want to trace - * the function in that interrupt, the depth can be greater - * than zero, because of the preempted start of a previous - * trace. In an even more unlikely case, depth could be 2 - * if a softirq interrupted the start of graph tracing, - * followed by an interrupt preempting a start of graph - * tracing in the softirq, and depth can even be 3 - * if an NMI came in at the start of an interrupt function - * that preempted a softirq start of a function that - * preempted normal context!!!! Luckily, it can't be - * greater than 3, so the next two bits are a mask - * of what the depth is when we set TRACE_GRAPH_FL - */ - - TRACE_GRAPH_DEPTH_START_BIT, - TRACE_GRAPH_DEPTH_END_BIT, - /* * To implement set_graph_notrace, if this bit is set, we ignore * function graph tracing of called functions, until the return @@ -78,16 +59,6 @@ enum { #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) #define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit))) -#define trace_recursion_depth() \ - (((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3) -#define trace_recursion_set_depth(depth) \ - do { \ - current->trace_recursion &= \ - ~(3 << TRACE_GRAPH_DEPTH_START_BIT); \ - current->trace_recursion |= \ - ((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT; \ - } while (0) - #define TRACE_CONTEXT_BITS 4 #define TRACE_FTRACE_START TRACE_FTRACE_BIT -- cgit v1.2.3 From b84214890a9bc56f0fe4ec4fc72f2307ed05096d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:22 -0400 Subject: function_graph: Move graph notrace bit to shadow stack global var The use of the task->trace_recursion for the logic used for the function graph no-trace was a bit of an abuse of that variable. Now that there exists global vars that are per stack for registered graph traces, use that instead. Link: https://lore.kernel.org/linux-trace-kernel/171509107907.162236.6564679266777519065.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.796709456@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_recursion.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index fdfb6f66718a..ae04054a1be3 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -44,13 +44,6 @@ enum { */ TRACE_IRQ_BIT, - /* - * To implement set_graph_notrace, if this bit is set, we ignore - * function graph tracing of called functions, until the return - * function is called to clear it. - */ - TRACE_GRAPH_NOTRACE_BIT, - /* Used to prevent recursion recording from recursing. */ TRACE_RECORD_RECURSION_BIT, }; -- cgit v1.2.3 From 91c46b0aa917546432b5b219494859cda0edc39e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:23 -0400 Subject: function_graph: Implement fgraph_reserve_data() and fgraph_retrieve_data() Added functions that can be called by a fgraph_ops entryfunc and retfunc to store state between the entry of the function being traced to the exit of the same function. The fgraph_ops entryfunc() may call fgraph_reserve_data() to store up to 32 words onto the task's shadow ret_stack and this then can be retrieved by fgraph_retrieve_data() called by the corresponding retfunc(). Co-developed with Masami Hiramatsu: Link: https://lore.kernel.org/linux-trace-kernel/171509109089.162236.11372474169781184034.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.959703050@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 82cfc8e09cfd..9f61556a9491 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1053,6 +1053,9 @@ struct fgraph_ops { int idx; }; +void *fgraph_reserve_data(int idx, int size_bytes); +void *fgraph_retrieve_data(int idx, int *size_bytes); + /* * Stack of return addresses for functions * of a thread. -- cgit v1.2.3 From 3aee48a8e01f98d3285a0064285b0152cdbb8a9e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 May 2024 21:46:57 +0300 Subject: misc: eeprom_93xx46: Hide legacy platform data in the driver First of all, there is no user for the platform data in the kernel. Second, it needs a lot of updates to follow the modern standards of the kernel, including proper Device Tree bindings and device property handling. For now, just hide the legacy platform data in the driver's code. Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240508184905.2102633-5-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/eeprom_93xx46.h | 32 -------------------------------- 1 file changed, 32 deletions(-) delete mode 100644 include/linux/eeprom_93xx46.h (limited to 'include') diff --git a/include/linux/eeprom_93xx46.h b/include/linux/eeprom_93xx46.h deleted file mode 100644 index 34c2175e6a1e..000000000000 --- a/include/linux/eeprom_93xx46.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Module: eeprom_93xx46 - * platform description for 93xx46 EEPROMs. - */ -#include - -struct eeprom_93xx46_platform_data { - unsigned char flags; -#define EE_ADDR8 0x01 /* 8 bit addr. cfg */ -#define EE_ADDR16 0x02 /* 16 bit addr. cfg */ -#define EE_READONLY 0x08 /* forbid writing */ -#define EE_SIZE1K 0x10 /* 1 kb of data, that is a 93xx46 */ -#define EE_SIZE2K 0x20 /* 2 kb of data, that is a 93xx56 */ -#define EE_SIZE4K 0x40 /* 4 kb of data, that is a 93xx66 */ - - unsigned int quirks; -/* Single word read transfers only; no sequential read. */ -#define EEPROM_93XX46_QUIRK_SINGLE_WORD_READ (1 << 0) -/* Instructions such as EWEN are (addrlen + 2) in length. */ -#define EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH (1 << 1) -/* Add extra cycle after address during a read */ -#define EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE BIT(2) - - /* - * optional hooks to control additional logic - * before and after spi transfer. - */ - void (*prepare)(void *); - void (*finish)(void *); - struct gpio_desc *select; -}; -- cgit v1.2.3 From 1968845d358e108cfbfba45538d64b3cbdf04ac2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 31 May 2024 17:51:29 +0300 Subject: driver core: device.h: Group of_node handling declarations and definitions There are a few of_node related APIs defined in the driver core. Group the respective declarations and definitions in the header. There is no functional change. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240531145129.1506733-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index fc3bd7116ab9..56f266429229 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1031,13 +1031,6 @@ static inline void device_lock_assert(struct device *dev) lockdep_assert_held(&dev->mutex); } -static inline struct device_node *dev_of_node(struct device *dev) -{ - if (!IS_ENABLED(CONFIG_OF) || !dev) - return NULL; - return dev->of_node; -} - static inline bool dev_has_sync_state(struct device *dev) { if (!dev) @@ -1144,10 +1137,18 @@ void unlock_device_hotplug(void); int lock_device_hotplug_sysfs(void); int device_offline(struct device *dev); int device_online(struct device *dev); + void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode); -void device_set_of_node_from_dev(struct device *dev, const struct device *dev2); void device_set_node(struct device *dev, struct fwnode_handle *fwnode); +void device_set_of_node_from_dev(struct device *dev, const struct device *dev2); + +static inline struct device_node *dev_of_node(struct device *dev) +{ + if (!IS_ENABLED(CONFIG_OF) || !dev) + return NULL; + return dev->of_node; +} static inline int dev_num_vf(struct device *dev) { -- cgit v1.2.3 From fd7179ece035417f44f7ecff086d6df674d8a5bd Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 30 May 2024 10:14:08 -0500 Subject: iio: introduce struct iio_scan_type This gives the channel scan_type a named type so that it can be used to simplify code in later commits. Signed-off-by: David Lechner Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240530-iio-add-support-for-multiple-scan-types-v3-1-cbc4acea2cfa@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 55e2b22086a1..19de573a944a 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -173,6 +173,27 @@ struct iio_event_spec { unsigned long mask_shared_by_all; }; +/** + * struct iio_scan_type - specification for channel data format in buffer + * @sign: 's' or 'u' to specify signed or unsigned + * @realbits: Number of valid bits of data + * @storagebits: Realbits + padding + * @shift: Shift right by this before masking out realbits. + * @repeat: Number of times real/storage bits repeats. When the + * repeat element is more than 1, then the type element in + * sysfs will show a repeat value. Otherwise, the number + * of repetitions is omitted. + * @endianness: little or big endian + */ +struct iio_scan_type { + char sign; + u8 realbits; + u8 storagebits; + u8 shift; + u8 repeat; + enum iio_endian endianness; +}; + /** * struct iio_chan_spec - specification of a single channel * @type: What type of measurement is the channel making. @@ -184,17 +205,6 @@ struct iio_event_spec { * @scan_index: Monotonic index to give ordering in scans when read * from a buffer. * @scan_type: struct describing the scan type - * @scan_type.sign: 's' or 'u' to specify signed or unsigned - * @scan_type.realbits: Number of valid bits of data - * @scan_type.storagebits: Realbits + padding - * @scan_type.shift: Shift right by this before masking out - * realbits. - * @scan_type.repeat: Number of times real/storage bits repeats. - * When the repeat element is more than 1, then - * the type element in sysfs will show a repeat - * value. Otherwise, the number of repetitions - * is omitted. - * @scan_type.endianness: little or big endian * @info_mask_separate: What information is to be exported that is specific to * this channel. * @info_mask_separate_available: What availability information is to be @@ -245,14 +255,7 @@ struct iio_chan_spec { int channel2; unsigned long address; int scan_index; - struct { - char sign; - u8 realbits; - u8 storagebits; - u8 shift; - u8 repeat; - enum iio_endian endianness; - } scan_type; + struct iio_scan_type scan_type; long info_mask_separate; long info_mask_separate_available; long info_mask_shared_by_type; -- cgit v1.2.3 From d8f2bb50845f2797f594ffe3cac9417abff4d7b0 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 30 May 2024 10:14:10 -0500 Subject: iio: add support for multiple scan types per channel This adds new fields to the iio_channel structure to support multiple scan types per channel. This is useful for devices that support multiple resolution modes or other modes that require different data formats of the raw data. To make use of this, drivers need to implement the new callback get_current_scan_type() to resolve the scan type for a given channel based on the current state of the driver. There is a new scan_type_ext field in the iio_channel structure that should be used to store the scan types for any channel that has more than one. There is also a new flag has_ext_scan_type that acts as a type discriminator for the scan_type/ext_scan_type union. A union is used so that we don't grow the size of the iio_channel structure and also makes it clear that scan_type and ext_scan_type are mutually exclusive. The buffer code is the only code in the IIO core code that is using the scan_type field. This patch updates the buffer code to use the new iio_channel_validate_scan_type() function to ensure it is returning the correct scan type for the current state of the device when reading the sysfs attributes. The buffer validation code is also update to validate any additional scan types that are set in the scan_type_ext field. Part of that code is refactored to a new function to avoid duplication. Some userspace tools may need to be updated to re-read the scan type after writing any other attribute. During testing, we noticed that we had to restart iiod to get it to re-read the scan type after enabling oversampling on the ad7380 driver. Signed-off-by: David Lechner Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240530-iio-add-support-for-multiple-scan-types-v3-3-cbc4acea2cfa@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 19de573a944a..894309294182 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -204,7 +204,13 @@ struct iio_scan_type { * @address: Driver specific identifier. * @scan_index: Monotonic index to give ordering in scans when read * from a buffer. - * @scan_type: struct describing the scan type + * @scan_type: struct describing the scan type - mutually exclusive + * with ext_scan_type. + * @ext_scan_type: Used in rare cases where there is more than one scan + * format for a channel. When this is used, the flag + * has_ext_scan_type must be set and the driver must + * implement get_current_scan_type in struct iio_info. + * @num_ext_scan_type: Number of elements in ext_scan_type. * @info_mask_separate: What information is to be exported that is specific to * this channel. * @info_mask_separate_available: What availability information is to be @@ -248,6 +254,7 @@ struct iio_scan_type { * attributes but not for event codes. * @output: Channel is output. * @differential: Channel is differential. + * @has_ext_scan_type: True if ext_scan_type is used instead of scan_type. */ struct iio_chan_spec { enum iio_chan_type type; @@ -255,7 +262,13 @@ struct iio_chan_spec { int channel2; unsigned long address; int scan_index; - struct iio_scan_type scan_type; + union { + struct iio_scan_type scan_type; + struct { + const struct iio_scan_type *ext_scan_type; + unsigned int num_ext_scan_type; + }; + }; long info_mask_separate; long info_mask_separate_available; long info_mask_shared_by_type; @@ -273,6 +286,7 @@ struct iio_chan_spec { unsigned indexed:1; unsigned output:1; unsigned differential:1; + unsigned has_ext_scan_type:1; }; @@ -435,6 +449,9 @@ struct iio_trigger; /* forward declaration */ * for better event identification. * @validate_trigger: function to validate the trigger when the * current trigger gets changed. + * @get_current_scan_type: must be implemented by drivers that use ext_scan_type + * in the channel spec to return the index of the currently + * active ext_scan type for a channel. * @update_scan_mode: function to configure device and scan buffer when * channels have changed * @debugfs_reg_access: function to read or write register value of device @@ -519,6 +536,8 @@ struct iio_info { int (*validate_trigger)(struct iio_dev *indio_dev, struct iio_trigger *trig); + int (*get_current_scan_type)(const struct iio_dev *indio_dev, + const struct iio_chan_spec *chan); int (*update_scan_mode)(struct iio_dev *indio_dev, const unsigned long *scan_mask); int (*debugfs_reg_access)(struct iio_dev *indio_dev, @@ -804,6 +823,38 @@ static inline bool iio_read_acpi_mount_matrix(struct device *dev, } #endif +/** + * iio_get_current_scan_type - Get the current scan type for a channel + * @indio_dev: the IIO device to get the scan type for + * @chan: the channel to get the scan type for + * + * Most devices only have one scan type per channel and can just access it + * directly without calling this function. Core IIO code and drivers that + * implement ext_scan_type in the channel spec should use this function to + * get the current scan type for a channel. + * + * Returns: the current scan type for the channel or error. + */ +static inline const struct iio_scan_type +*iio_get_current_scan_type(const struct iio_dev *indio_dev, + const struct iio_chan_spec *chan) +{ + int ret; + + if (chan->has_ext_scan_type) { + ret = indio_dev->info->get_current_scan_type(indio_dev, chan); + if (ret < 0) + return ERR_PTR(ret); + + if (ret >= chan->num_ext_scan_type) + return ERR_PTR(-EINVAL); + + return &chan->ext_scan_type[ret]; + } + + return &chan->scan_type; +} + ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals); int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, -- cgit v1.2.3 From b6e6aca6c2b1b53fb4db4b672eaa2722a75aa6a2 Mon Sep 17 00:00:00 2001 From: Ramona Gradinariu Date: Mon, 27 May 2024 17:26:12 +0300 Subject: iio: imu: adis_buffer: Add buffer setup API with buffer attributes Add new API called devm_adis_setup_buffer_and_trigger_with_attrs() which also takes buffer attributes as a parameter. Rewrite devm_adis_setup_buffer_and_trigger() implementation such that it calls devm_adis_setup_buffer_and_trigger_with_attrs() with buffer attributes parameter NULL Reviewed-by: Nuno Sa Signed-off-by: Ramona Gradinariu Link: https://lore.kernel.org/r/20240527142618.275897-4-ramona.bolboaca13@gmail.com Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 8898966bc0f0..0fe3a2f63033 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -21,6 +21,7 @@ #define ADIS_REG_PAGE_ID 0x00 struct adis; +struct iio_dev_attr; /** * struct adis_timeouts - ADIS chip variant timeouts @@ -515,11 +516,19 @@ int adis_single_conversion(struct iio_dev *indio_dev, #define ADIS_ROT_CHAN(mod, addr, si, info_sep, info_all, bits) \ ADIS_MOD_CHAN(IIO_ROT, mod, addr, si, info_sep, info_all, bits) +#define devm_adis_setup_buffer_and_trigger(adis, indio_dev, trigger_handler) \ + devm_adis_setup_buffer_and_trigger_with_attrs((adis), (indio_dev), \ + (trigger_handler), NULL, \ + NULL) + #ifdef CONFIG_IIO_ADIS_LIB_BUFFER int -devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev, - irq_handler_t trigger_handler); +devm_adis_setup_buffer_and_trigger_with_attrs(struct adis *adis, + struct iio_dev *indio_dev, + irq_handler_t trigger_handler, + const struct iio_buffer_setup_ops *ops, + const struct iio_dev_attr **buffer_attrs); int devm_adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev); @@ -529,8 +538,11 @@ int adis_update_scan_mode(struct iio_dev *indio_dev, #else /* CONFIG_IIO_BUFFER */ static inline int -devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev, - irq_handler_t trigger_handler) +devm_adis_setup_buffer_and_trigger_with_attrs(struct adis *adis, + struct iio_dev *indio_dev, + irq_handler_t trigger_handler, + const struct iio_buffer_setup_ops *ops, + const struct iio_dev_attr **buffer_attrs) { return 0; } -- cgit v1.2.3 From 01724ce2d9405b2246bbb69701c74880eb56a34b Mon Sep 17 00:00:00 2001 From: Ramona Gradinariu Date: Mon, 27 May 2024 17:26:15 +0300 Subject: iio: imu: adis_trigger: Allow level interrupts for FIFO readings Currently, adis library allows configuration only for edge interrupts, needed for data ready sampling. This patch removes the restriction for level interrupts for devices which have FIFO support. Furthermore, in case of devices which have FIFO support, devm_request_threaded_irq is used for interrupt allocation, to avoid flooding the processor with the FIFO watermark level interrupt, which is active until enough data has been read from the FIFO. Reviewed-by: Nuno Sa Signed-off-by: Ramona Gradinariu Link: https://lore.kernel.org/r/20240527142618.275897-7-ramona.bolboaca13@gmail.com Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 0fe3a2f63033..4bb0a53cf7ea 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -85,6 +85,7 @@ struct adis_data { bool unmasked_drdy; bool has_paging; + bool has_fifo; unsigned int burst_reg_cmd; unsigned int burst_len; -- cgit v1.2.3 From 175d1825ca4d2288fee734ada0955a1e36dd50e6 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sat, 1 Jun 2024 06:22:43 +0900 Subject: scsi: ufs: pci: Add support MCQ for QEMU-based UFS Recently, ufs-mcq feature has been introduced to QEMU hw/ufs device [1]. This patch adds MCQ support for upstream QEMU UFS PCI controller. This patch provides mandatory vops callbacks to make UFS controller work properly on MCQ mode. Operation and Runtime Config register stride is fixed to 48bytes which is implemented by qemu. [1] https://lore.kernel.org/qemu-devel/cover.1716876237.git.jeuk20.kim@samsung.com/ Signed-off-by: Minwoo Im Link: https://lore.kernel.org/r/20240531212244.1593535-2-minwoo.im@samsung.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index df68fb1d4f3f..9e0581115b34 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1278,6 +1278,7 @@ void ufshcd_update_evt_hist(struct ufs_hba *hba, u32 id, u32 val); void ufshcd_hba_stop(struct ufs_hba *hba); void ufshcd_schedule_eh_work(struct ufs_hba *hba); void ufshcd_mcq_config_mac(struct ufs_hba *hba, u32 max_active_cmds); +unsigned int ufshcd_mcq_queue_cfg_addr(struct ufs_hba *hba); u32 ufshcd_mcq_read_cqis(struct ufs_hba *hba, int i); void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i); unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba, -- cgit v1.2.3 From ba098ed9829c5de4d65e5708d08d3508f2e70a7d Mon Sep 17 00:00:00 2001 From: Daisuke Nojiri Date: Tue, 4 Jun 2024 10:01:48 -0700 Subject: platform/chrome: Add struct ec_response_get_next_event_v3 Add struct ec_response_get_next_event_v3 to upgrade EC_CMD_GET_NEXT_EVENT to version 3. Signed-off-by: Daisuke Nojiri Link: https://lore.kernel.org/r/20240604170552.2517189-1-dnojiri@chromium.org Signed-off-by: Tzung-Bi Shih --- include/linux/platform_data/cros_ec_commands.h | 34 ++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index ecc47d5fe239..ec598057d1da 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -3463,6 +3463,34 @@ union __ec_align_offset1 ec_response_get_next_data_v1 { }; BUILD_ASSERT(sizeof(union ec_response_get_next_data_v1) == 16); +union __ec_align_offset1 ec_response_get_next_data_v3 { + uint8_t key_matrix[18]; + + /* Unaligned */ + uint32_t host_event; + uint64_t host_event64; + + struct __ec_todo_unpacked { + /* For aligning the fifo_info */ + uint8_t reserved[3]; + struct ec_response_motion_sense_fifo_info info; + } sensor_fifo; + + uint32_t buttons; + + uint32_t switches; + + uint32_t fp_events; + + uint32_t sysrq; + + /* CEC events from enum mkbp_cec_event */ + uint32_t cec_events; + + uint8_t cec_message[16]; +}; +BUILD_ASSERT(sizeof(union ec_response_get_next_data_v3) == 18); + struct ec_response_get_next_event { uint8_t event_type; /* Followed by event data if any */ @@ -3475,6 +3503,12 @@ struct ec_response_get_next_event_v1 { union ec_response_get_next_data_v1 data; } __ec_align1; +struct ec_response_get_next_event_v3 { + uint8_t event_type; + /* Followed by event data if any */ + union ec_response_get_next_data_v3 data; +} __ec_align1; + /* Bit indices for buttons and switches.*/ /* Buttons */ #define EC_MKBP_POWER_BUTTON 0 -- cgit v1.2.3 From 106d6739823369c734a8fc3b13634274eee4f60e Mon Sep 17 00:00:00 2001 From: Daisuke Nojiri Date: Tue, 4 Jun 2024 16:08:25 -0700 Subject: platform/chrome: cros_ec_proto: Upgrade get_next_event to v3 Upgrade EC_CMD_GET_NEXT_EVENT to version 3. The max supported version will be v3. So, we speak v3 even if the EC says it supports v4+. Signed-off-by: Daisuke Nojiri Link: https://lore.kernel.org/r/20240604230837.2878737-1-dnojiri@chromium.org [tzungbi: uint32_t -> u32 per suggested by checkpatch.pl] Signed-off-by: Tzung-Bi Shih --- include/linux/platform_data/cros_ec_proto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 1ddc52603f9a..6e9225bdf903 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -185,7 +185,7 @@ struct cros_ec_device { bool host_sleep_v1; struct blocking_notifier_head event_notifier; - struct ec_response_get_next_event_v1 event_data; + struct ec_response_get_next_event_v3 event_data; int event_size; u32 host_event_wake_mask; u32 last_resume_result; -- cgit v1.2.3 From 93bbbcb1e762a49fc18ee1272545b77371595f1e Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 25 May 2024 02:30:39 +0000 Subject: mm/memblock: fix a typo in description of for_each_mem_region() No functional change. Signed-off-by: Wei Yang Link: https://lore.kernel.org/r/20240525023040.13509-2-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (IBM) --- include/linux/memblock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e2082240586d..6cf18dc2b4d0 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -565,7 +565,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo } /** - * for_each_mem_region - itereate over memory regions + * for_each_mem_region - iterate over memory regions * @region: loop variable */ #define for_each_mem_region(region) \ -- cgit v1.2.3 From 172e422ffea20a89bfdc672741c1aad6fbb5044e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 12 May 2024 13:30:07 +0200 Subject: fsnotify: clear PARENT_WATCHED flags lazily In some setups directories can have many (usually negative) dentries. Hence __fsnotify_update_child_dentry_flags() function can take a significant amount of time. Since the bulk of this function happens under inode->i_lock this causes a significant contention on the lock when we remove the watch from the directory as the __fsnotify_update_child_dentry_flags() call from fsnotify_recalc_mask() races with __fsnotify_update_child_dentry_flags() calls from __fsnotify_parent() happening on children. This can lead upto softlockup reports reported by users. Fix the problem by calling fsnotify_update_children_dentry_flags() to set PARENT_WATCHED flags only when parent starts watching children. When parent stops watching children, clear false positive PARENT_WATCHED flags lazily in __fsnotify_parent() for each accessed child. Suggested-by: Jan Kara Signed-off-by: Amir Goldstein Signed-off-by: Stephen Brennan Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 4dd6143db271..8be029bc50b1 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -594,12 +594,14 @@ static inline __u32 fsnotify_parent_needed_mask(__u32 mask) static inline int fsnotify_inode_watches_children(struct inode *inode) { + __u32 parent_mask = READ_ONCE(inode->i_fsnotify_mask); + /* FS_EVENT_ON_CHILD is set if the inode may care */ - if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD)) + if (!(parent_mask & FS_EVENT_ON_CHILD)) return 0; /* this inode might care about child events, does it care about the * specific set of events that can happen on a child? */ - return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD; + return parent_mask & FS_EVENTS_POSS_ON_CHILD; } /* @@ -613,7 +615,7 @@ static inline void fsnotify_update_flags(struct dentry *dentry) /* * Serialisation of setting PARENT_WATCHED on the dentries is provided * by d_lock. If inotify_inode_watched changes after we have taken - * d_lock, the following __fsnotify_update_child_dentry_flags call will + * d_lock, the following fsnotify_set_children_dentry_flags call will * find our entry, so it will spin until we complete here, and update * us with the new state. */ -- cgit v1.2.3 From 61e2bbafb00e4b9a5de45e6448a7b6b818658576 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 31 May 2024 23:46:34 +0800 Subject: net: remove NULL-pointer net parameter in ip_metrics_convert When I was doing some experiments, I found that when using the first parameter, namely, struct net, in ip_metrics_convert() always triggers NULL pointer crash. Then I digged into this part, realizing that we can remove this one due to its uselessness. Signed-off-by: Jason Xing Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/ip.h | 3 +-- include/net/tcp.h | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 6d735e00d3f3..c5606cadb1a5 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -506,8 +506,7 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); } -struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, - int fc_mx_len, +struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, int fc_mx_len, struct netlink_ext_ack *extack); static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 08c3b99501cf..a70fc39090fe 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1224,7 +1224,7 @@ extern struct tcp_congestion_ops tcp_reno; struct tcp_congestion_ops *tcp_ca_find(const char *name); struct tcp_congestion_ops *tcp_ca_find_key(u32 key); -u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca); +u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca); #ifdef CONFIG_INET char *tcp_ca_get_name_by_key(u32 key, char *buffer); #else -- cgit v1.2.3 From 6f49c3fb563c0a95a838216eaf7d9b02ece44bf5 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 1 Jun 2024 00:29:17 +0100 Subject: net: caif: remove unused structs 'cfpktq' has been unused since commit 73d6ac633c6c ("caif: code cleanup"). 'caif_packet_funcs' is declared but never defined. Remove both of them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Jacob Keller Signed-off-by: David S. Miller --- include/net/caif/caif_layer.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h index 51f7bb42a936..0f45d875905f 100644 --- a/include/net/caif/caif_layer.h +++ b/include/net/caif/caif_layer.h @@ -11,9 +11,7 @@ struct cflayer; struct cfpkt; -struct cfpktq; struct caif_payload_info; -struct caif_packet_funcs; #define CAIF_LAYER_NAME_SZ 16 -- cgit v1.2.3 From 310ec03841a36e3f45fb528f0dfdfe5b9e84b037 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 5 Jun 2024 13:26:05 +1200 Subject: dma-buf: align fd_flags and heap_flags with dma_heap_allocation_data dma_heap_allocation_data defines the UAPI as follows: struct dma_heap_allocation_data { __u64 len; __u32 fd; __u32 fd_flags; __u64 heap_flags; }; However, dma_heap_buffer_alloc() casts both fd_flags and heap_flags into unsigned int. We're inconsistent with types in the non UAPI arguments. This patch fixes it. Signed-off-by: Barry Song Acked-by: John Stultz Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20240605012605.5341-1-21cnbao@gmail.com --- include/uapi/linux/dma-heap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/dma-heap.h b/include/uapi/linux/dma-heap.h index 6f84fa08e074..a4cf716a49fa 100644 --- a/include/uapi/linux/dma-heap.h +++ b/include/uapi/linux/dma-heap.h @@ -19,7 +19,7 @@ #define DMA_HEAP_VALID_FD_FLAGS (O_CLOEXEC | O_ACCMODE) /* Currently no heap flags */ -#define DMA_HEAP_VALID_HEAP_FLAGS (0) +#define DMA_HEAP_VALID_HEAP_FLAGS (0ULL) /** * struct dma_heap_allocation_data - metadata passed from userspace for -- cgit v1.2.3 From 82dc29b9737edf2d13561ebcf6212c0b88c41129 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 2 Jun 2024 16:18:52 +0200 Subject: devlink: Constify the 'table_ops' parameter of devl_dpipe_table_register() "struct devlink_dpipe_table_ops" only contains some function pointers. Update "struct devlink_dpipe_table" and the 'table_ops' parameter of devl_dpipe_table_register() so that structures in drivers can be constified. Constifying these structures will move some data to a read-only section, so increase overall security. Signed-off-by: Christophe JAILLET Reviewed-by: Wojciech Drewek Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/devlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 35eb0f884386..db5eff6cb60f 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -352,7 +352,7 @@ struct devlink_dpipe_table { bool resource_valid; u64 resource_id; u64 resource_units; - struct devlink_dpipe_table_ops *table_ops; + const struct devlink_dpipe_table_ops *table_ops; struct rcu_head rcu; }; @@ -1751,7 +1751,7 @@ void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index); void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); int devl_dpipe_table_register(struct devlink *devlink, const char *table_name, - struct devlink_dpipe_table_ops *table_ops, + const struct devlink_dpipe_table_ops *table_ops, void *priv, bool counter_control_extern); void devl_dpipe_table_unregister(struct devlink *devlink, const char *table_name); -- cgit v1.2.3 From 5bbe5872fed4497a192556426d75fd9223c5bfeb Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Wed, 29 May 2024 11:10:33 +0800 Subject: dt-bindings: power: add Amlogic A4 power domains Add devicetree binding document and related header file for Amlogic A4 secure power domains. Acked-by: Krzysztof Kozlowski Signed-off-by: Xianwei Zhao Link: https://lore.kernel.org/r/20240529-a4_secpowerdomain-v2-1-47502fc0eaf3@amlogic.com Signed-off-by: Ulf Hansson --- include/dt-bindings/power/amlogic,a4-pwrc.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/dt-bindings/power/amlogic,a4-pwrc.h (limited to 'include') diff --git a/include/dt-bindings/power/amlogic,a4-pwrc.h b/include/dt-bindings/power/amlogic,a4-pwrc.h new file mode 100644 index 000000000000..bd2f9c558d22 --- /dev/null +++ b/include/dt-bindings/power/amlogic,a4-pwrc.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */ +/* + * Copyright (C) 2024 Amlogic, Inc. All rights reserved + */ +#ifndef _DT_BINDINGS_AMLOGIC_A4_POWER_H +#define _DT_BINDINGS_AMLOGIC_A4_POWER_H + +#define PWRC_A4_AUDIO_ID 0 +#define PWRC_A4_SDIOA_ID 1 +#define PWRC_A4_EMMC_ID 2 +#define PWRC_A4_USB_COMB_ID 3 +#define PWRC_A4_ETH_ID 4 +#define PWRC_A4_VOUT_ID 5 +#define PWRC_A4_AUDIO_PDM_ID 6 +#define PWRC_A4_DMC_ID 7 +#define PWRC_A4_SYS_WRAP_ID 8 +#define PWRC_A4_AO_I2C_S_ID 9 +#define PWRC_A4_AO_UART_ID 10 +#define PWRC_A4_AO_IR_ID 11 + +#endif -- cgit v1.2.3 From f086edef71be7174a16c1ed67ac65a085cda28b1 Mon Sep 17 00:00:00 2001 From: Kevin Yang Date: Mon, 3 Jun 2024 21:30:54 +0000 Subject: tcp: add sysctl_tcp_rto_min_us Adding a sysctl knob to allow user to specify a default rto_min at socket init time, other than using the hard coded 200ms default rto_min. Note that the rto_min route option has the highest precedence for configuring this setting, followed by the TCP_BPF_RTO_MIN socket option, followed by the tcp_rto_min_us sysctl. Signed-off-by: Kevin Yang Reviewed-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Eric Dumazet Reviewed-by: Tony Lu Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index c356c458b340..a91bb971f901 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -170,6 +170,7 @@ struct netns_ipv4 { u8 sysctl_tcp_sack; u8 sysctl_tcp_window_scaling; u8 sysctl_tcp_timestamps; + int sysctl_tcp_rto_min_us; u8 sysctl_tcp_recovery; u8 sysctl_tcp_thin_linear_timeouts; u8 sysctl_tcp_slow_start_after_idle; -- cgit v1.2.3 From 6fef518594bcb7e374f809717281bd02894929f8 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 25 Apr 2024 15:07:01 -0700 Subject: KVM: x86: Add a capability to configure bus frequency for APIC timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add KVM_CAP_X86_APIC_BUS_CYCLES_NS capability to configure the APIC bus clock frequency for APIC timer emulation. Allow KVM_ENABLE_CAPABILITY(KVM_CAP_X86_APIC_BUS_CYCLES_NS) to set the frequency in nanoseconds. When using this capability, the user space VMM should configure CPUID leaf 0x15 to advertise the frequency. Vishal reported that the TDX guest kernel expects a 25MHz APIC bus frequency but ends up getting interrupts at a significantly higher rate. The TDX architecture hard-codes the core crystal clock frequency to 25MHz and mandates exposing it via CPUID leaf 0x15. The TDX architecture does not allow the VMM to override the value. In addition, per Intel SDM: "The APIC timer frequency will be the processor’s bus clock or core crystal clock frequency (when TSC/core crystal clock ratio is enumerated in CPUID leaf 0x15) divided by the value specified in the divide configuration register." The resulting 25MHz APIC bus frequency conflicts with the KVM hardcoded APIC bus frequency of 1GHz. The KVM doesn't enumerate CPUID leaf 0x15 to the guest unless the user space VMM sets it using KVM_SET_CPUID. If the CPUID leaf 0x15 is enumerated, the guest kernel uses it as the APIC bus frequency. If not, the guest kernel measures the frequency based on other known timers like the ACPI timer or the legacy PIT. As reported by Vishal the TDX guest kernel expects a 25MHz timer frequency but gets timer interrupt more frequently due to the 1GHz frequency used by KVM. To ensure that the guest doesn't have a conflicting view of the APIC bus frequency, allow the userspace to tell KVM to use the same frequency that TDX mandates instead of the default 1Ghz. Reported-by: Vishal Annapurve Closes: https://lore.kernel.org/lkml/20231006011255.4163884-1-vannapurve@google.com Signed-off-by: Isaku Yamahata Reviewed-by: Rick Edgecombe Co-developed-by: Reinette Chatre Signed-off-by: Reinette Chatre Reviewed-by: Xiaoyao Li Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/6748a4c12269e756f0c48680da8ccc5367c31ce7.1714081726.git.reinette.chatre@intel.com Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d03842abae57..ec998e6b6555 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -917,6 +917,7 @@ struct kvm_enable_cap { #define KVM_CAP_MEMORY_ATTRIBUTES 233 #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 +#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 236 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From f0dc887f21d18791037c0166f652c67da761f16f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 27 May 2024 17:34:47 -0700 Subject: sched/core: Move preempt_model_*() helpers from sched.h to preempt.h Move the declarations and inlined implementations of the preempt_model_*() helpers to preempt.h so that they can be referenced in spinlock.h without creating a potential circular dependency between spinlock.h and sched.h. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ankur Arora Link: https://lkml.kernel.org/r/20240528003521.979836-2-ankur.a.arora@oracle.com --- include/linux/preempt.h | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 41 ----------------------------------------- 2 files changed, 41 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7233e9cf1bab..ce76f1a45722 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -481,4 +481,45 @@ DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable()) DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace()) DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) +#ifdef CONFIG_PREEMPT_DYNAMIC + +extern bool preempt_model_none(void); +extern bool preempt_model_voluntary(void); +extern bool preempt_model_full(void); + +#else + +static inline bool preempt_model_none(void) +{ + return IS_ENABLED(CONFIG_PREEMPT_NONE); +} +static inline bool preempt_model_voluntary(void) +{ + return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY); +} +static inline bool preempt_model_full(void) +{ + return IS_ENABLED(CONFIG_PREEMPT); +} + +#endif + +static inline bool preempt_model_rt(void) +{ + return IS_ENABLED(CONFIG_PREEMPT_RT); +} + +/* + * Does the preemption model allow non-cooperative preemption? + * + * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with + * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the + * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the + * PREEMPT_NONE model. + */ +static inline bool preempt_model_preemptible(void) +{ + return preempt_model_full() || preempt_model_rt(); +} + #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6..90691d99027e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2064,47 +2064,6 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock); __cond_resched_rwlock_write(lock); \ }) -#ifdef CONFIG_PREEMPT_DYNAMIC - -extern bool preempt_model_none(void); -extern bool preempt_model_voluntary(void); -extern bool preempt_model_full(void); - -#else - -static inline bool preempt_model_none(void) -{ - return IS_ENABLED(CONFIG_PREEMPT_NONE); -} -static inline bool preempt_model_voluntary(void) -{ - return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY); -} -static inline bool preempt_model_full(void) -{ - return IS_ENABLED(CONFIG_PREEMPT); -} - -#endif - -static inline bool preempt_model_rt(void) -{ - return IS_ENABLED(CONFIG_PREEMPT_RT); -} - -/* - * Does the preemption model allow non-cooperative preemption? - * - * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with - * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the - * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the - * PREEMPT_NONE model. - */ -static inline bool preempt_model_preemptible(void) -{ - return preempt_model_full() || preempt_model_rt(); -} - static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); -- cgit v1.2.3 From c793a62823d1ce8f70d9cfc7803e3ea436277cda Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 27 May 2024 17:34:48 -0700 Subject: sched/core: Drop spinlocks on contention iff kernel is preemptible Use preempt_model_preemptible() to detect a preemptible kernel when deciding whether or not to reschedule in order to drop a contended spinlock or rwlock. Because PREEMPT_DYNAMIC selects PREEMPTION, kernels built with PREEMPT_DYNAMIC=y will yield contended locks even if the live preemption model is "none" or "voluntary". In short, make kernels with dynamically selected models behave the same as kernels with statically selected models. Somewhat counter-intuitively, NOT yielding a lock can provide better latency for the relevant tasks/processes. E.g. KVM x86's mmu_lock, a rwlock, is often contended between an invalidation event (takes mmu_lock for write) and a vCPU servicing a guest page fault (takes mmu_lock for read). For _some_ setups, letting the invalidation task complete even if there is mmu_lock contention provides lower latency for *all* tasks, i.e. the invalidation completes sooner *and* the vCPU services the guest page fault sooner. But even KVM's mmu_lock behavior isn't uniform, e.g. the "best" behavior can vary depending on the host VMM, the guest workload, the number of vCPUs, the number of pCPUs in the host, why there is lock contention, etc. In other words, simply deleting the CONFIG_PREEMPTION guard (or doing the opposite and removing contention yielding entirely) needs to come with a big pile of data proving that changing the status quo is a net positive. Opportunistically document this side effect of preempt=full, as yielding contended spinlocks can have significant, user-visible impact. Fixes: c597bfddc9e9 ("sched: Provide Kconfig support for default dynamic preempt mode") Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ankur Arora Reviewed-by: Chen Yu Link: https://lore.kernel.org/kvm/ef81ff36-64bb-4cfe-ae9b-e3acf47bff24@proxmox.com --- include/linux/spinlock.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 3fcd20de6ca8..63dd8cf3c3c2 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -462,11 +462,10 @@ static __always_inline int spin_is_contended(spinlock_t *lock) */ static inline int spin_needbreak(spinlock_t *lock) { -#ifdef CONFIG_PREEMPTION + if (!preempt_model_preemptible()) + return 0; + return spin_is_contended(lock); -#else - return 0; -#endif } /* @@ -479,11 +478,10 @@ static inline int spin_needbreak(spinlock_t *lock) */ static inline int rwlock_needbreak(rwlock_t *lock) { -#ifdef CONFIG_PREEMPTION + if (!preempt_model_preemptible()) + return 0; + return rwlock_is_contended(lock); -#else - return 0; -#endif } /* -- cgit v1.2.3 From dff60734fc7606fabde668ab6a26feacec8787cc Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 4 Jun 2024 17:52:56 +0200 Subject: vfs: retire user_path_at_empty and drop empty arg from getname_flags No users after do_readlinkat started doing the job on its own. Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20240604155257.109500-3-mjguzik@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 +- include/linux/namei.h | 8 +------- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 639885621608..bfc1e6407bf6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2701,7 +2701,7 @@ static inline struct file *file_clone_open(struct file *file) } extern int filp_close(struct file *, fl_owner_t id); -extern struct filename *getname_flags(const char __user *, int, int *); +extern struct filename *getname_flags(const char __user *, int); extern struct filename *getname_uflags(const char __user *, int); extern struct filename *getname(const char __user *); extern struct filename *getname_kernel(const char *); diff --git a/include/linux/namei.h b/include/linux/namei.h index 967aa9ea9f96..8ec8fed3bce8 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -50,13 +50,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; extern int path_pts(struct path *path); -extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); - -static inline int user_path_at(int dfd, const char __user *name, unsigned flags, - struct path *path) -{ - return user_path_at_empty(dfd, name, flags, path, NULL); -} +extern int user_path_at(int, const char __user *, unsigned, struct path *); struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct dentry *base, -- cgit v1.2.3 From 758191c9ea7bcc45dd99398a538ae4ab27c4029e Mon Sep 17 00:00:00 2001 From: Yoray Zack Date: Tue, 4 Jun 2024 00:22:17 +0300 Subject: net/mlx5e: SHAMPO, Use KSMs instead of KLMs KSM Mkey is KLM Mkey with a fixed buffer size. Due to this fact, it is a faster mechanism than KLM. SHAMPO feature used KLMs Mkeys for memory mappings of its headers buffer. As it used KLMs with the same buffer size for each entry, we can use KSMs instead. This commit changes the Mkeys that map the SHAMPO headers buffer from KLMs to KSMs. Signed-off-by: Yoray Zack Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240603212219.1037656-13-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx5/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index d7bb31d9a446..da09bfaa7b81 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -294,6 +294,7 @@ enum { #define MLX5_UMR_FLEX_ALIGNMENT 0x40 #define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt)) #define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm)) +#define MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_ksm)) #define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8) -- cgit v1.2.3 From 99be56171fa9ffea494dfe3d4a7f6e7e51630c2e Mon Sep 17 00:00:00 2001 From: Yoray Zack Date: Tue, 4 Jun 2024 00:22:18 +0300 Subject: net/mlx5e: SHAMPO, Re-enable HW-GRO Add back HW-GRO to the reported features. As the current implementation of HW-GRO uses KSMs with a specific fixed buffer size (256B) to map its headers buffer, we reported the feature only if the NIC is supporting KSM and the minimum value for buffer size is below the requested one. iperf3 bandwidth comparison: +---------+--------+--------+-----------+ | streams | SW GRO | HW GRO | Unit | |---------+--------+--------+-----------| | 1 | 36 | 42 | Gbits/sec | | 4 | 34 | 39 | Gbits/sec | | 8 | 31 | 35 | Gbits/sec | +---------+--------+--------+-----------+ A downstream patch will add skb fragment coalescing which will improve performance considerably. Benchmark details: VM based setup CPU: Intel(R) Xeon(R) Platinum 8380 CPU, 24 cores NIC: ConnectX-7 100GbE iperf3 and irq running on same CPU over a single receive queue Signed-off-by: Yoray Zack Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240603212219.1037656-14-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5df52e15f7d6..17acd0f3ca8e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1526,8 +1526,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ts_cqe_to_dest_cqn[0x1]; u8 reserved_at_b3[0x6]; u8 go_back_n[0x1]; - u8 shampo[0x1]; - u8 reserved_at_bb[0x5]; + u8 reserved_at_ba[0x6]; u8 max_sgl_for_optimized_performance[0x8]; u8 log_max_cq_sz[0x8]; @@ -1744,7 +1743,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_280[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_at_2a0[0x10]; + u8 reserved_at_2a0[0xb]; + u8 shampo[0x1]; + u8 reserved_at_2ac[0x4]; u8 max_wqe_sz_rq[0x10]; u8 max_flow_counter_31_16[0x10]; @@ -2017,7 +2018,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_250[0x10]; u8 reserved_at_260[0x120]; - u8 reserved_at_380[0x10]; + u8 reserved_at_380[0xb]; + u8 min_mkey_log_entity_size_fixed_buffer[0x5]; u8 ec_vf_vport_base[0x10]; u8 reserved_at_3a0[0x10]; @@ -2029,7 +2031,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 pcc_ifa2[0x1]; u8 reserved_at_3f1[0xf]; - u8 reserved_at_400[0x400]; + u8 reserved_at_400[0x1]; + u8 min_mkey_log_entity_size_fixed_buffer_valid[0x1]; + u8 reserved_at_402[0x1e]; + + u8 reserved_at_420[0x3e0]; }; enum mlx5_ifc_flow_destination_type { -- cgit v1.2.3 From f1180fd2a7c039691b64ebf404c746a74e40b7b0 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 5 Jun 2024 07:13:39 +0000 Subject: mm/mm_init.c: not always search next deferred_init_pfn from very beginning In function deferred_init_memmap(), we call deferred_init_mem_pfn_range_in_zone() to get the next deferred_init_pfn. But we always search it from the very beginning. Since we save the index in i, we can leverage this to search from i next time. [rppt refine the comment] Signed-off-by: Wei Yang Link: https://lore.kernel.org/all/20240605071339.15330-1-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (IBM) --- include/linux/memblock.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 6cf18dc2b4d0..45cac33334c8 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -299,25 +299,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, unsigned long *out_spfn, unsigned long *out_epfn); -/** - * for_each_free_mem_pfn_range_in_zone - iterate through zone specific free - * memblock areas - * @i: u64 used as loop variable - * @zone: zone in which all of the memory blocks reside - * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL - * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL - * - * Walks over free (memory && !reserved) areas of memblock in a specific - * zone. Available once memblock and an empty zone is initialized. The main - * assumption is that the zone start, end, and pgdat have been associated. - * This way we can use the zone to determine NUMA node, and if a given part - * of the memblock is valid for the zone. - */ -#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end) \ - for (i = 0, \ - __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end); \ - i != U64_MAX; \ - __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end)) /** * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific -- cgit v1.2.3 From b4cb4a1391dcdc640c4ade003aaf0ee19cc8d509 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Jun 2024 11:16:03 +0000 Subject: net: use unrcu_pointer() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Toke mentioned unrcu_pointer() existence, allowing to remove some of the ugly casts we have when using xchg() for rcu protected pointers. Also make inet_rcv_compat const. Signed-off-by: Eric Dumazet Cc: Toke Høiland-Jørgensen Reviewed-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20240604111603.45871-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/net/sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 953c8dc4e259..b30ea0c342a6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2095,7 +2095,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst) sk_tx_queue_clear(sk); WRITE_ONCE(sk->sk_dst_pending_confirm, 0); - old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); + old_dst = unrcu_pointer(xchg(&sk->sk_dst_cache, RCU_INITIALIZER(dst))); dst_release(old_dst); } -- cgit v1.2.3 From c34506406dd5cfb352f8c53bb6a1b9535c0905dd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Jun 2024 07:15:51 +0000 Subject: tcp: small changes in reqsk_put() and reqsk_free() In reqsk_free(), use DEBUG_NET_WARN_ON_ONCE() instead of WARN_ON_ONCE() for a condition which never fired. In reqsk_put() directly call __reqsk_free(), there is no point checking rsk_refcnt again right after a transition to zero. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/request_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index ebcb8896bffc..a8f82216c628 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -172,14 +172,14 @@ static inline void __reqsk_free(struct request_sock *req) static inline void reqsk_free(struct request_sock *req) { - WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); + DEBUG_NET_WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); __reqsk_free(req); } static inline void reqsk_put(struct request_sock *req) { if (refcount_dec_and_test(&req->rsk_refcnt)) - reqsk_free(req); + __reqsk_free(req); } /* -- cgit v1.2.3 From 6971d21672827a701c5ea180891b7ea6cf06f6a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Jun 2024 07:15:53 +0000 Subject: tcp: move reqsk_alloc() to inet_connection_sock.c reqsk_alloc() has a single caller, no need to expose it in include/net/request_sock.h. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/request_sock.h | 33 --------------------------------- 1 file changed, 33 deletions(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index a8f82216c628..b07b1cd14e9f 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -128,39 +128,6 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb, return sk; } -static inline struct request_sock * -reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener, - bool attach_listener) -{ - struct request_sock *req; - - req = kmem_cache_alloc_noprof(ops->slab, GFP_ATOMIC | __GFP_NOWARN); - if (!req) - return NULL; - req->rsk_listener = NULL; - if (attach_listener) { - if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) { - kmem_cache_free(ops->slab, req); - return NULL; - } - req->rsk_listener = sk_listener; - } - req->rsk_ops = ops; - req_to_sk(req)->sk_prot = sk_listener->sk_prot; - sk_node_init(&req_to_sk(req)->sk_node); - sk_tx_queue_clear(req_to_sk(req)); - req->saved_syn = NULL; - req->syncookie = 0; - req->timeout = 0; - req->num_timeout = 0; - req->num_retrans = 0; - req->sk = NULL; - refcount_set(&req->rsk_refcnt, 0); - - return req; -} -#define reqsk_alloc(...) alloc_hooks(reqsk_alloc_noprof(__VA_ARGS__)) - static inline void __reqsk_free(struct request_sock *req) { req->rsk_ops->destructor(req); -- cgit v1.2.3 From 3ed96977a3c5b0a9b017d626600402be3089d4fc Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 4 Jun 2024 13:54:38 -0400 Subject: drm/mm: Remove unused drm_mm_replace_node Last caller was removed with commit 078a5b498d6a ("drm/tests: Remove slow tests"). Cc: Maxime Ripard Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240604175438.48125-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- include/drm/drm_mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index ac33ba1b18bc..f654874c4ce6 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -463,7 +463,6 @@ static inline int drm_mm_insert_node(struct drm_mm *mm, } void drm_mm_remove_node(struct drm_mm_node *node); -void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new); void drm_mm_init(struct drm_mm *mm, u64 start, u64 size); void drm_mm_takedown(struct drm_mm *mm); -- cgit v1.2.3 From ababa16fd9bd0e2727a1c31c4fb68d6be053bddc Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 6 Jun 2024 11:42:38 +0200 Subject: irqchip/gic-v3: Enable non-coherent redistributors/ITSes ACPI probing The GIC architecture specification defines a set of registers for redistributors and ITSes that control the sharebility and cacheability attributes of redistributors/ITSes initiator ports on the interconnect (GICR_[V]PROPBASER, GICR_[V]PENDBASER, GITS_BASER). Architecturally the GIC provides a means to drive shareability and cacheability attributes signals but it is not mandatory for designs to wire up the corresponding interconnect signals that control the cacheability/shareability of transactions. Redistributors and ITSes interconnect ports can be connected to non-coherent interconnects that are not able to manage the shareability/cacheability attributes; this implicitly makes the redistributors and ITSes non-coherent observers. To enable non-coherent GIC designs on ACPI based systems, parse the MADT GICC/GICR/ITS subtables non-coherent flags to determine whether the respective components are non-coherent observers and force the shareability attributes to be programmed into the redistributors and ITSes registers. An ACPI global function (acpi_get_madt_revision()) is added to retrieve the MADT revision, in that it is essential to check the MADT revision before checking for flags that were added with MADT revision 7 so that if the kernel is booted with an ACPI MADT table with revision < 7 it skips parsing the newly added flags (that should be zeroed reserved values for MADT versions < 7 but they could turn out to be buggy and should be ignored). Signed-off-by: Lorenzo Pieralisi Signed-off-by: Thomas Gleixner Reviewed-by: Robin Murphy Acked-by: Marc Zyngier Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20240606094238.757649-2-lpieralisi@kernel.org --- include/linux/acpi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 28c3fb2bef0d..000d339e1596 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -279,6 +279,9 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) return phys_id == PHYS_CPUID_INVALID; } + +int __init acpi_get_madt_revision(void); + /* Validate the processor object's proc_id */ bool acpi_duplicate_processor_id(int proc_id); /* Processor _CTS control */ -- cgit v1.2.3 From 0ee14725471cea66e03e3cd4f4c582d759de502c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 6 Jun 2024 15:46:09 +0100 Subject: mm/util: Swap kmemdup_array() arguments GCC 14.1 complains about the argument usage of kmemdup_array(): drivers/soc/tegra/fuse/fuse-tegra.c:130:65: error: 'kmemdup_array' sizes specified with 'sizeof' in the earlier argument and not in the later argument [-Werror=calloc-transposed-args] 130 | fuse->lookups = kmemdup_array(fuse->soc->lookups, sizeof(*fuse->lookups), | ^ drivers/soc/tegra/fuse/fuse-tegra.c:130:65: note: earlier argument should specify number of elements, later size of each element The annotation introduced by commit 7d78a7773355 ("string: Add additional __realloc_size() annotations for "dup" helpers") lets the compiler think that kmemdup_array() follows the same format as calloc(), with the number of elements preceding the size of one element. So we could simply swap the arguments to __realloc_size() to get rid of that warning, but it seems cleaner to instead have kmemdup_array() follow the same format as krealloc_array(), memdup_array_user(), calloc() etc. Fixes: 7d78a7773355 ("string: Add additional __realloc_size() annotations for "dup" helpers") Signed-off-by: Jean-Philippe Brucker Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240606144608.97817-2-jean-philippe@linaro.org Signed-off-by: Kees Cook --- include/linux/string.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index 60168aa2af07..9edace076ddb 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -289,7 +289,7 @@ extern void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp) __realloc_si extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); -extern void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp) +extern void *kmemdup_array(const void *src, size_t count, size_t element_size, gfp_t gfp) __realloc_size(2, 3); /* lib/argv_split.c */ -- cgit v1.2.3 From 0d11307022978f1f395da587285c06c9cea47288 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 23 May 2024 19:44:29 +0200 Subject: drm/print: Add missing [drm] prefix to drm based WARN All drm_device based logging macros, except those related to WARN, include the [drm] prefix. Fix that. [ ] 0000:00:00.0: this is a warning [ ] 0000:00:00.0: drm_WARN_ON(true) vs [ ] 0000:00:00.0: [drm] this is a warning [ ] 0000:00:00.0: [drm] drm_WARN_ON(true) Signed-off-by: Michal Wajdeczko Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240523174429.800-1-michal.wajdeczko@intel.com Signed-off-by: Rodrigo Vivi --- include/drm/drm_print.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 089950ad8681..112f8830b372 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -632,12 +632,12 @@ void __drm_err(const char *format, ...); /* Helper for struct drm_device based WARNs */ #define drm_WARN(drm, condition, format, arg...) \ - WARN(condition, "%s %s: " format, \ + WARN(condition, "%s %s: [drm] " format, \ dev_driver_string((drm)->dev), \ dev_name((drm)->dev), ## arg) #define drm_WARN_ONCE(drm, condition, format, arg...) \ - WARN_ONCE(condition, "%s %s: " format, \ + WARN_ONCE(condition, "%s %s: [drm] " format, \ dev_driver_string((drm)->dev), \ dev_name((drm)->dev), ## arg) -- cgit v1.2.3 From 0d5edcc60abe9a02501f01e032bfa2432c1364de Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 May 2024 18:00:15 +0200 Subject: drm/print: Kill ___drm_dbg() There is no point in maintaining a separate print function, while there is __drm_dev_dbg() function that can work with a NULL device. Signed-off-by: Michal Wajdeczko Cc: Jani Nikula Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240516160015.2260-1-michal.wajdeczko@intel.com Signed-off-by: Rodrigo Vivi --- include/drm/drm_print.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 112f8830b372..679a2086fbea 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -527,17 +527,15 @@ void __drm_dev_dbg(struct _ddebug *desc, const struct device *dev, * Prefer drm_device based logging over device or prink based logging. */ -__printf(3, 4) -void ___drm_dbg(struct _ddebug *desc, enum drm_debug_category category, const char *format, ...); __printf(1, 2) void __drm_err(const char *format, ...); #if !defined(CONFIG_DRM_USE_DYNAMIC_DEBUG) -#define __drm_dbg(cat, fmt, ...) ___drm_dbg(NULL, cat, fmt, ##__VA_ARGS__) +#define __drm_dbg(cat, fmt, ...) __drm_dev_dbg(NULL, NULL, cat, fmt, ##__VA_ARGS__) #else #define __drm_dbg(cat, fmt, ...) \ - _dynamic_func_call_cls(cat, fmt, ___drm_dbg, \ - cat, fmt, ##__VA_ARGS__) + _dynamic_func_call_cls(cat, fmt, __drm_dev_dbg, \ + NULL, cat, fmt, ##__VA_ARGS__) #endif /* Macros to make printk easier */ -- cgit v1.2.3 From c2ef66e9ad882ab4b055a86657c20c61d203f003 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 17 May 2024 18:34:05 +0200 Subject: drm/print: Improve drm_dbg_printer With recent introduction of a generic drm dev printk function, we can now store and use location where drm_dbg_printer was invoked and output it's symbolic name like we do for all drm debug prints. Cc: Jani Nikula Reviewed-by: Jani Nikula Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240517163406.2348-3-michal.wajdeczko@intel.com Signed-off-by: Rodrigo Vivi --- include/drm/drm_print.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 679a2086fbea..5d9dff5149c9 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -175,6 +175,7 @@ struct drm_printer { void (*printfn)(struct drm_printer *p, struct va_format *vaf); void (*puts)(struct drm_printer *p, const char *str); void *arg; + const void *origin; const char *prefix; enum drm_debug_category category; }; @@ -332,6 +333,7 @@ static inline struct drm_printer drm_dbg_printer(struct drm_device *drm, struct drm_printer p = { .printfn = __drm_printfn_dbg, .arg = drm, + .origin = (const void *)_THIS_IP_, /* it's fine as we will be inlined */ .prefix = prefix, .category = category, }; -- cgit v1.2.3 From 1d5f0222944fe723b9bfaaeb27e368363644ccab Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 5 Jun 2024 16:26:45 -0400 Subject: ftrace: Declare function_trace_op in header to quiet sparse warning Sparse complains that function_trace_op is not static but is not declared in a header file. It is used only in assembly code. But add it to a header so that sparse no longer complains: kernel/trace/ftrace.c:99:19: warning: symbol 'function_trace_op' was not declared. Should it be static? Link: https://lore.kernel.org/linux-trace-kernel/20240605202708.289105647@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9f61556a9491..4135dc171447 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1131,6 +1131,9 @@ extern void ftrace_graph_init_task(struct task_struct *t); extern void ftrace_graph_exit_task(struct task_struct *t); extern void ftrace_graph_init_idle_task(struct task_struct *t, int cpu); +/* Used by assembly, but to quiet sparse warnings */ +extern struct ftrace_ops *function_trace_op; + static inline void pause_graph_tracing(void) { atomic_inc(¤t->tracing_graph_pause); -- cgit v1.2.3 From c76494768761aef7630e7e0db820ba7b375964da Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 30 May 2024 11:07:19 -0700 Subject: linux/interrupt.h: allow "guard" notation to disable and reenable IRQ Drivers often need to first disable an interrupt, carry out some action, and then reenable the interrupt. Introduce support for the "guard" notation for this so that the following is possible: ... scoped_cond_guard(mutex_intr, return -EINTR, &data->sysfs_mutex) { guard(disable_irq)(&client->irq); error = elan_acquire_baseline(data); if (error) return error; } ... Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/ZljAV6HjkPSEhWSw@google.com Signed-off-by: Dmitry Torokhov --- include/linux/interrupt.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5c9bdd3ffccc..3a36e64119c8 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -235,6 +236,9 @@ extern void enable_percpu_irq(unsigned int irq, unsigned int type); extern bool irq_percpu_is_enabled(unsigned int irq); extern void irq_wake_thread(unsigned int irq, void *dev_id); +DEFINE_LOCK_GUARD_1(disable_irq, int, + disable_irq(*_T->lock), enable_irq(*_T->lock)) + extern void disable_nmi_nosync(unsigned int irq); extern void disable_percpu_nmi(unsigned int irq); extern void enable_nmi(unsigned int irq); -- cgit v1.2.3 From b472b996a43404a912c5cb4f27050022fdbce10c Mon Sep 17 00:00:00 2001 From: Udit Kumar Date: Fri, 31 May 2024 22:27:25 +0530 Subject: dt-bindings: net: dp8386x: Add MIT license along with GPL-2.0 Modify license to include dual licensing as GPL-2.0-only OR MIT license for TI specific phy header files. This allows for Linux kernel files to be used in other Operating System ecosystems such as Zephyr or FreeBSD. While at this, update the GPL-2.0 to be GPL-2.0-only to be in sync with latest SPDX conventions (GPL-2.0 is deprecated). While at this, update the TI copyright year to sync with current year to indicate license change. Cc: Thomas Gleixner Cc: Trent Piepho Cc: Wadim Egorov Cc: Kip Broadhurst Signed-off-by: Udit Kumar Acked-by: Wadim Egorov Acked-by: Rob Herring Signed-off-by: David S. Miller --- include/dt-bindings/net/ti-dp83867.h | 4 ++-- include/dt-bindings/net/ti-dp83869.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/net/ti-dp83867.h b/include/dt-bindings/net/ti-dp83867.h index 6fc4b445d3a1..b8a4f3ff4a3b 100644 --- a/include/dt-bindings/net/ti-dp83867.h +++ b/include/dt-bindings/net/ti-dp83867.h @@ -1,10 +1,10 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ /* * Device Tree constants for the Texas Instruments DP83867 PHY * * Author: Dan Murphy * - * Copyright: (C) 2015 Texas Instruments, Inc. + * Copyright (C) 2015-2024 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef _DT_BINDINGS_TI_DP83867_H diff --git a/include/dt-bindings/net/ti-dp83869.h b/include/dt-bindings/net/ti-dp83869.h index 218b1a64e975..917114aad7d0 100644 --- a/include/dt-bindings/net/ti-dp83869.h +++ b/include/dt-bindings/net/ti-dp83869.h @@ -1,10 +1,10 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ /* * Device Tree constants for the Texas Instruments DP83869 PHY * * Author: Dan Murphy * - * Copyright: (C) 2019 Texas Instruments, Inc. + * Copyright (C) 2015-2024 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef _DT_BINDINGS_TI_DP83869_H -- cgit v1.2.3 From b4100947a8014e1876872546398275968f77e1ad Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 28 May 2024 16:07:10 -0500 Subject: crypto: ccp - align psp_platform_access_msg Align the whitespace so that future messages will also be better aligned. Acked-by: Tom Lendacky Signed-off-by: Mario Limonciello Signed-off-by: Herbert Xu --- include/linux/psp-platform-access.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h index c1dc87fc536b..23893b33e48c 100644 --- a/include/linux/psp-platform-access.h +++ b/include/linux/psp-platform-access.h @@ -6,8 +6,8 @@ #include enum psp_platform_access_msg { - PSP_CMD_NONE = 0x0, - PSP_I2C_REQ_BUS_CMD = 0x64, + PSP_CMD_NONE = 0x0, + PSP_I2C_REQ_BUS_CMD = 0x64, PSP_DYNAMIC_BOOST_GET_NONCE, PSP_DYNAMIC_BOOST_SET_UID, PSP_DYNAMIC_BOOST_GET_PARAMETER, -- cgit v1.2.3 From 82f9327f774c6e040ba63a887a85fa2e290a233a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 28 May 2024 16:07:11 -0500 Subject: crypto: ccp - Add support for getting security attributes on some older systems Older systems will not populate the security attributes in the capabilities register. The PSP on these systems, however, does have a command to get the security attributes. Use this command during ccp startup to populate the attributes if they're missing. Closes: https://github.com/fwupd/fwupd/issues/5284 Closes: https://github.com/fwupd/fwupd/issues/5675 Closes: https://github.com/fwupd/fwupd/issues/6253 Closes: https://github.com/fwupd/fwupd/issues/7280 Closes: https://github.com/fwupd/fwupd/issues/6323 Closes: https://github.com/fwupd/fwupd/discussions/5433 Signed-off-by: Mario Limonciello Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- include/linux/psp-platform-access.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h index 23893b33e48c..1504fb012c05 100644 --- a/include/linux/psp-platform-access.h +++ b/include/linux/psp-platform-access.h @@ -7,6 +7,7 @@ enum psp_platform_access_msg { PSP_CMD_NONE = 0x0, + PSP_CMD_HSTI_QUERY = 0x14, PSP_I2C_REQ_BUS_CMD = 0x64, PSP_DYNAMIC_BOOST_GET_NONCE, PSP_DYNAMIC_BOOST_SET_UID, -- cgit v1.2.3 From 46b3ff73afc815f1feb844e6b57e43cc13051544 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 31 May 2024 18:20:03 +0800 Subject: crypto: sm2 - Remove sm2 algorithm The SM2 algorithm has a single user in the kernel. However, it's never been integrated properly with that user: asymmetric_keys. The crux of the issue is that the way it computes its digest with sm3 does not fit into the architecture of asymmetric_keys. As no solution has been proposed, remove this algorithm. It can be resubmitted when it is integrated properly into the asymmetric_keys subsystem. Signed-off-by: Herbert Xu --- include/crypto/sm2.h | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 include/crypto/sm2.h (limited to 'include') diff --git a/include/crypto/sm2.h b/include/crypto/sm2.h deleted file mode 100644 index 04a92c1013c8..000000000000 --- a/include/crypto/sm2.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * sm2.h - SM2 asymmetric public-key algorithm - * as specified by OSCCA GM/T 0003.1-2012 -- 0003.5-2012 SM2 and - * described at https://tools.ietf.org/html/draft-shen-sm2-ecdsa-02 - * - * Copyright (c) 2020, Alibaba Group. - * Written by Tianjia Zhang - */ - -#ifndef _CRYPTO_SM2_H -#define _CRYPTO_SM2_H - -struct shash_desc; - -#if IS_REACHABLE(CONFIG_CRYPTO_SM2) -int sm2_compute_z_digest(struct shash_desc *desc, - const void *key, unsigned int keylen, void *dgst); -#else -static inline int sm2_compute_z_digest(struct shash_desc *desc, - const void *key, unsigned int keylen, - void *dgst) -{ - return -ENOTSUPP; -} -#endif - -#endif /* _CRYPTO_SM2_H */ -- cgit v1.2.3 From df3fb27a74a4eeb1436129024a7e957c2e83a95e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Tue, 4 Jun 2024 15:20:30 +0200 Subject: drm/mipi-dbi: Make bits per word configurable for pixel transfers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIPI DCS write/set commands have 8 bit parameters except for the write_memory commands where it depends on the pixel format. drm_mipi_dbi does currently only support RGB565 which is 16-bit and it has to make sure that the pixels enters the SPI bus in big endian format since the MIPI DBI spec doesn't have support for little endian. drm_mipi_dbi is optimized for DBI interface option 3 which means that the 16-bit bytes are swapped by the upper layer if the SPI bus does not support 16 bits per word, signified by the swap_bytes member. In order to support both 16-bit and 24-bit pixel transfers we need a way to tell the DBI command layer the format of the buffer. Add a write_memory_bpw member that the upper layer can use to tell how many bits per word to use for the SPI transfer. v4: - Expand the commit message (Dmitry) Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240604-panel-mipi-dbi-rgb666-v4-3-d7c2bcb9b78d@tronnes.org Signed-off-by: Noralf Trønnes --- include/drm/drm_mipi_dbi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dbi.h b/include/drm/drm_mipi_dbi.h index e8e0f8d39f3a..b36596efdcc3 100644 --- a/include/drm/drm_mipi_dbi.h +++ b/include/drm/drm_mipi_dbi.h @@ -56,6 +56,11 @@ struct mipi_dbi { */ struct spi_device *spi; + /** + * @write_memory_bpw: Bits per word used on a MIPI_DCS_WRITE_MEMORY_START transfer + */ + unsigned int write_memory_bpw; + /** * @dc: Optional D/C gpio. */ -- cgit v1.2.3 From 4aebb79021f3e6c2b6fbb92a7d9c5d1e6ad0324a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Tue, 4 Jun 2024 15:20:31 +0200 Subject: drm/mipi-dbi: Add support for DRM_FORMAT_RGB888 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DRM_FORMAT_RGB888 is 24 bits per pixel and it would be natural to send it on the SPI bus using a 24 bits per word transfer. The problem with this is that not all SPI controllers support 24 bpw. Since DRM_FORMAT_RGB888 is stored in memory as little endian and the SPI bus is big endian we use 8 bpw to always get the same pixel format on the bus: b8g8r8. The MIPI DCS specification lists the standard commands that can be sent over the MIPI DBI interface. The set_address_mode (36h) command has one bit in the parameter that controls RGB/BGR order. This means that the controller can be configured to receive the pixel as BGR. RGB888 is rarely supported on these controllers but RGB666 is very common. All datasheets I have seen do at least support the pixel format option where each color is sent as one byte and the 6 MSB's are used. All this put together means that we can send each pixel as b8g8r8 and an RGB666 capable controller sees this as b6x2g6x2r6x2. v4: - s/emulation_format/pixel_format/ (Dmitry) Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240604-panel-mipi-dbi-rgb666-v4-4-d7c2bcb9b78d@tronnes.org Signed-off-by: Noralf Trønnes --- include/drm/drm_mipi_dbi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dbi.h b/include/drm/drm_mipi_dbi.h index b36596efdcc3..f45f9612c0bc 100644 --- a/include/drm/drm_mipi_dbi.h +++ b/include/drm/drm_mipi_dbi.h @@ -101,6 +101,11 @@ struct mipi_dbi_dev { */ struct drm_display_mode mode; + /** + * @pixel_format: Native pixel format (DRM_FORMAT\_\*) + */ + u32 pixel_format; + /** * @tx_buf: Buffer used for transfer (copy clip rect area) */ -- cgit v1.2.3 From f05eda16037f9363297561bd28f318a6d7833d35 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Fri, 8 Mar 2024 17:09:27 -0800 Subject: srcu: Add an API for a memory barrier after SRCU read lock To avoid redundant memory barriers, add smp_mb__after_srcu_read_lock() to pair with smp_mb__after_srcu_read_unlock() for use in paths that need to emit a memory barrier, but already do srcu_read_lock(), which includes a full memory barrier. Provide an API, e.g. as opposed to having callers document the behavior via a comment, as the full memory barrier provided by srcu_read_lock() is an implementation detail that shouldn't bleed into random subsystems. KVM will use smp_mb__after_srcu_read_lock() in it's VM-Exit path to ensure a memory barrier is emitted, which is necessary to ensure correctness of mixed memory types on CPUs that support self-snoop. Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Kevin Tian Signed-off-by: Yan Zhao [sean: massage changelog] Tested-by: Xiangfei Ma Tested-by: Yongwei Ma Reviewed-by: Paul E. McKenney --- include/linux/srcu.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 236610e4a8fa..1cb4527076de 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -343,6 +343,20 @@ static inline void smp_mb__after_srcu_read_unlock(void) /* __srcu_read_unlock has smp_mb() internally so nothing to do here. */ } +/** + * smp_mb__after_srcu_read_lock - ensure full ordering after srcu_read_lock + * + * Converts the preceding srcu_read_lock into a two-way memory barrier. + * + * Call this after srcu_read_lock, to guarantee that all memory operations + * that occur after smp_mb__after_srcu_read_lock will appear to happen after + * the preceding srcu_read_lock. + */ +static inline void smp_mb__after_srcu_read_lock(void) +{ + /* __srcu_read_lock has smp_mb() internally so nothing to do here. */ +} + DEFINE_LOCK_GUARD_1(srcu, struct srcu_struct, _T->idx = srcu_read_lock(_T->lock), srcu_read_unlock(_T->lock, _T->idx), -- cgit v1.2.3 From 6a79a4e187bdd17959ff3e811d5de65c066f89e6 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 6 Jun 2024 10:33:49 +0300 Subject: libfs: Introduce case-insensitive string comparison helper generic_ci_match can be used by case-insensitive filesystems to compare strings under lookup with dirents in a case-insensitive way. This function is currently reimplemented by each filesystem supporting casefolding, so this reduces code duplication in filesystem-specific code. [eugen.hristev@collabora.com: rework to first test the exact match, cleanup and add error message] Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Eugen Hristev Link: https://lore.kernel.org/r/20240606073353.47130-4-eugen.hristev@collabora.com Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Christian Brauner --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0283cf366c2a..2b1eeca426a0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3351,6 +3351,10 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); extern void generic_set_sb_d_ops(struct super_block *sb); +extern int generic_ci_match(const struct inode *parent, + const struct qstr *name, + const struct qstr *folded_name, + const u8 *de_name, u32 de_name_len); static inline bool sb_has_encoding(const struct super_block *sb) { -- cgit v1.2.3 From 231035f18d6b80e5c28732a20872398116a54ecd Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Thu, 6 Jun 2024 16:52:15 +0800 Subject: workqueue: Increase worker desc's length to 32 Commit 31c89007285d ("workqueue.c: Increase workqueue name length") increased WQ_NAME_LEN from 24 to 32, but forget to increase WORKER_DESC_LEN, which would cause truncation when setting kworker's desc from workqueue_struct's name, process_one_work() for example. Fixes: 31c89007285d ("workqueue.c: Increase workqueue name length") Signed-off-by: Wenchao Hao CC: Audra Mitchell Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index fb3993894536..d9968bfc8eac 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -95,7 +95,7 @@ enum wq_misc_consts { WORK_BUSY_RUNNING = 1 << 1, /* maximum string length for set_worker_desc() */ - WORKER_DESC_LEN = 24, + WORKER_DESC_LEN = 32, }; /* Convenience constants - of type 'unsigned long', not 'enum'! */ -- cgit v1.2.3 From 11c63e57404e538c5df91f732e5d505860edb660 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 16 May 2024 12:25:31 +0200 Subject: firmware: add nowarn variant of request_firmware_nowait() Device drivers with optional firmware may still want to use the asynchronous firmware loading interface. To avoid printing a warning into the kernel log when the optional firmware is absent, add a nowarn variant of this interface. Signed-off-by: Lucas Stach Reviewed-by: Greg Kroah-Hartman Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/20240516102532.213874-1-l.stach@pengutronix.de Signed-off-by: Vinod Koul --- include/linux/firmware.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/firmware.h b/include/linux/firmware.h index f026f8926d79..aae1b85ffc10 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -98,6 +98,10 @@ static inline bool firmware_request_builtin(struct firmware *fw, #if IS_REACHABLE(CONFIG_FW_LOADER) int request_firmware(const struct firmware **fw, const char *name, struct device *device); +int firmware_request_nowait_nowarn( + struct module *module, const char *name, + struct device *device, gfp_t gfp, void *context, + void (*cont)(const struct firmware *fw, void *context)); int firmware_request_nowarn(const struct firmware **fw, const char *name, struct device *device); int firmware_request_platform(const struct firmware **fw, const char *name, @@ -123,6 +127,14 @@ static inline int request_firmware(const struct firmware **fw, return -EINVAL; } +static inline int firmware_request_nowait_nowarn( + struct module *module, const char *name, + struct device *device, gfp_t gfp, void *context, + void (*cont)(const struct firmware *fw, void *context)) +{ + return -EINVAL; +} + static inline int firmware_request_nowarn(const struct firmware **fw, const char *name, struct device *device) -- cgit v1.2.3 From a31a0a3e90b442c1a414be24b062e27624a40a8e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 28 May 2024 11:47:16 -0700 Subject: thermal: intel: intel_soc_dts_thermal: Switch to new Intel CPU model defines New CPU #defines encode vendor and family as well as model. Signed-off-by: Tony Luck Acked-by: Rafael J. Wysocki Signed-off-by: Rafael J. Wysocki --- include/linux/platform_data/x86/soc.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/x86/soc.h b/include/linux/platform_data/x86/soc.h index a5705189e2ac..f981907a5cb0 100644 --- a/include/linux/platform_data/x86/soc.h +++ b/include/linux/platform_data/x86/soc.h @@ -20,7 +20,7 @@ static inline bool soc_intel_is_##soc(void) \ { \ static const struct x86_cpu_id soc##_cpu_ids[] = { \ - X86_MATCH_INTEL_FAM6_MODEL(type, NULL), \ + X86_MATCH_VFM(type, NULL), \ {} \ }; \ const struct x86_cpu_id *id; \ @@ -31,11 +31,11 @@ static inline bool soc_intel_is_##soc(void) \ return false; \ } -SOC_INTEL_IS_CPU(byt, ATOM_SILVERMONT); -SOC_INTEL_IS_CPU(cht, ATOM_AIRMONT); -SOC_INTEL_IS_CPU(apl, ATOM_GOLDMONT); -SOC_INTEL_IS_CPU(glk, ATOM_GOLDMONT_PLUS); -SOC_INTEL_IS_CPU(cml, KABYLAKE_L); +SOC_INTEL_IS_CPU(byt, INTEL_ATOM_SILVERMONT); +SOC_INTEL_IS_CPU(cht, INTEL_ATOM_AIRMONT); +SOC_INTEL_IS_CPU(apl, INTEL_ATOM_GOLDMONT); +SOC_INTEL_IS_CPU(glk, INTEL_ATOM_GOLDMONT_PLUS); +SOC_INTEL_IS_CPU(cml, INTEL_KABYLAKE_L); #undef SOC_INTEL_IS_CPU -- cgit v1.2.3 From 2a1251e3dbb2995100b6f351c2452228895386a5 Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Fri, 7 Jun 2024 03:08:17 -0700 Subject: RDMA/mana_ib: Process QP error events in mana_ib Process QP fatal events from the error event queue. For that, find the QP, using QPN from the event, and then call its event_handler. To find the QPs, store created RC QPs in an xarray. Signed-off-by: Konstantin Taranov Link: https://lore.kernel.org/r/1717754897-19858-1-git-send-email-kotaranov@linux.microsoft.com Reviewed-by: Wei Hu Signed-off-by: Leon Romanovsky --- include/net/mana/gdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 27684135bb4d..44c797d5f899 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -60,6 +60,7 @@ enum gdma_eqe_type { GDMA_EQE_HWC_INIT_DONE = 131, GDMA_EQE_HWC_SOC_RECONFIG = 132, GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133, + GDMA_EQE_RNIC_QP_FATAL = 176, }; enum { -- cgit v1.2.3 From 195fb517ee25bfefde9c74ecd86348eccbd6d2e4 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Sun, 9 Jun 2024 17:39:24 -0700 Subject: cpu: Move CPU hotplug function declarations into their own header Avoid upcoming #include hell when wants to use lockdep_assert_cpus_held() and creates a #include loop that would break the build for arch/riscv. [ bp: s/cpu/CPU/g ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240610003927.341707-2-tony.luck@intel.com --- include/linux/cpu.h | 33 +-------------------------------- include/linux/cpuhplock.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 32 deletions(-) create mode 100644 include/linux/cpuhplock.h (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 861c3bfc5f17..a8926d0a28cd 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -18,6 +18,7 @@ #include #include #include +#include #include struct device; @@ -132,38 +133,6 @@ static inline int add_cpu(unsigned int cpu) { return 0;} #endif /* CONFIG_SMP */ extern const struct bus_type cpu_subsys; -extern int lockdep_is_cpus_held(void); - -#ifdef CONFIG_HOTPLUG_CPU -extern void cpus_write_lock(void); -extern void cpus_write_unlock(void); -extern void cpus_read_lock(void); -extern void cpus_read_unlock(void); -extern int cpus_read_trylock(void); -extern void lockdep_assert_cpus_held(void); -extern void cpu_hotplug_disable(void); -extern void cpu_hotplug_enable(void); -void clear_tasks_mm_cpumask(int cpu); -int remove_cpu(unsigned int cpu); -int cpu_device_down(struct device *dev); -extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); - -#else /* CONFIG_HOTPLUG_CPU */ - -static inline void cpus_write_lock(void) { } -static inline void cpus_write_unlock(void) { } -static inline void cpus_read_lock(void) { } -static inline void cpus_read_unlock(void) { } -static inline int cpus_read_trylock(void) { return true; } -static inline void lockdep_assert_cpus_held(void) { } -static inline void cpu_hotplug_disable(void) { } -static inline void cpu_hotplug_enable(void) { } -static inline int remove_cpu(unsigned int cpu) { return -EPERM; } -static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } -#endif /* !CONFIG_HOTPLUG_CPU */ - -DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock()) - #ifdef CONFIG_PM_SLEEP_SMP extern int freeze_secondary_cpus(int primary); extern void thaw_secondary_cpus(void); diff --git a/include/linux/cpuhplock.h b/include/linux/cpuhplock.h new file mode 100644 index 000000000000..386abc482264 --- /dev/null +++ b/include/linux/cpuhplock.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/cpuhplock.h - CPU hotplug locking + * + * Locking functions for CPU hotplug. + */ +#ifndef _LINUX_CPUHPLOCK_H_ +#define _LINUX_CPUHPLOCK_H_ + +#include +#include + +struct device; + +extern int lockdep_is_cpus_held(void); + +#ifdef CONFIG_HOTPLUG_CPU +extern void cpus_write_lock(void); +extern void cpus_write_unlock(void); +extern void cpus_read_lock(void); +extern void cpus_read_unlock(void); +extern int cpus_read_trylock(void); +extern void lockdep_assert_cpus_held(void); +extern void cpu_hotplug_disable(void); +extern void cpu_hotplug_enable(void); +void clear_tasks_mm_cpumask(int cpu); +int remove_cpu(unsigned int cpu); +int cpu_device_down(struct device *dev); +extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); + +#else /* CONFIG_HOTPLUG_CPU */ + +static inline void cpus_write_lock(void) { } +static inline void cpus_write_unlock(void) { } +static inline void cpus_read_lock(void) { } +static inline void cpus_read_unlock(void) { } +static inline int cpus_read_trylock(void) { return true; } +static inline void lockdep_assert_cpus_held(void) { } +static inline void cpu_hotplug_disable(void) { } +static inline void cpu_hotplug_enable(void) { } +static inline int remove_cpu(unsigned int cpu) { return -EPERM; } +static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } +#endif /* !CONFIG_HOTPLUG_CPU */ + +DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock()) + +#endif /* _LINUX_CPUHPLOCK_H_ */ -- cgit v1.2.3 From ddefcfdeb5a2238cbcb07b80dda9ac3136735b1e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Sun, 9 Jun 2024 17:39:25 -0700 Subject: cpu: Drop "extern" from function declarations in cpuhplock.h This file was created with a direct cut and paste from cpu.h so kept the legacy declaration style. But the Linux coding standard for function declarations in header files is to avoid use of "extern". Drop "extern" from all function declarations. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240610003927.341707-3-tony.luck@intel.com --- include/linux/cpuhplock.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/cpuhplock.h b/include/linux/cpuhplock.h index 386abc482264..431560bbd045 100644 --- a/include/linux/cpuhplock.h +++ b/include/linux/cpuhplock.h @@ -15,18 +15,18 @@ struct device; extern int lockdep_is_cpus_held(void); #ifdef CONFIG_HOTPLUG_CPU -extern void cpus_write_lock(void); -extern void cpus_write_unlock(void); -extern void cpus_read_lock(void); -extern void cpus_read_unlock(void); -extern int cpus_read_trylock(void); -extern void lockdep_assert_cpus_held(void); -extern void cpu_hotplug_disable(void); -extern void cpu_hotplug_enable(void); +void cpus_write_lock(void); +void cpus_write_unlock(void); +void cpus_read_lock(void); +void cpus_read_unlock(void); +int cpus_read_trylock(void); +void lockdep_assert_cpus_held(void); +void cpu_hotplug_disable(void); +void cpu_hotplug_enable(void); void clear_tasks_mm_cpumask(int cpu); int remove_cpu(unsigned int cpu); int cpu_device_down(struct device *dev); -extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); +void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); #else /* CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3 From 685cb1674060c2cb1b9da051a12933c082b8e874 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Sun, 9 Jun 2024 17:39:26 -0700 Subject: cacheinfo: Add function to get cacheinfo for a given CPU and cache level Resctrl open codes a search for information about a given cache level in a couple of places (and more are on the way). Provide a new inline function get_cpu_cacheinfo_level() in to do the search and return a pointer to the cacheinfo structure. Add lockdep_assert_cpus_held() to enforce the comment that cpuhp lock must be held. Simplify the existing get_cpu_cacheinfo_id() by using this new function to do the search. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/r/20240610003927.341707-4-tony.luck@intel.com --- include/linux/cacheinfo.h | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2cb15fe4fe12..3dde175f4108 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -3,6 +3,7 @@ #define _LINUX_CACHEINFO_H #include +#include #include #include @@ -113,23 +114,37 @@ int acpi_get_cache_info(unsigned int cpu, const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf); /* - * Get the id of the cache associated with @cpu at level @level. + * Get the cacheinfo structure for the cache associated with @cpu at + * level @level. * cpuhp lock must be held. */ -static inline int get_cpu_cacheinfo_id(int cpu, int level) +static inline struct cacheinfo *get_cpu_cacheinfo_level(int cpu, int level) { struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); int i; + lockdep_assert_cpus_held(); + for (i = 0; i < ci->num_leaves; i++) { if (ci->info_list[i].level == level) { if (ci->info_list[i].attributes & CACHE_ID) - return ci->info_list[i].id; - return -1; + return &ci->info_list[i]; + return NULL; } } - return -1; + return NULL; +} + +/* + * Get the id of the cache associated with @cpu at level @level. + * cpuhp lock must be held. + */ +static inline int get_cpu_cacheinfo_id(int cpu, int level) +{ + struct cacheinfo *ci = get_cpu_cacheinfo_level(cpu, level); + + return ci ? ci->id : -1; } #ifdef CONFIG_ARM64 -- cgit v1.2.3 From 62096c48394b49e326056a62780fb67b60edde91 Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Mon, 6 May 2024 17:49:16 +0900 Subject: media: v4l2-ctrls: Add average QP control Add a control V4L2_CID_MPEG_VIDEO_AVERAGE_QP to report the average QP value of the current encoded frame. The value applies to the last dequeued capture buffer. Signed-off-by: Ming Qian Reviewed-by: Hans Verkuil Signed-off-by: Sebastian Fricke Signed-off-by: Hans Verkuil --- include/uapi/linux/v4l2-controls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 99c3f5e99da7..974fd254e573 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -898,6 +898,8 @@ enum v4l2_mpeg_video_av1_level { V4L2_MPEG_VIDEO_AV1_LEVEL_7_3 = 23 }; +#define V4L2_CID_MPEG_VIDEO_AVERAGE_QP (V4L2_CID_CODEC_BASE + 657) + /* MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */ #define V4L2_CID_CODEC_CX2341X_BASE (V4L2_CTRL_CLASS_CODEC | 0x1000) #define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE (V4L2_CID_CODEC_CX2341X_BASE+0) -- cgit v1.2.3 From 97d833ceb27dc19f8777d63f90be4a27b5daeedf Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 6 Jun 2024 16:49:42 +0200 Subject: mlxsw: spectrum_acl_erp: Fix object nesting warning ACLs in Spectrum-2 and newer ASICs can reside in the algorithmic TCAM (A-TCAM) or in the ordinary circuit TCAM (C-TCAM). The former can contain more ACLs (i.e., tc filters), but the number of masks in each region (i.e., tc chain) is limited. In order to mitigate the effects of the above limitation, the device allows filters to share a single mask if their masks only differ in up to 8 consecutive bits. For example, dst_ip/25 can be represented using dst_ip/24 with a delta of 1 bit. The C-TCAM does not have a limit on the number of masks being used (and therefore does not support mask aggregation), but can contain a limited number of filters. The driver uses the "objagg" library to perform the mask aggregation by passing it objects that consist of the filter's mask and whether the filter is to be inserted into the A-TCAM or the C-TCAM since filters in different TCAMs cannot share a mask. The set of created objects is dependent on the insertion order of the filters and is not necessarily optimal. Therefore, the driver will periodically ask the library to compute a more optimal set ("hints") by looking at all the existing objects. When the library asks the driver whether two objects can be aggregated the driver only compares the provided masks and ignores the A-TCAM / C-TCAM indication. This is the right thing to do since the goal is to move as many filters as possible to the A-TCAM. The driver also forbids two identical masks from being aggregated since this can only happen if one was intentionally put in the C-TCAM to avoid a conflict in the A-TCAM. The above can result in the following set of hints: H1: {mask X, A-TCAM} -> H2: {mask Y, A-TCAM} // X is Y + delta H3: {mask Y, C-TCAM} -> H4: {mask Z, A-TCAM} // Y is Z + delta After getting the hints from the library the driver will start migrating filters from one region to another while consulting the computed hints and instructing the device to perform a lookup in both regions during the transition. Assuming a filter with mask X is being migrated into the A-TCAM in the new region, the hints lookup will return H1. Since H2 is the parent of H1, the library will try to find the object associated with it and create it if necessary in which case another hints lookup (recursive) will be performed. This hints lookup for {mask Y, A-TCAM} will either return H2 or H3 since the driver passes the library an object comparison function that ignores the A-TCAM / C-TCAM indication. This can eventually lead to nested objects which are not supported by the library [1]. Fix by removing the object comparison function from both the driver and the library as the driver was the only user. That way the lookup will only return exact matches. I do not have a reliable reproducer that can reproduce the issue in a timely manner, but before the fix the issue would reproduce in several minutes and with the fix it does not reproduce in over an hour. Note that the current usefulness of the hints is limited because they include the C-TCAM indication and represent aggregation that cannot actually happen. This will be addressed in net-next. [1] WARNING: CPU: 0 PID: 153 at lib/objagg.c:170 objagg_obj_parent_assign+0xb5/0xd0 Modules linked in: CPU: 0 PID: 153 Comm: kworker/0:18 Not tainted 6.9.0-rc6-custom-g70fbc2c1c38b #42 Hardware name: Mellanox Technologies Ltd. MSN3700C/VMOD0008, BIOS 5.11 10/10/2018 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:objagg_obj_parent_assign+0xb5/0xd0 [...] Call Trace: __objagg_obj_get+0x2bb/0x580 objagg_obj_get+0xe/0x80 mlxsw_sp_acl_erp_mask_get+0xb5/0xf0 mlxsw_sp_acl_atcam_entry_add+0xe8/0x3c0 mlxsw_sp_acl_tcam_entry_create+0x5e/0xa0 mlxsw_sp_acl_tcam_vchunk_migrate_one+0x16b/0x270 mlxsw_sp_acl_tcam_vregion_rehash_work+0xbe/0x510 process_one_work+0x151/0x370 Fixes: 9069a3817d82 ("lib: objagg: implement optimization hints assembly and use hints for object creation") Signed-off-by: Ido Schimmel Reviewed-by: Amit Cohen Tested-by: Alexander Zubkov Signed-off-by: Petr Machata Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/objagg.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/objagg.h b/include/linux/objagg.h index 78021777df46..6df5b887dc54 100644 --- a/include/linux/objagg.h +++ b/include/linux/objagg.h @@ -8,7 +8,6 @@ struct objagg_ops { size_t obj_size; bool (*delta_check)(void *priv, const void *parent_obj, const void *obj); - int (*hints_obj_cmp)(const void *obj1, const void *obj2); void * (*delta_create)(void *priv, void *parent_obj, void *obj); void (*delta_destroy)(void *priv, void *delta_priv); void * (*root_create)(void *priv, void *obj, unsigned int root_id); -- cgit v1.2.3 From 96f3b978736356ba0e5a7d923681765c7ea9b12b Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Wed, 15 May 2024 21:47:25 +0300 Subject: dt-bindings: clock: meson: a1: pll: introduce new syspll bindings The 'syspll' PLL is a general-purpose PLL designed specifically for the CPU clock. It is capable of producing output frequencies within the range of 768MHz to 1536MHz. The 'syspll_in' source clock is an optional parent connection from the peripherals clock controller. Signed-off-by: Dmitry Rokosov Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240515185103.20256-3-ddrokosov@salutedevices.com Signed-off-by: Jerome Brunet --- include/dt-bindings/clock/amlogic,a1-pll-clkc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/amlogic,a1-pll-clkc.h b/include/dt-bindings/clock/amlogic,a1-pll-clkc.h index 2b660c0f2c9f..0dfc5e78a2d5 100644 --- a/include/dt-bindings/clock/amlogic,a1-pll-clkc.h +++ b/include/dt-bindings/clock/amlogic,a1-pll-clkc.h @@ -21,5 +21,6 @@ #define CLKID_FCLK_DIV5 8 #define CLKID_FCLK_DIV7 9 #define CLKID_HIFI_PLL 10 +#define CLKID_SYS_PLL 11 #endif /* __A1_PLL_CLKC_H */ -- cgit v1.2.3 From 41056416ed538d1a05dbba57060c02acdc5154e7 Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Wed, 15 May 2024 21:47:27 +0300 Subject: dt-bindings: clock: meson: a1: peripherals: support sys_pll input The 'sys_pll' input is an optional clock that can be used to generate 'sys_pll_div16', which serves as one of the sources for the GEN clock. Signed-off-by: Dmitry Rokosov Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240515185103.20256-5-ddrokosov@salutedevices.com Signed-off-by: Jerome Brunet --- include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h b/include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h index 06f198ee7623..2ce1a06dc735 100644 --- a/include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h +++ b/include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h @@ -164,5 +164,6 @@ #define CLKID_DMC_SEL 151 #define CLKID_DMC_DIV 152 #define CLKID_DMC_SEL2 153 +#define CLKID_SYS_PLL_DIV16 154 #endif /* __A1_PERIPHERALS_CLKC_H */ -- cgit v1.2.3 From b334b924c9b709bc969644fb5c406f5c9d01dceb Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 6 Jun 2024 17:11:37 +0200 Subject: net: tcp/dccp: prepare for tw_timer un-pinning The TCP timewait timer is proving to be problematic for setups where scheduler CPU isolation is achieved at runtime via cpusets (as opposed to statically via isolcpus=domains). What happens there is a CPU goes through tcp_time_wait(), arming the time_wait timer, then gets isolated. TCP_TIMEWAIT_LEN later, the timer fires, causing interference for the now-isolated CPU. This is conceptually similar to the issue described in commit e02b93124855 ("workqueue: Unbind kworkers before sending them to exit()") Move inet_twsk_schedule() to within inet_twsk_hashdance(), with the ehash lock held. Expand the lock's critical section from inet_twsk_kill() to inet_twsk_deschedule_put(), serializing the scheduling vs descheduling of the timer. IOW, this prevents the following race: tcp_time_wait() inet_twsk_hashdance() inet_twsk_deschedule_put() del_timer_sync() inet_twsk_schedule() Thanks to Paolo Abeni for suggesting to leverage the ehash lock. This also restores a comment from commit ec94c2696f0b ("tcp/dccp: avoid one atomic operation for timewait hashdance") as inet_twsk_hashdance() had a "Step 1" and "Step 3" comment, but the "Step 2" had gone missing. inet_twsk_deschedule_put() now acquires the ehash spinlock to synchronize with inet_twsk_hashdance_schedule(). To ease possible regression search, actual un-pin is done in next patch. Link: https://lore.kernel.org/all/ZPhpfMjSiHVjQkTk@localhost.localdomain/ Reviewed-by: Eric Dumazet Signed-off-by: Valentin Schneider Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 2a536eea9424..5b43d220243d 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -93,8 +93,10 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, struct inet_timewait_death_row *dr, const int state); -void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, - struct inet_hashinfo *hashinfo); +void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, + struct sock *sk, + struct inet_hashinfo *hashinfo, + int timeo); void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm); -- cgit v1.2.3 From f81d0dd2fde35fd1acc30b3f4de6aaf57d514551 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Jun 2024 17:11:39 +0200 Subject: tcp: move inet_twsk_schedule helper out of header Its no longer used outside inet_timewait_sock.c, so move it there. Reviewed-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 5b43d220243d..f88b68269012 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -101,11 +101,6 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm); -static inline void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) -{ - __inet_twsk_schedule(tw, timeo, false); -} - static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) { __inet_twsk_schedule(tw, timeo, true); -- cgit v1.2.3 From 0b7e448119428e1dcb854abb5855f66966fb82dc Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 28 May 2024 14:29:33 -0500 Subject: ACPI: utils: introduce acpi_get_local_u64_address() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ACPI _ADR is a 64-bit value. We changed the definitions in commit ca6f998cf9a2 ("ACPI: bus: change _ADR representation to 64 bits") but some helpers still assume the value is a 32-bit value. This patch adds a new helper to extract the full 64-bits. The existing 32-bit helper is kept for backwards-compatibility and cases where the _ADR is known to fit in a 32-bit value. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Reviewed-by: Bard Liao Acked-by: Rafael J. Wysocki Reviewed-by: Takashi Iwai Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240528192936.16180-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 28c3fb2bef0d..65e7177bcb02 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -761,6 +761,7 @@ static inline u64 acpi_arch_get_root_pointer(void) } #endif +int acpi_get_local_u64_address(acpi_handle handle, u64 *addr); int acpi_get_local_address(acpi_handle handle, u32 *addr); const char *acpi_get_subsystem_id(acpi_handle handle); -- cgit v1.2.3 From e289df82344fecc104ad8326b9ab6da612b9c899 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 31 May 2024 22:42:40 +0300 Subject: spi: Rework per message DMA mapped flag to be per transfer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The granularity of DMA mappings is transfer and moreover, the direction is also important as it can be unidirect. The current cur_msg_mapped flag doesn't fit well the DMA mapping and syncing calls and we have tons of checks around on top of it. So, instead of doing that rework the code to use per transfer per direction flag to show if it's DMA mapped or not. Signed-off-by: Andy Shevchenko Tested-by: Neil Armstrong # on SM8650-QRD Link: https://lore.kernel.org/r/20240531194723.1761567-9-andriy.shevchenko@linux.intel.com Reviewed-by: Serge Semin Tested-by: Nícolas F. R. A. Prado Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e8e1e798924f..b4a89db4c855 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -447,7 +447,6 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @cur_msg_need_completion: Flag used internally to opportunistically skip * the @cur_msg_completion. This flag is used to signal the context that * is running spi_finalize_current_message() that it needs to complete() - * @cur_msg_mapped: message has been mapped for DMA * @fallback: fallback to PIO if DMA transfer return failure with * SPI_TRANS_FAIL_NO_START. * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. @@ -708,7 +707,6 @@ struct spi_controller { bool running; bool rt; bool auto_runtime_pm; - bool cur_msg_mapped; bool fallback; bool last_cs_mode_high; s8 last_cs[SPI_CS_CNT_MAX]; @@ -981,6 +979,8 @@ struct spi_res { * transfer this transfer. Set to 0 if the SPI bus driver does * not support it. * @transfer_list: transfers are sequenced through @spi_message.transfers + * @tx_sg_mapped: If true, the @tx_sg is mapped for DMA + * @rx_sg_mapped: If true, the @rx_sg is mapped for DMA * @tx_sg: Scatterlist for transmit, currently not for client use * @rx_sg: Scatterlist for receive, currently not for client use * @ptp_sts_word_pre: The word (subject to bits_per_word semantics) offset @@ -1077,10 +1077,13 @@ struct spi_transfer { #define SPI_TRANS_FAIL_IO BIT(1) u16 error; - dma_addr_t tx_dma; - dma_addr_t rx_dma; + bool tx_sg_mapped; + bool rx_sg_mapped; + struct sg_table tx_sg; struct sg_table rx_sg; + dma_addr_t tx_dma; + dma_addr_t rx_dma; unsigned dummy_data:1; unsigned cs_off:1; -- cgit v1.2.3 From 24d07f114e4ec7608659a4ef18307f76739c72a8 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Mon, 3 Jun 2024 11:47:26 +0200 Subject: drm/panic: Add a set_pixel() callback to drm_scanout_buffer This allows drivers to draw the pixel, and handle tiling, or specific color formats. v2: * Use fg_color for blit() functions (Javier Martinez Canillas) Signed-off-by: Jocelyn Falempe Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240603095343.39588-3-jfalempe@redhat.com --- include/drm/drm_panic.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/drm/drm_panic.h b/include/drm/drm_panic.h index 822dbb1aa9d6..73bb3f3d9ed9 100644 --- a/include/drm/drm_panic.h +++ b/include/drm/drm_panic.h @@ -50,6 +50,15 @@ struct drm_scanout_buffer { * @pitch: Length in bytes between the start of two consecutive lines. */ unsigned int pitch[DRM_FORMAT_MAX_PLANES]; + + /** + * @set_pixel: Optional function, to set a pixel color on the + * framebuffer. It allows to handle special tiling format inside the + * driver. + */ + void (*set_pixel)(struct drm_scanout_buffer *sb, unsigned int x, + unsigned int y, u32 color); + }; /** -- cgit v1.2.3 From 1f020495458396766496ee067130f507a4d718e4 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Fri, 31 May 2024 22:37:46 +0200 Subject: drm/bridge: Drop drm_bridge_chain_mode_fixup There are no users left of drm_bridge_chain_mode_fixup() and we do not want to have this function available, so drop it. Signed-off-by: Sam Ravnborg Reviewed-by: Maxime Ripard Reviewed-by: Laurent Pinchart Cc: Laurent Pinchart Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20240531-bridge_chain_mode-v1-2-8b49e36c5dd3@ravnborg.org --- include/drm/drm_bridge.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 4baca0d9107b..5cf41f92d1f0 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -855,9 +855,6 @@ drm_bridge_chain_get_first_bridge(struct drm_encoder *encoder) #define drm_for_each_bridge_in_chain(encoder, bridge) \ list_for_each_entry(bridge, &(encoder)->bridge_chain, chain_node) -bool drm_bridge_chain_mode_fixup(struct drm_bridge *bridge, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode); enum drm_mode_status drm_bridge_chain_mode_valid(struct drm_bridge *bridge, const struct drm_display_info *info, -- cgit v1.2.3 From 5fbf57a937f418fe204f9dbb7735e91984f4ee6a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 6 Jun 2024 12:29:06 -0700 Subject: net: netlink: remove the cb_mutex "injection" from netlink core Back in 2007, in commit af65bdfce98d ("[NETLINK]: Switch cb_lock spinlock to mutex and allow to override it") netlink core was extended to allow subsystems to replace the dump mutex lock with its own lock. The mechanism was used by rtnetlink to take rtnl_lock but it isn't sufficiently flexible for other users. Over the 17 years since it was added no other user appeared. Since rtnetlink needs conditional locking now, and doesn't use it either, axe this feature complete. Signed-off-by: Jakub Kicinski Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 5df7340d4dab..b332c2048c75 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -47,7 +47,6 @@ struct netlink_kernel_cfg { unsigned int groups; unsigned int flags; void (*input)(struct sk_buff *skb); - struct mutex *cb_mutex; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); void (*release) (struct sock *sk, unsigned long *groups); -- cgit v1.2.3 From c6ae073f5903f6c6439d0ac855836a4da5c0a701 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 6 Jun 2024 23:32:48 +0300 Subject: geneve: Fix incorrect inner network header offset when innerprotoinherit is set When innerprotoinherit is set, the tunneled packets do not have an inner Ethernet header. Change 'maclen' to not always assume the header length is ETH_HLEN, as there might not be a MAC header. This resolves issues with drivers (e.g. mlx5, in mlx5e_tx_tunnel_accel()) who rely on the skb inner network header offset to be correct, and use it for TX offloads. Fixes: d8a6213d70ac ("geneve: fix header validation in geneve[6]_xmit_skb") Signed-off-by: Gal Pressman Signed-off-by: Tariq Toukan Reviewed-by: Wojciech Drewek Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9a6a08ec7713..1db2417b8ff5 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -461,9 +461,10 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) /* Variant of pskb_inet_may_pull(). */ -static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) +static inline bool skb_vlan_inet_prepare(struct sk_buff *skb, + bool inner_proto_inherit) { - int nhlen = 0, maclen = ETH_HLEN; + int nhlen = 0, maclen = inner_proto_inherit ? 0 : ETH_HLEN; __be16 type = skb->protocol; /* Essentially this is skb_protocol(skb, true) -- cgit v1.2.3 From 806a5198c05987b748b50f3d0c0cfb3d417381a4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 20 May 2024 16:03:07 -0400 Subject: Bluetooth: L2CAP: Fix rejecting L2CAP_CONN_PARAM_UPDATE_REQ This removes the bogus check for max > hcon->le_conn_max_interval since the later is just the initial maximum conn interval not the maximum the stack could support which is really 3200=4000ms. In order to pass GAP/CONN/CPUP/BV-05-C one shall probably enter values of the following fields in IXIT that would cause hci_check_conn_params to fail: TSPX_conn_update_int_min TSPX_conn_update_int_max TSPX_conn_update_peripheral_latency TSPX_conn_update_supervision_timeout Link: https://github.com/bluez/bluez/issues/847 Fixes: e4b019515f95 ("Bluetooth: Enforce validation on max value of connection interval") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9231396fe96f..c43716edf205 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2113,18 +2113,46 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, { u16 max_latency; - if (min > max || min < 6 || max > 3200) + if (min > max) { + BT_WARN("min %d > max %d", min, max); return -EINVAL; + } + + if (min < 6) { + BT_WARN("min %d < 6", min); + return -EINVAL; + } + + if (max > 3200) { + BT_WARN("max %d > 3200", max); + return -EINVAL; + } + + if (to_multiplier < 10) { + BT_WARN("to_multiplier %d < 10", to_multiplier); + return -EINVAL; + } - if (to_multiplier < 10 || to_multiplier > 3200) + if (to_multiplier > 3200) { + BT_WARN("to_multiplier %d > 3200", to_multiplier); return -EINVAL; + } - if (max >= to_multiplier * 8) + if (max >= to_multiplier * 8) { + BT_WARN("max %d >= to_multiplier %d * 8", max, to_multiplier); return -EINVAL; + } max_latency = (to_multiplier * 4 / max) - 1; - if (latency > 499 || latency > max_latency) + if (latency > 499) { + BT_WARN("latency %d > 499", latency); return -EINVAL; + } + + if (latency > max_latency) { + BT_WARN("latency %d > max_latency %d", latency, max_latency); + return -EINVAL; + } return 0; } -- cgit v1.2.3 From 8a74e4eaa72c14f997df6d820effb6aac400d470 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 10 Jun 2024 10:20:53 +0200 Subject: PCI: switchtec: Make switchtec_class constant Now that the driver core allows for struct class to be in read-only memory, we should make all 'class' structures declared at build time placing them into read-only memory, instead of having to be dynamically allocated at runtime. Link: https://lore.kernel.org/r/2024061053-online-unwound-b173@gregkh Signed-off-by: Greg Kroah-Hartman Signed-off-by: Bjorn Helgaas Reviewed-by: Dave Jiang Reviewed-By: Logan Gunthorpe Cc: Kurt Schwemmer Cc: Jon Mason Cc: Allen Hubbe --- include/linux/switchtec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index 8d8fac1626bd..cdb58d61c152 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -521,6 +521,6 @@ static inline struct switchtec_dev *to_stdev(struct device *dev) return container_of(dev, struct switchtec_dev, dev); } -extern struct class *switchtec_class; +extern const struct class switchtec_class; #endif -- cgit v1.2.3 From 1d4ce389da2b84e0e24aad5e83fe9c742adc3f99 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 7 Jun 2024 14:22:33 -0700 Subject: ice: add and use roundup_u64 instead of open coding equivalent In ice_ptp_cfg_clkout(), the ice driver needs to calculate the nearest next second of a current time value specified in nanoseconds. It implements this using div64_u64, because the time value is a u64. It could use div_u64 since NSEC_PER_SEC is smaller than 32-bits. Ideally this would be implemented directly with roundup(), but that can't work on all platforms due to a division which requires using the specific macros and functions due to platform restrictions, and to ensure that the most appropriate and fast instructions are used. The kernel doesn't currently provide any 64-bit equivalents for doing roundup. Attempting to use roundup() on a 32-bit platform will result in a link failure due to not having a direct 64-bit division. The closest equivalent for this is DIV64_U64_ROUND_UP, which does a division always rounding up. However, this only computes the division, and forces use of the div64_u64 in cases where the divisor is a 32bit value and could make use of div_u64. Introduce DIV_U64_ROUND_UP based on div_u64, and then use it to implement roundup_u64 which takes a u64 input value and a u32 rounding value. The name roundup_u64 matches the naming scheme of div_u64, and future patches could implement roundup64_u64 if they need to round by a multiple that is greater than 32-bits. Replace the logic in ice_ptp.c which does this equivalent with the newly added roundup_u64. Tested-by: Pucha Himasekhar Reddy Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20240607-next-2024-06-03-intel-next-batch-v3-2-d1470cee3347@intel.com Signed-off-by: Jakub Kicinski --- include/linux/math64.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include') diff --git a/include/linux/math64.h b/include/linux/math64.h index d34def7f9a8c..6aaccc1626ab 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -297,6 +297,19 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); #define DIV64_U64_ROUND_UP(ll, d) \ ({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); }) +/** + * DIV_U64_ROUND_UP - unsigned 64bit divide with 32bit divisor rounded up + * @ll: unsigned 64bit dividend + * @d: unsigned 32bit divisor + * + * Divide unsigned 64bit dividend by unsigned 32bit divisor + * and round up. + * + * Return: dividend / divisor rounded up + */ +#define DIV_U64_ROUND_UP(ll, d) \ + ({ u32 _tmp = (d); div_u64((ll) + _tmp - 1, _tmp); }) + /** * DIV64_U64_ROUND_CLOSEST - unsigned 64bit divide with 64bit divisor rounded to nearest integer * @dividend: unsigned 64bit dividend @@ -342,4 +355,19 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); div_s64((__x - (__d / 2)), __d); \ } \ ) + +/** + * roundup_u64 - Round up a 64bit value to the next specified 32bit multiple + * @x: the value to up + * @y: 32bit multiple to round up to + * + * Rounds @x to the next multiple of @y. For 32bit @x values, see roundup and + * the faster round_up() for powers of 2. + * + * Return: rounded up value. + */ +static inline u64 roundup_u64(u64 x, u32 y) +{ + return DIV_U64_ROUND_UP(x, y) * y; +} #endif /* _LINUX_MATH64_H */ -- cgit v1.2.3 From 5f7fb89a115d53b4a10bf7ba2733e78df281e98d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 10 Jun 2024 23:09:36 -0400 Subject: function_graph: Everyone uses HAVE_FUNCTION_GRAPH_RET_ADDR_PTR, remove it All architectures that implement function graph also implements HAVE_FUNCTION_GRAPH_RET_ADDR_PTR. Remove it, as it is no longer a differentiator. Link: https://lore.kernel.org/linux-trace-kernel/20240611031737.982047614@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Jonathan Corbet Cc: Catalin Marinas Cc: Will Deacon Cc: Guo Ren Cc: Huacai Chen Cc: WANG Xuerui Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: "Naveen N. Rao" Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4135dc171447..845c2ab0bc1c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1071,9 +1071,7 @@ struct ftrace_ret_stack { #ifdef HAVE_FUNCTION_GRAPH_FP_TEST unsigned long fp; #endif -#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR unsigned long *retp; -#endif }; /* -- cgit v1.2.3 From e8343410ddf08fc36a9b9cc7c51a4e53a262d4c6 Mon Sep 17 00:00:00 2001 From: Jai Luthra Date: Tue, 11 Jun 2024 18:02:55 +0530 Subject: ALSA: dmaengine: Synchronize dma channel after drop() Sometimes the stream may be stopped due to XRUN events, in which case the userspace can call snd_pcm_drop() and snd_pcm_prepare() to stop and start the stream again. In these cases, we must wait for the DMA channel to synchronize before marking the stream as prepared for playback, as the DMA channel gets stopped by drop() without any synchronization. Make sure the ALSA core synchronizes the DMA channel by adding a sync_stop() hook. Reviewed-by: Peter Ujfalusi Signed-off-by: Jai Luthra Link: https://lore.kernel.org/r/20240611-asoc_next-v3-1-fcfd84b12164@ti.com Signed-off-by: Mark Brown --- include/sound/dmaengine_pcm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h index c11aaf8079fb..f6baa9a01868 100644 --- a/include/sound/dmaengine_pcm.h +++ b/include/sound/dmaengine_pcm.h @@ -36,6 +36,7 @@ snd_pcm_uframes_t snd_dmaengine_pcm_pointer_no_residue(struct snd_pcm_substream int snd_dmaengine_pcm_open(struct snd_pcm_substream *substream, struct dma_chan *chan); int snd_dmaengine_pcm_close(struct snd_pcm_substream *substream); +int snd_dmaengine_pcm_sync_stop(struct snd_pcm_substream *substream); int snd_dmaengine_pcm_open_request_chan(struct snd_pcm_substream *substream, dma_filter_fn filter_fn, void *filter_data); -- cgit v1.2.3 From 85542adb65ecd7cc0e442e8befef74f2ed07f5f6 Mon Sep 17 00:00:00 2001 From: Thomas Prescher Date: Wed, 8 May 2024 15:25:01 +0200 Subject: KVM: x86: Add KVM_RUN_X86_GUEST_MODE kvm_run flag When a vCPU is interrupted by a signal while running a nested guest, KVM will exit to userspace with L2 state. However, userspace has no way to know whether it sees L1 or L2 state (besides calling KVM_GET_STATS_FD, which does not have a stable ABI). This causes multiple problems: The simplest one is L2 state corruption when userspace marks the sregs as dirty. See this mailing list thread [1] for a complete discussion. Another problem is that if userspace decides to continue by emulating instructions, it will unknowingly emulate with L2 state as if L1 doesn't exist, which can be considered a weird guest escape. Introduce a new flag, KVM_RUN_X86_GUEST_MODE, in the kvm_run data structure, which is set when the vCPU exited while running a nested guest. Also introduce a new capability, KVM_CAP_X86_GUEST_MODE, to advertise the functionality to userspace. [1] https://lore.kernel.org/kvm/20240416123558.212040-1-julian.stecklina@cyberus-technology.de/T/#m280aadcb2e10ae02c191a7dc4ed4b711a74b1f55 Signed-off-by: Thomas Prescher Signed-off-by: Julian Stecklina Link: https://lore.kernel.org/r/20240508132502.184428-1-julian.stecklina@cyberus-technology.de Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ec998e6b6555..a6ac00ec77ad 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -918,6 +918,7 @@ struct kvm_enable_cap { #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 236 +#define KVM_CAP_X86_GUEST_MODE 237 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From f328a26eeb5380bc74e58cb9c3280a4908452df7 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 3 Jun 2024 17:55:55 -0400 Subject: dlm: introduce DLM_LSFL_SOFTIRQ_SAFE Introduce a new external lockspace flag DLM_LSFL_SOFTIRQ_SAFE. A lockspace user will set this flag if it can handle dlm running the callback functions from softirq context. When not set, dlm will continue to run callback functions from the dlm_callback workqueue. The new lockspace flag cannot be used for user space lockspaces, so a uapi placeholder definition is used for the new flag value. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- include/linux/dlm.h | 17 ++++++++++++++++- include/uapi/linux/dlm.h | 2 ++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dlm.h b/include/linux/dlm.h index c58c4f790c04..bacda9898f2b 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -35,6 +35,9 @@ struct dlm_lockspace_ops { int num_slots, int our_slot, uint32_t generation); }; +/* only relevant for kernel lockspaces, will be removed in future */ +#define DLM_LSFL_SOFTIRQ __DLM_LSFL_RESERVED0 + /* * dlm_new_lockspace * @@ -55,6 +58,11 @@ struct dlm_lockspace_ops { * used to select the directory node. Must be the same on all nodes. * DLM_LSFL_NEWEXCL * dlm_new_lockspace() should return -EEXIST if the lockspace exists. + * DLM_LSFL_SOFTIRQ + * dlm request callbacks (ast, bast) are softirq safe. Flag should be + * preferred by users. Will be default in some future. If set the + * strongest context for ast, bast callback is softirq as it avoids + * an additional context switch. * * lvblen: length of lvb in bytes. Must be multiple of 8. * dlm_new_lockspace() returns an error if this does not match @@ -121,7 +129,14 @@ int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force); * call. * * AST routines should not block (at least not for long), but may make - * any locking calls they please. + * any locking calls they please. If DLM_LSFL_SOFTIRQ for kernel + * users of dlm_new_lockspace() is passed the ast and bast callbacks + * can be processed in softirq context. Also some of the callback + * contexts are in the same context as the DLM lock request API, users + * must not hold locks while calling dlm lock request API and trying + * to acquire this lock in the callback again, this will end in a + * lock recursion. For newer implementation the DLM_LSFL_SOFTIRQ + * should be used. */ int dlm_lock(dlm_lockspace_t *lockspace, diff --git a/include/uapi/linux/dlm.h b/include/uapi/linux/dlm.h index e7e905fb0bb2..4eaf835780b0 100644 --- a/include/uapi/linux/dlm.h +++ b/include/uapi/linux/dlm.h @@ -71,6 +71,8 @@ struct dlm_lksb { /* DLM_LSFL_TIMEWARN is deprecated and reserved. DO NOT USE! */ #define DLM_LSFL_TIMEWARN 0x00000002 #define DLM_LSFL_NEWEXCL 0x00000008 +/* currently reserved due in-kernel use */ +#define __DLM_LSFL_RESERVED0 0x00000010 #endif /* _UAPI__DLM_DOT_H__ */ -- cgit v1.2.3 From 10b8e0fd3f7234a38db2c8d2c8dec0bd6eeede44 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Fri, 31 May 2024 17:07:10 +0200 Subject: dmaengine: add channel device name to channel registration Channel device name is used for sysfs, but also by dmatest filter function. With dynamic channel registration, channels can be registered after dma controller registration. Users may want to have specific channel names. If name is NULL, the channel name relies on previous implementation, dmachan. Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20240531150712.2503554-11-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 752dbde4cec1..73537fddbb52 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1575,7 +1575,8 @@ int dma_async_device_register(struct dma_device *device); int dmaenginem_async_device_register(struct dma_device *device); void dma_async_device_unregister(struct dma_device *device); int dma_async_device_channel_register(struct dma_device *device, - struct dma_chan *chan); + struct dma_chan *chan, + const char *name); void dma_async_device_channel_unregister(struct dma_device *device, struct dma_chan *chan); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); -- cgit v1.2.3 From 3ff1180a39fbc43ae69d4238e6922c57e3278910 Mon Sep 17 00:00:00 2001 From: Andrew Davis Date: Mon, 10 Jun 2024 08:53:13 -0500 Subject: gpiolib: Remove data-less gpiochip_add() function GPIO chips should be added with driver-private data associated with the chip. If none is needed, NULL can be used. All users already do this except one, fix that here. With no more users of the base gpiochip_add() we can drop this function so no more users show up later. Signed-off-by: Andrew Davis Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240610135313.142571-1-afd@ti.com Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 0032bb6e7d8f..6d31388dde0a 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -632,10 +632,6 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, devm_gpiochip_add_data_with_key(dev, gc, data, NULL, NULL) #endif /* CONFIG_LOCKDEP */ -static inline int gpiochip_add(struct gpio_chip *gc) -{ - return gpiochip_add_data(gc, NULL); -} void gpiochip_remove(struct gpio_chip *gc); int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, void *data, struct lock_class_key *lock_key, -- cgit v1.2.3 From fbe4a7e881d4408bfabbb4fd538f10fd686cd8ab Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 6 May 2024 18:17:49 +0800 Subject: KVM: Setup empty IRQ routing when creating a VM Setup empty IRQ routing during VM creation so that x86 and s390 don't need to set empty/dummy IRQ routing during KVM_CREATE_IRQCHIP (in future patches). Initializing IRQ routing before there are any potential readers allows KVM to avoid the synchronize_srcu() in kvm_set_irq_routing(), which can introduces 20+ milliseconds of latency in the VM creation path. Ensuring that all VMs have non-NULL IRQ routing also hardens KVM against misbehaving userspace VMMs, e.g. RISC-V dynamically instantiates its interrupt controller, but doesn't override kvm_arch_intc_initialized() or kvm_arch_irqfd_allowed(), and so can likely reach kvm_irq_map_gsi() without fully initialized IRQ routing. Signed-off-by: Yi Wang Acked-by: Christian Borntraeger Link: https://lore.kernel.org/r/20240506101751.3145407-2-foxywang@tencent.com [sean: init refcount after IRQ routing, fix stub, massage changelog] Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c80fe03a1fa4..eaa70e9b1218 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2094,6 +2094,7 @@ int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, unsigned flags); +int kvm_init_irq_routing(struct kvm *kvm); int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); @@ -2103,6 +2104,11 @@ void kvm_free_irq_routing(struct kvm *kvm); static inline void kvm_free_irq_routing(struct kvm *kvm) {} +static inline int kvm_init_irq_routing(struct kvm *kvm) +{ + return 0; +} + #endif int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); -- cgit v1.2.3 From d1ae567fb8b559401a9f65290bbb0cef5e987bfe Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 21 May 2024 18:40:08 -0700 Subject: KVM: Add a flag to track if a loaded vCPU is scheduled out Add a kvm_vcpu.scheduled_out flag to track if a vCPU is in the process of being scheduled out (vCPU put path), or if the vCPU is being reloaded after being scheduled out (vCPU load path). In the short term, this will allow dropping kvm_arch_sched_in(), as arch code can query scheduled_out during kvm_arch_vcpu_load(). Longer term, scheduled_out opens up other potential optimizations, without creating subtle/brittle dependencies. E.g. it allows KVM to keep guest state (that is managed via kvm_arch_vcpu_{load,put}()) loaded across kvm_sched_{out,in}(), if KVM knows the state isn't accessed by the host kernel. Forcing arch code to coordinate between kvm_arch_sched_{in,out}() and kvm_arch_vcpu_{load,put}() is awkward, not reusable, and relies on the exact ordering of calls into arch code. Adding scheduled_out also obviates the need for a kvm_arch_sched_out() hook, e.g. if arch code needs to do something novel when putting vCPU state. And even if KVM never uses scheduled_out for anything beyond dropping kvm_arch_sched_in(), just being able to remove all of the arch stubs makes it worth adding the flag. Link: https://lore.kernel.org/all/20240430224431.490139-1-seanjc@google.com Cc: Oliver Upton Reviewed-by: Oliver Upton Acked-by: Kai Huang Link: https://lore.kernel.org/r/20240522014013.1672962-2-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index eaa70e9b1218..5e04c6cae34a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -380,6 +380,7 @@ struct kvm_vcpu { #endif bool preempted; bool ready; + bool scheduled_out; struct kvm_vcpu_arch arch; struct kvm_vcpu_stat stat; char stats_id[KVM_STATS_NAME_SIZE]; -- cgit v1.2.3 From 2a27c431400797e0044872283d1971aa372fcd3a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 21 May 2024 18:40:11 -0700 Subject: KVM: Delete the now unused kvm_arch_sched_in() Delete kvm_arch_sched_in() now that all implementations are nops. Reviewed-by: Bibo Mao Acked-by: Kai Huang Link: https://lore.kernel.org/r/20240522014013.1672962-5-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5e04c6cae34a..7b9d2633a931 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1495,8 +1495,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu); -void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id); -- cgit v1.2.3 From 190fec72df4a5d4d98b1e783c333f471e5e5f344 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 12 Jun 2024 08:44:27 +0900 Subject: uprobe: Wire up uretprobe system call Wiring up uretprobe system call, which comes in following changes. We need to do the wiring before, because the uretprobe implementation needs the syscall number. Note at the moment uretprobe syscall is supported only for native 64-bit process. Link: https://lore.kernel.org/all/20240611112158.40795-3-jolsa@kernel.org/ Reviewed-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Signed-off-by: Masami Hiramatsu (Google) --- include/linux/syscalls.h | 2 ++ include/uapi/asm-generic/unistd.h | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 9104952d323d..494f5e0f61f7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -973,6 +973,8 @@ asmlinkage long sys_lsm_list_modules(u64 *ids, u32 *size, u32 flags); /* x86 */ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on); +asmlinkage long sys_uretprobe(void); + /* pciconfig: alpha, arm, arm64, ia64, sparc */ asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index d983c48a3b6a..2378f88d5ad4 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -845,8 +845,11 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) #define __NR_mseal 462 __SYSCALL(__NR_mseal, sys_mseal) +#define __NR_uretprobe 463 +__SYSCALL(__NR_uretprobe, sys_uretprobe) + #undef __NR_syscalls -#define __NR_syscalls 463 +#define __NR_syscalls 464 /* * 32 bit systems traditionally used different -- cgit v1.2.3 From ff474a78cef5cb5f32be52fe25b78441327a2e7c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 12 Jun 2024 08:44:28 +0900 Subject: uprobe: Add uretprobe syscall to speed up return probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding uretprobe syscall instead of trap to speed up return probe. At the moment the uretprobe setup/path is: - install entry uprobe - when the uprobe is hit, it overwrites probed function's return address on stack with address of the trampoline that contains breakpoint instruction - the breakpoint trap code handles the uretprobe consumers execution and jumps back to original return address This patch replaces the above trampoline's breakpoint instruction with new ureprobe syscall call. This syscall does exactly the same job as the trap with some more extra work: - syscall trampoline must save original value for rax/r11/rcx registers on stack - rax is set to syscall number and r11/rcx are changed and used by syscall instruction - the syscall code reads the original values of those registers and restore those values in task's pt_regs area - only caller from trampoline exposed in '[uprobes]' is allowed, the process will receive SIGILL signal otherwise Even with some extra work, using the uretprobes syscall shows speed improvement (compared to using standard breakpoint): On Intel (11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz) current: uretprobe-nop : 1.498 ± 0.000M/s uretprobe-push : 1.448 ± 0.001M/s uretprobe-ret : 0.816 ± 0.001M/s with the fix: uretprobe-nop : 1.969 ± 0.002M/s < 31% speed up uretprobe-push : 1.910 ± 0.000M/s < 31% speed up uretprobe-ret : 0.934 ± 0.000M/s < 14% speed up On Amd (AMD Ryzen 7 5700U) current: uretprobe-nop : 0.778 ± 0.001M/s uretprobe-push : 0.744 ± 0.001M/s uretprobe-ret : 0.540 ± 0.001M/s with the fix: uretprobe-nop : 0.860 ± 0.001M/s < 10% speed up uretprobe-push : 0.818 ± 0.001M/s < 10% speed up uretprobe-ret : 0.578 ± 0.000M/s < 7% speed up The performance test spawns a thread that runs loop which triggers uprobe with attached bpf program that increments the counter that gets printed in results above. The uprobe (and uretprobe) kind is determined by which instruction is being patched with breakpoint instruction. That's also important for uretprobes, because uprobe is installed for each uretprobe. The performance test is part of bpf selftests: tools/testing/selftests/bpf/run_bench_uprobes.sh Note at the moment uretprobe syscall is supported only for native 64-bit process, compat process still uses standard breakpoint. Note that when shadow stack is enabled the uretprobe syscall returns via iret, which is slower than return via sysret, but won't cause the shadow stack violation. Link: https://lore.kernel.org/all/20240611112158.40795-4-jolsa@kernel.org/ Suggested-by: Andrii Nakryiko Reviewed-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Signed-off-by: Oleg Nesterov Signed-off-by: Jiri Olsa Signed-off-by: Masami Hiramatsu (Google) --- include/linux/uprobes.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index f46e0ca0169c..b503fafb7fb3 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -138,6 +138,9 @@ extern bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check c extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs); extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len); +extern void uprobe_handle_trampoline(struct pt_regs *regs); +extern void *arch_uprobe_trampoline(unsigned long *psize); +extern unsigned long uprobe_get_trampoline_vaddr(void); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; -- cgit v1.2.3 From 90e6f08915ec6efe46570420412a65050ec826b2 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 11 Jun 2024 17:34:35 +0900 Subject: scsi: mpi3mr: Fix ATA NCQ priority support The function mpi3mr_qcmd() of the mpi3mr driver is able to indicate to the HBA if a read or write command directed at an ATA device should be translated to an NCQ read/write command with the high prioiryt bit set when the request uses the RT priority class and the user has enabled NCQ priority through sysfs. However, unlike the mpt3sas driver, the mpi3mr driver does not define the sas_ncq_prio_supported and sas_ncq_prio_enable sysfs attributes, so the ncq_prio_enable field of struct mpi3mr_sdev_priv_data is never actually set and NCQ Priority cannot ever be used. Fix this by defining these missing atributes to allow a user to check if an ATA device supports NCQ priority and to enable/disable the use of NCQ priority. To do this, lift the function scsih_ncq_prio_supp() out of the mpt3sas driver and make it the generic SCSI SAS transport function sas_ata_ncq_prio_supported(). Nothing in that function is hardware specific, so this function can be used in both the mpt3sas driver and the mpi3mr driver. Reported-by: Scott McCoy Fixes: 023ab2a9b4ed ("scsi: mpi3mr: Add support for queue command processing") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20240611083435.92961-1-dlemoal@kernel.org Reviewed-by: Niklas Cassel Signed-off-by: Martin K. Petersen --- include/scsi/scsi_transport_sas.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index 0e75b9277c8c..e3b6ce3cbf88 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -200,6 +200,8 @@ unsigned int sas_is_tlr_enabled(struct scsi_device *); void sas_disable_tlr(struct scsi_device *); void sas_enable_tlr(struct scsi_device *); +bool sas_ata_ncq_prio_supported(struct scsi_device *sdev); + extern struct sas_rphy *sas_end_device_alloc(struct sas_port *); extern struct sas_rphy *sas_expander_alloc(struct sas_port *, enum sas_device_type); void sas_rphy_free(struct sas_rphy *); -- cgit v1.2.3 From fa59dc2f6fc616fde3941f62ccd4adcaa21ccd47 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 7 Jun 2024 18:25:24 +0800 Subject: net: core,vrf: Change pcpu_dstat fields to u64_stats_t The pcpu_sw_netstats and pcpu_lstats structs both contain a set of u64_stats_t fields for individual stats, but pcpu_dstats uses u64s instead. Make this consistent by using u64_stats_t across all stats types. The per-cpu dstats are only used by the vrf driver at present, so update that driver as part of this change. Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240607-dstats-v3-1-cc781fe116f7@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d20c6c99eb88..f148a01dd1d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2731,12 +2731,12 @@ struct pcpu_sw_netstats { } __aligned(4 * sizeof(u64)); struct pcpu_dstats { - u64 rx_packets; - u64 rx_bytes; - u64 rx_drops; - u64 tx_packets; - u64 tx_bytes; - u64 tx_drops; + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_drops; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; + u64_stats_t tx_drops; struct u64_stats_sync syncp; } __aligned(8 * sizeof(u64)); -- cgit v1.2.3 From 144ba8580bcb82b2686c3d1a043299d844b9a682 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 10 Jun 2024 10:34:26 +0200 Subject: net: pse-pd: Use EOPNOTSUPP error code instead of ENOTSUPP ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP as reported by checkpatch script. Fixes: 18ff0bcda6d1 ("ethtool: add interface to interact with Ethernet Power Equipment") Reviewed-by: Andrew Lunn Acked-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240610083426.740660-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/pse-pd/pse.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index 6d07c95dabb9..6eec24ffa866 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -167,14 +167,14 @@ static inline int pse_ethtool_get_status(struct pse_control *psec, struct netlink_ext_ack *extack, struct pse_control_status *status) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int pse_ethtool_set_config(struct pse_control *psec, struct netlink_ext_ack *extack, const struct pse_control_config *config) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline bool pse_has_podl(struct pse_control *psec) -- cgit v1.2.3 From 3966a668bfeef49e265e4975a2cdc55fb931036d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:55 +0100 Subject: net/tcp: Use static_branch_tcp_{md5,ao} to drop ifdefs It's possible to clean-up some ifdefs by hiding that tcp_{md5,ao}_needed static branch is defined and compiled only under related configs, since commit 4c8530dc7d7d ("net/tcp: Only produce AO/MD5 logs if there are any keys"). Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/net/tcp.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index a70fc39090fe..e5427b05129b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2386,21 +2386,15 @@ static inline void tcp_get_current_key(const struct sock *sk, static inline bool tcp_key_is_md5(const struct tcp_key *key) { -#ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed.key) && - key->type == TCP_KEY_MD5) - return true; -#endif + if (static_branch_tcp_md5()) + return key->type == TCP_KEY_MD5; return false; } static inline bool tcp_key_is_ao(const struct tcp_key *key) { -#ifdef CONFIG_TCP_AO - if (static_branch_unlikely(&tcp_ao_needed.key) && - key->type == TCP_KEY_AO) - return true; -#endif + if (static_branch_tcp_ao()) + return key->type == TCP_KEY_AO; return false; } -- cgit v1.2.3 From 72863087f635367323693b9ab83c3107e0353c5f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:56 +0100 Subject: net/tcp: Add a helper tcp_ao_hdr_maclen() It's going to be used more in TCP-AO tracepoints. Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/net/tcp_ao.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 5d8e9ed2c005..198e02004ad2 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -19,6 +19,11 @@ struct tcp_ao_hdr { u8 rnext_keyid; }; +static inline u8 tcp_ao_hdr_maclen(const struct tcp_ao_hdr *aoh) +{ + return aoh->length - sizeof(struct tcp_ao_hdr); +} + struct tcp_ao_counters { atomic64_t pkt_good; atomic64_t pkt_bad; -- cgit v1.2.3 From 811efc06e5f30a57030451b2d1998aa81273baf8 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:57 +0100 Subject: net/tcp: Move tcp_inbound_hash() from headers Two reasons: 1. It's grown up enough 2. In order to not do header spaghetti by including , which is necessary for TCP tracepoints. While at it, unexport and make static tcp_inbound_ao_hash(). Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/net/tcp.h | 78 +++---------------------------------------------------- 1 file changed, 4 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index e5427b05129b..2aac11e7e1cc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1863,12 +1863,6 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk, return __tcp_md5_do_lookup(sk, 0, addr, family, true); } -enum skb_drop_reason -tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, - const void *saddr, const void *daddr, - int family, int l3index, const __u8 *hash_location); - - #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else static inline struct tcp_md5sig_key * @@ -1885,13 +1879,6 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk, return NULL; } -static inline enum skb_drop_reason -tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, - const void *saddr, const void *daddr, - int family, int l3index, const __u8 *hash_location) -{ - return SKB_NOT_DROPPED_YET; -} #define tcp_twsk_md5_key(twsk) NULL #endif @@ -2806,66 +2793,9 @@ static inline bool tcp_ao_required(struct sock *sk, const void *saddr, return false; } -/* Called with rcu_read_lock() */ -static inline enum skb_drop_reason -tcp_inbound_hash(struct sock *sk, const struct request_sock *req, - const struct sk_buff *skb, - const void *saddr, const void *daddr, - int family, int dif, int sdif) -{ - const struct tcphdr *th = tcp_hdr(skb); - const struct tcp_ao_hdr *aoh; - const __u8 *md5_location; - int l3index; - - /* Invalid option or two times meet any of auth options */ - if (tcp_parse_auth_options(th, &md5_location, &aoh)) { - tcp_hash_fail("TCP segment has incorrect auth options set", - family, skb, ""); - return SKB_DROP_REASON_TCP_AUTH_HDR; - } - - if (req) { - if (tcp_rsk_used_ao(req) != !!aoh) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); - tcp_hash_fail("TCP connection can't start/end using TCP-AO", - family, skb, "%s", - !aoh ? "missing AO" : "AO signed"); - return SKB_DROP_REASON_TCP_AOFAILURE; - } - } - - /* sdif set, means packet ingressed via a device - * in an L3 domain and dif is set to the l3mdev - */ - l3index = sdif ? dif : 0; - - /* Fast path: unsigned segments */ - if (likely(!md5_location && !aoh)) { - /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid - * for the remote peer. On TCP-AO established connection - * the last key is impossible to remove, so there's - * always at least one current_key. - */ - if (tcp_ao_required(sk, saddr, family, l3index, true)) { - tcp_hash_fail("AO hash is required, but not found", - family, skb, "L3 index %d", l3index); - return SKB_DROP_REASON_TCP_AONOTFOUND; - } - if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - tcp_hash_fail("MD5 Hash not found", - family, skb, "L3 index %d", l3index); - return SKB_DROP_REASON_TCP_MD5NOTFOUND; - } - return SKB_NOT_DROPPED_YET; - } - - if (aoh) - return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh); - - return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, - l3index, md5_location); -} +enum skb_drop_reason tcp_inbound_hash(struct sock *sk, + const struct request_sock *req, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif); #endif /* _TCP_H */ -- cgit v1.2.3 From 96be3dcd013df6aa79acf32e739c0a35b89a4f50 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:58 +0100 Subject: net/tcp: Add tcp-md5 and tcp-ao tracepoints Instead of forcing userspace to parse dmesg (that's what currently is happening, at least in codebase of my current company), provide a better way, that can be enabled/disabled in runtime. Currently, there are already tcp events, add hashing related ones there, too. Rasdaemon currently exercises net_dev_xmit_timeout, devlink_health_report, but it'll be trivial to teach it to deal with failed hashes. Otherwise, BGP may trace/log them itself. Especially exciting for possible investigations is key rotation (RNext_key requests). Suggested-by: Jakub Kicinski Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 317 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 317 insertions(+) (limited to 'include') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 49b5ee091cf6..1c8bd8e186b8 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -411,6 +411,323 @@ TRACE_EVENT(tcp_cong_state_set, __entry->cong_state) ); +DECLARE_EVENT_CLASS(tcp_hash_event, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + + TP_ARGS(sk, skb), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skbaddr) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __field(int, l3index) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(bool, fin) + __field(bool, syn) + __field(bool, rst) + __field(bool, psh) + __field(bool, ack) + ), + + TP_fast_assign( + const struct tcphdr *th = (const struct tcphdr *)skb->data; + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skbaddr = skb; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); + __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; + + /* For filtering use */ + __entry->sport = ntohs(th->source); + __entry->dport = ntohs(th->dest); + __entry->family = sk->sk_family; + + __entry->fin = th->fin; + __entry->syn = th->syn; + __entry->rst = th->rst; + __entry->psh = th->psh; + __entry->ack = th->ack; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c]", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->l3index, + __entry->fin ? 'F' : ' ', + __entry->syn ? 'S' : ' ', + __entry->rst ? 'R' : ' ', + __entry->psh ? 'P' : ' ', + __entry->ack ? '.' : ' ') +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_bad_header, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_required, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_unexpected, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_mismatch, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_ao_required, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DECLARE_EVENT_CLASS(tcp_ao_event, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + + TP_ARGS(sk, skb, keyid, rnext, maclen), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skbaddr) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __field(int, l3index) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(bool, fin) + __field(bool, syn) + __field(bool, rst) + __field(bool, psh) + __field(bool, ack) + + __field(__u8, keyid) + __field(__u8, rnext) + __field(__u8, maclen) + ), + + TP_fast_assign( + const struct tcphdr *th = (const struct tcphdr *)skb->data; + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skbaddr = skb; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); + __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; + + /* For filtering use */ + __entry->sport = ntohs(th->source); + __entry->dport = ntohs(th->dest); + __entry->family = sk->sk_family; + + __entry->fin = th->fin; + __entry->syn = th->syn; + __entry->rst = th->rst; + __entry->psh = th->psh; + __entry->ack = th->ack; + + __entry->keyid = keyid; + __entry->rnext = rnext; + __entry->maclen = maclen; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->l3index, + __entry->fin ? 'F' : ' ', + __entry->syn ? 'S' : ' ', + __entry->rst ? 'R' : ' ', + __entry->psh ? 'P' : ' ', + __entry->ack ? '.' : ' ', + __entry->keyid, __entry->rnext, __entry->maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_handshake_failure, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_wrong_maclen, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_mismatch, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_key_not_found, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_rnext_request, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DECLARE_EVENT_CLASS(tcp_ao_event_sk, + + TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), + + TP_ARGS(sk, keyid, rnext), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(__u8, keyid) + __field(__u8, rnext) + ), + + TP_fast_assign( + const struct inet_sock *inet = inet_sk(sk); + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS(__entry, inet, sk); + + /* For filtering use */ + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + __entry->family = sk->sk_family; + + __entry->keyid = keyid; + __entry->rnext = rnext; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc keyid=%u rnext=%u", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->keyid, __entry->rnext) +); + +DEFINE_EVENT(tcp_ao_event_sk, tcp_ao_synack_no_key, + TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), + TP_ARGS(sk, keyid, rnext) +); + +DECLARE_EVENT_CLASS(tcp_ao_event_sne, + + TP_PROTO(const struct sock *sk, __u32 new_sne), + + TP_ARGS(sk, new_sne), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(__u32, new_sne) + ), + + TP_fast_assign( + const struct inet_sock *inet = inet_sk(sk); + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS(__entry, inet, sk); + + /* For filtering use */ + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + __entry->family = sk->sk_family; + + __entry->new_sne = new_sne; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc sne=%u", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->new_sne) +); + +DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_snd_sne_update, + TP_PROTO(const struct sock *sk, __u32 new_sne), + TP_ARGS(sk, new_sne) +); + +DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_rcv_sne_update, + TP_PROTO(const struct sock *sk, __u32 new_sne), + TP_ARGS(sk, new_sne) +); + #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 78b1b27db91c7a94297a8b6a665fe7e86dfc5750 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:59 +0100 Subject: net/tcp: Remove tcp_hash_fail() Now there are tracepoints, that cover all functionality of tcp_hash_fail(), but also wire up missing places They are also faster, can be disabled and provide filtering. This potentially may create a regression if a userspace depends on dmesg logs. Fingers crossed, let's see if anyone complains in reality. Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/net/tcp_ao.h | 37 ------------------------------------- 1 file changed, 37 deletions(-) (limited to 'include') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 198e02004ad2..1d46460d0fef 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -149,43 +149,6 @@ extern struct static_key_false_deferred tcp_ao_needed; #define static_branch_tcp_ao() false #endif -static inline bool tcp_hash_should_produce_warnings(void) -{ - return static_branch_tcp_md5() || static_branch_tcp_ao(); -} - -#define tcp_hash_fail(msg, family, skb, fmt, ...) \ -do { \ - const struct tcphdr *th = tcp_hdr(skb); \ - char hdr_flags[6]; \ - char *f = hdr_flags; \ - \ - if (!tcp_hash_should_produce_warnings()) \ - break; \ - if (th->fin) \ - *f++ = 'F'; \ - if (th->syn) \ - *f++ = 'S'; \ - if (th->rst) \ - *f++ = 'R'; \ - if (th->psh) \ - *f++ = 'P'; \ - if (th->ack) \ - *f++ = '.'; \ - *f = 0; \ - if ((family) == AF_INET) { \ - net_info_ratelimited("%s for %pI4.%d->%pI4.%d [%s] " fmt "\n", \ - msg, &ip_hdr(skb)->saddr, ntohs(th->source), \ - &ip_hdr(skb)->daddr, ntohs(th->dest), \ - hdr_flags, ##__VA_ARGS__); \ - } else { \ - net_info_ratelimited("%s for [%pI6c].%d->[%pI6c].%d [%s]" fmt "\n", \ - msg, &ipv6_hdr(skb)->saddr, ntohs(th->source), \ - &ipv6_hdr(skb)->daddr, ntohs(th->dest), \ - hdr_flags, ##__VA_ARGS__); \ - } \ -} while (0) - #ifdef CONFIG_TCP_AO /* TCP-AO structures and functions */ struct tcp4_ao_context { -- cgit v1.2.3 From 249ebf3f65f8530beb2cbfb91bff1d83ba88d23c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 5 Jun 2024 14:38:49 +0200 Subject: power: sequencing: implement the pwrseq core Implement the power sequencing subsystem allowing devices to share complex powering-up and down procedures. It's split into the consumer and provider parts but does not implement any new DT bindings so that the actual power sequencing is never revealed in the DT representation. Tested-by: Amit Pundir Tested-by: Neil Armstrong # on SM8550-QRD, SM8650-QRD & SM8650-HDK Tested-by: Caleb Connolly # OnePlus 8T Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240605123850.24857-2-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- include/linux/pwrseq/consumer.h | 56 ++++++++++++++++++++++++++++++ include/linux/pwrseq/provider.h | 75 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 include/linux/pwrseq/consumer.h create mode 100644 include/linux/pwrseq/provider.h (limited to 'include') diff --git a/include/linux/pwrseq/consumer.h b/include/linux/pwrseq/consumer.h new file mode 100644 index 000000000000..7d583b4f266e --- /dev/null +++ b/include/linux/pwrseq/consumer.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Linaro Ltd. + */ + +#ifndef __POWER_SEQUENCING_CONSUMER_H__ +#define __POWER_SEQUENCING_CONSUMER_H__ + +#include + +struct device; +struct pwrseq_desc; + +#if IS_ENABLED(CONFIG_POWER_SEQUENCING) + +struct pwrseq_desc * __must_check +pwrseq_get(struct device *dev, const char *target); +void pwrseq_put(struct pwrseq_desc *desc); + +struct pwrseq_desc * __must_check +devm_pwrseq_get(struct device *dev, const char *target); + +int pwrseq_power_on(struct pwrseq_desc *desc); +int pwrseq_power_off(struct pwrseq_desc *desc); + +#else /* CONFIG_POWER_SEQUENCING */ + +static inline struct pwrseq_desc * __must_check +pwrseq_get(struct device *dev, const char *target) +{ + return ERR_PTR(-ENOSYS); +} + +static inline void pwrseq_put(struct pwrseq_desc *desc) +{ +} + +static inline struct pwrseq_desc * __must_check +devm_pwrseq_get(struct device *dev, const char *target) +{ + return ERR_PTR(-ENOSYS); +} + +static inline int pwrseq_power_on(struct pwrseq_desc *desc) +{ + return -ENOSYS; +} + +static inline int pwrseq_power_off(struct pwrseq_desc *desc) +{ + return -ENOSYS; +} + +#endif /* CONFIG_POWER_SEQUENCING */ + +#endif /* __POWER_SEQUENCING_CONSUMER_H__ */ diff --git a/include/linux/pwrseq/provider.h b/include/linux/pwrseq/provider.h new file mode 100644 index 000000000000..cbc3607cbfcf --- /dev/null +++ b/include/linux/pwrseq/provider.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Linaro Ltd. + */ + +#ifndef __POWER_SEQUENCING_PROVIDER_H__ +#define __POWER_SEQUENCING_PROVIDER_H__ + +struct device; +struct module; +struct pwrseq_device; + +typedef int (*pwrseq_power_state_func)(struct pwrseq_device *); +typedef int (*pwrseq_match_func)(struct pwrseq_device *, struct device *); + +/** + * struct pwrseq_unit_data - Configuration of a single power sequencing + * unit. + * @name: Name of the unit. + * @deps: Units that must be enabled before this one and disabled after it + * in the order they come in this array. Must be NULL-terminated. + * @enable: Callback running the part of the power-on sequence provided by + * this unit. + * @disable: Callback running the part of the power-off sequence provided + * by this unit. + */ +struct pwrseq_unit_data { + const char *name; + const struct pwrseq_unit_data **deps; + pwrseq_power_state_func enable; + pwrseq_power_state_func disable; +}; + +/** + * struct pwrseq_target_data - Configuration of a power sequencing target. + * @name: Name of the target. + * @unit: Final unit that this target must reach in order to be considered + * enabled. + * @post_enable: Callback run after the target unit has been enabled, *after* + * the state lock has been released. It's useful for implementing + * boot-up delays without blocking other users from powering up + * using the same power sequencer. + */ +struct pwrseq_target_data { + const char *name; + const struct pwrseq_unit_data *unit; + pwrseq_power_state_func post_enable; +}; + +/** + * struct pwrseq_config - Configuration used for registering a new provider. + * @parent: Parent device for the sequencer. Must be set. + * @owner: Module providing this device. + * @drvdata: Private driver data. + * @match: Provider callback used to match the consumer device to the sequencer. + * @targets: Array of targets for this power sequencer. Must be NULL-terminated. + */ +struct pwrseq_config { + struct device *parent; + struct module *owner; + void *drvdata; + pwrseq_match_func match; + const struct pwrseq_target_data **targets; +}; + +struct pwrseq_device * +pwrseq_device_register(const struct pwrseq_config *config); +void pwrseq_device_unregister(struct pwrseq_device *pwrseq); +struct pwrseq_device * +devm_pwrseq_device_register(struct device *dev, + const struct pwrseq_config *config); + +void *pwrseq_device_get_drvdata(struct pwrseq_device *pwrseq); + +#endif /* __POWER_SEQUENCING_PROVIDER_H__ */ -- cgit v1.2.3 From 000d1940c90984a9a2af9c02bc17e3ca0d87f71d Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 7 Jun 2024 16:22:58 +0300 Subject: drm/connector: hdmi: allow disabling Audio Infoframe Add drm_atomic_helper_connector_hdmi_disable_audio_infoframe(), an API to allow the driver disable sending the Audio Infoframe. This is to be used by the drivers if setup of the infoframes is not tightly coupled with the audio functionality and just disabling the audio playback doesn't stop the HDMI hardware from sending the Infoframe. Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240607-bridge-hdmi-connector-v5-1-ab384e6021af@linaro.org Signed-off-by: Dmitry Baryshkov --- include/drm/display/drm_hdmi_state_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/display/drm_hdmi_state_helper.h b/include/drm/display/drm_hdmi_state_helper.h index eb162ff24de0..285f366cf716 100644 --- a/include/drm/display/drm_hdmi_state_helper.h +++ b/include/drm/display/drm_hdmi_state_helper.h @@ -16,6 +16,7 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, int drm_atomic_helper_connector_hdmi_update_audio_infoframe(struct drm_connector *connector, struct hdmi_audio_infoframe *frame); +int drm_atomic_helper_connector_hdmi_disable_audio_infoframe(struct drm_connector *connector); int drm_atomic_helper_connector_hdmi_update_infoframes(struct drm_connector *connector, struct drm_atomic_state *state); -- cgit v1.2.3 From 6b4468b0c6ba37a16795da567b58dc80bc7fb439 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 7 Jun 2024 16:23:00 +0300 Subject: drm/bridge-connector: implement glue code for HDMI connector In order to let bridge chains implement HDMI connector infrastructure, add necessary glue code to the drm_bridge_connector. In case there is a bridge that sets DRM_BRIDGE_OP_HDMI, drm_bridge_connector will register itself as a HDMI connector and provide proxy drm_connector_hdmi_funcs implementation. Note, to simplify implementation, there can be only one bridge in a chain that sets DRM_BRIDGE_OP_HDMI. Setting more than one is considered an error. This limitation can be lifted later, if the need arises. Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240607-bridge-hdmi-connector-v5-3-ab384e6021af@linaro.org Signed-off-by: Dmitry Baryshkov --- include/drm/drm_bridge.h | 81 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'include') diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 5cf41f92d1f0..75019d16be64 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -630,6 +630,52 @@ struct drm_bridge_funcs { */ void (*hpd_disable)(struct drm_bridge *bridge); + /** + * @hdmi_tmds_char_rate_valid: + * + * Check whether a particular TMDS character rate is supported by the + * driver. + * + * This callback is optional and should only be implemented by the + * bridges that take part in the HDMI connector implementation. Bridges + * that implement it shall set the DRM_BRIDGE_OP_HDMI flag in their + * &drm_bridge->ops. + * + * Returns: + * + * Either &drm_mode_status.MODE_OK or one of the failure reasons + * in &enum drm_mode_status. + */ + enum drm_mode_status + (*hdmi_tmds_char_rate_valid)(const struct drm_bridge *bridge, + const struct drm_display_mode *mode, + unsigned long long tmds_rate); + + /** + * @hdmi_clear_infoframe: + * + * This callback clears the infoframes in the hardware during commit. + * It will be called multiple times, once for every disabled infoframe + * type. + * + * This callback is optional but it must be implemented by bridges that + * set the DRM_BRIDGE_OP_HDMI flag in their &drm_bridge->ops. + */ + int (*hdmi_clear_infoframe)(struct drm_bridge *bridge, + enum hdmi_infoframe_type type); + /** + * @hdmi_write_infoframe: + * + * Program the infoframe into the hardware. It will be called multiple + * times, once for every updated infoframe type. + * + * This callback is optional but it must be implemented by bridges that + * set the DRM_BRIDGE_OP_HDMI flag in their &drm_bridge->ops. + */ + int (*hdmi_write_infoframe)(struct drm_bridge *bridge, + enum hdmi_infoframe_type type, + const u8 *buffer, size_t len); + /** * @debugfs_init: * @@ -705,6 +751,16 @@ enum drm_bridge_ops { * this flag shall implement the &drm_bridge_funcs->get_modes callback. */ DRM_BRIDGE_OP_MODES = BIT(3), + /** + * @DRM_BRIDGE_OP_HDMI: The bridge provides HDMI connector operations, + * including infoframes support. Bridges that set this flag must + * implement the &drm_bridge_funcs->write_infoframe callback. + * + * Note: currently there can be at most one bridge in a chain that sets + * this bit. This is to simplify corresponding glue code in connector + * drivers. + */ + DRM_BRIDGE_OP_HDMI = BIT(4), }; /** @@ -773,6 +829,31 @@ struct drm_bridge { * @hpd_cb. */ void *hpd_data; + + /** + * @vendor: Vendor of the product to be used for the SPD InfoFrame + * generation. This is required if @DRM_BRIDGE_OP_HDMI is set. + */ + const char *vendor; + + /** + * @product: Name of the product to be used for the SPD InfoFrame + * generation. This is required if @DRM_BRIDGE_OP_HDMI is set. + */ + const char *product; + + /** + * @supported_formats: Bitmask of @hdmi_colorspace listing supported + * output formats. This is only relevant if @DRM_BRIDGE_OP_HDMI is set. + */ + unsigned int supported_formats; + + /** + * @max_bpc: Maximum bits per char the HDMI bridge supports. Allowed + * values are 8, 10 and 12. This is only relevant if + * @DRM_BRIDGE_OP_HDMI is set. + */ + unsigned int max_bpc; }; static inline struct drm_bridge * -- cgit v1.2.3 From 8682ad36870790ad6a9a03ac237c8d87d34b26d5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:43 +0200 Subject: wifi: cfg80211: use BIT() for flag enums Use BIT(x) instead of 1< Reviewed-by: Ilan Peer Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.c21598fbf49c.Ib8f26c5e9f508aee19fdfa1fd4b5995f084c46d4@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 102 ++++++++++++++++++++++++------------------------- 1 file changed, 51 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5da9bb0ac6a4..051d4eb227bf 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -127,31 +127,31 @@ struct wiphy; * even if it is otherwise disabled. */ enum ieee80211_channel_flags { - IEEE80211_CHAN_DISABLED = 1<<0, - IEEE80211_CHAN_NO_IR = 1<<1, - IEEE80211_CHAN_PSD = 1<<2, - IEEE80211_CHAN_RADAR = 1<<3, - IEEE80211_CHAN_NO_HT40PLUS = 1<<4, - IEEE80211_CHAN_NO_HT40MINUS = 1<<5, - IEEE80211_CHAN_NO_OFDM = 1<<6, - IEEE80211_CHAN_NO_80MHZ = 1<<7, - IEEE80211_CHAN_NO_160MHZ = 1<<8, - IEEE80211_CHAN_INDOOR_ONLY = 1<<9, - IEEE80211_CHAN_IR_CONCURRENT = 1<<10, - IEEE80211_CHAN_NO_20MHZ = 1<<11, - IEEE80211_CHAN_NO_10MHZ = 1<<12, - IEEE80211_CHAN_NO_HE = 1<<13, - IEEE80211_CHAN_1MHZ = 1<<14, - IEEE80211_CHAN_2MHZ = 1<<15, - IEEE80211_CHAN_4MHZ = 1<<16, - IEEE80211_CHAN_8MHZ = 1<<17, - IEEE80211_CHAN_16MHZ = 1<<18, - IEEE80211_CHAN_NO_320MHZ = 1<<19, - IEEE80211_CHAN_NO_EHT = 1<<20, - IEEE80211_CHAN_DFS_CONCURRENT = 1<<21, - IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = 1<<22, - IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = 1<<23, - IEEE80211_CHAN_CAN_MONITOR = 1<<24, + IEEE80211_CHAN_DISABLED = BIT(0), + IEEE80211_CHAN_NO_IR = BIT(1), + IEEE80211_CHAN_PSD = BIT(2), + IEEE80211_CHAN_RADAR = BIT(3), + IEEE80211_CHAN_NO_HT40PLUS = BIT(4), + IEEE80211_CHAN_NO_HT40MINUS = BIT(5), + IEEE80211_CHAN_NO_OFDM = BIT(6), + IEEE80211_CHAN_NO_80MHZ = BIT(7), + IEEE80211_CHAN_NO_160MHZ = BIT(8), + IEEE80211_CHAN_INDOOR_ONLY = BIT(9), + IEEE80211_CHAN_IR_CONCURRENT = BIT(10), + IEEE80211_CHAN_NO_20MHZ = BIT(11), + IEEE80211_CHAN_NO_10MHZ = BIT(12), + IEEE80211_CHAN_NO_HE = BIT(13), + IEEE80211_CHAN_1MHZ = BIT(14), + IEEE80211_CHAN_2MHZ = BIT(15), + IEEE80211_CHAN_4MHZ = BIT(16), + IEEE80211_CHAN_8MHZ = BIT(17), + IEEE80211_CHAN_16MHZ = BIT(18), + IEEE80211_CHAN_NO_320MHZ = BIT(19), + IEEE80211_CHAN_NO_EHT = BIT(20), + IEEE80211_CHAN_DFS_CONCURRENT = BIT(21), + IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = BIT(22), + IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = BIT(23), + IEEE80211_CHAN_CAN_MONITOR = BIT(24), }; #define IEEE80211_CHAN_NO_HT40 \ @@ -229,13 +229,13 @@ struct ieee80211_channel { * @IEEE80211_RATE_SUPPORTS_10MHZ: Rate can be used in 10 MHz mode */ enum ieee80211_rate_flags { - IEEE80211_RATE_SHORT_PREAMBLE = 1<<0, - IEEE80211_RATE_MANDATORY_A = 1<<1, - IEEE80211_RATE_MANDATORY_B = 1<<2, - IEEE80211_RATE_MANDATORY_G = 1<<3, - IEEE80211_RATE_ERP_G = 1<<4, - IEEE80211_RATE_SUPPORTS_5MHZ = 1<<5, - IEEE80211_RATE_SUPPORTS_10MHZ = 1<<6, + IEEE80211_RATE_SHORT_PREAMBLE = BIT(0), + IEEE80211_RATE_MANDATORY_A = BIT(1), + IEEE80211_RATE_MANDATORY_B = BIT(2), + IEEE80211_RATE_MANDATORY_G = BIT(3), + IEEE80211_RATE_ERP_G = BIT(4), + IEEE80211_RATE_SUPPORTS_5MHZ = BIT(5), + IEEE80211_RATE_SUPPORTS_10MHZ = BIT(6), }; /** @@ -1957,9 +1957,9 @@ struct rate_info { * @BSS_PARAM_FLAGS_SHORT_SLOT_TIME: whether short slot time is enabled */ enum bss_param_flags { - BSS_PARAM_FLAGS_CTS_PROT = 1<<0, - BSS_PARAM_FLAGS_SHORT_PREAMBLE = 1<<1, - BSS_PARAM_FLAGS_SHORT_SLOT_TIME = 1<<2, + BSS_PARAM_FLAGS_CTS_PROT = BIT(0), + BSS_PARAM_FLAGS_SHORT_PREAMBLE = BIT(1), + BSS_PARAM_FLAGS_SHORT_SLOT_TIME = BIT(2), }; /** @@ -2266,13 +2266,13 @@ static inline int cfg80211_get_station(struct net_device *dev, * @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address */ enum monitor_flags { - MONITOR_FLAG_CHANGED = 1<<__NL80211_MNTR_FLAG_INVALID, - MONITOR_FLAG_FCSFAIL = 1< Date: Thu, 23 May 2024 12:09:44 +0200 Subject: wifi: ieee80211: remove unused enum ieee80211_client_reg_power This has never been used, and it's really not directly representing the spec, so shouldn't have been here in the first place. Remove it. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.32ed8fc1522d.Id4480d162e1921478e33d145890dc16c263b57bf@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 769008a51809..456a55bd6c6b 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2426,24 +2426,6 @@ enum ieee80211_ap_reg_power { IEEE80211_REG_AP_POWER_AFTER_LAST - 1, }; -/** - * enum ieee80211_client_reg_power - regulatory power for a client - * - * @IEEE80211_REG_UNSET_CLIENT: Client has no regulatory power mode - * @IEEE80211_REG_DEFAULT_CLIENT: Default Client - * @IEEE80211_REG_SUBORDINATE_CLIENT: Subordinate Client - * @IEEE80211_REG_CLIENT_POWER_AFTER_LAST: internal - * @IEEE80211_REG_CLIENT_POWER_MAX: maximum value - */ -enum ieee80211_client_reg_power { - IEEE80211_REG_UNSET_CLIENT, - IEEE80211_REG_DEFAULT_CLIENT, - IEEE80211_REG_SUBORDINATE_CLIENT, - IEEE80211_REG_CLIENT_POWER_AFTER_LAST, - IEEE80211_REG_CLIENT_POWER_MAX = - IEEE80211_REG_CLIENT_POWER_AFTER_LAST - 1, -}; - /* 802.11ax HE MAC capabilities */ #define IEEE80211_HE_MAC_CAP0_HTC_HE 0x01 #define IEEE80211_HE_MAC_CAP0_TWT_REQ 0x02 -- cgit v1.2.3 From 0a9314ad5f45aeb10326dce33c5d70b85f74ef2d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:45 +0200 Subject: wifi: cfg80211: move enum ieee80211_ap_reg_power to cfg80211 This really shouldn't have been in ieee80211.h, since it doesn't directly represent the spec. Move it to cfg80211 rather than mac80211 since upcoming changes will use it there. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.962b16c831cd.I5745962525b1b176c5b90d37b3720fc100eee406@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 20 -------------------- include/net/cfg80211.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 456a55bd6c6b..30cef3b940eb 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2406,26 +2406,6 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, int mcs, bool ext_nss_bw_capable, unsigned int max_vht_nss); -/** - * enum ieee80211_ap_reg_power - regulatory power for a Access Point - * - * @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode - * @IEEE80211_REG_LPI_AP: Indoor Access Point - * @IEEE80211_REG_SP_AP: Standard power Access Point - * @IEEE80211_REG_VLP_AP: Very low power Access Point - * @IEEE80211_REG_AP_POWER_AFTER_LAST: internal - * @IEEE80211_REG_AP_POWER_MAX: maximum value - */ -enum ieee80211_ap_reg_power { - IEEE80211_REG_UNSET_AP, - IEEE80211_REG_LPI_AP, - IEEE80211_REG_SP_AP, - IEEE80211_REG_VLP_AP, - IEEE80211_REG_AP_POWER_AFTER_LAST, - IEEE80211_REG_AP_POWER_MAX = - IEEE80211_REG_AP_POWER_AFTER_LAST - 1, -}; - /* 802.11ax HE MAC capabilities */ #define IEEE80211_HE_MAC_CAP0_HTC_HE 0x01 #define IEEE80211_HE_MAC_CAP0_TWT_REQ 0x02 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 051d4eb227bf..fb7d76513e1c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6081,6 +6081,21 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy, void wiphy_delayed_work_flush(struct wiphy *wiphy, struct wiphy_delayed_work *dwork); +/** + * enum ieee80211_ap_reg_power - regulatory power for an Access Point + * + * @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode + * @IEEE80211_REG_LPI_AP: Indoor Access Point + * @IEEE80211_REG_SP_AP: Standard power Access Point + * @IEEE80211_REG_VLP_AP: Very low power Access Point + */ +enum ieee80211_ap_reg_power { + IEEE80211_REG_UNSET_AP, + IEEE80211_REG_LPI_AP, + IEEE80211_REG_SP_AP, + IEEE80211_REG_VLP_AP, +}; + /** * struct wireless_dev - wireless device state * -- cgit v1.2.3 From 9fd171a71b9d4cd8855891c0ba7e2a152139b41a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:48 +0200 Subject: wifi: cfg80211: refactor regulatory beaconing checking There are two functions exported now, with different settings, refactor to just export a single function that take a struct with different settings. This will make it easier to add more parameters. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.d44c34dadfc2.I59b4403108e0dbf7fc6ae8f7522e1af520cffb1c@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 54 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fb7d76513e1c..45ffeb110d36 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8800,6 +8800,31 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy, sig_dbm); } +/** + * struct cfg80211_beaconing_check_config - beacon check configuration + * @iftype: the interface type to check for + * @relax: allow IR-relaxation conditions to apply (e.g. another + * interface connected already on the same channel) + * NOTE: If this is set, wiphy mutex must be held. + */ +struct cfg80211_beaconing_check_config { + enum nl80211_iftype iftype; + bool relax; +}; + +/** + * cfg80211_reg_check_beaconing - check if beaconing is allowed + * @wiphy: the wiphy + * @chandef: the channel definition + * @cfg: additional parameters for the checking + * + * Return: %true if there is no secondary channel or the secondary channel(s) + * can be used for beaconing (i.e. is not a radar channel etc.) + */ +bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + struct cfg80211_beaconing_check_config *cfg); + /** * cfg80211_reg_can_beacon - check if beaconing is allowed * @wiphy: the wiphy @@ -8809,9 +8834,17 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy, * Return: %true if there is no secondary channel or the secondary channel(s) * can be used for beaconing (i.e. is not a radar channel etc.) */ -bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype); +static inline bool +cfg80211_reg_can_beacon(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) +{ + struct cfg80211_beaconing_check_config config = { + .iftype = iftype, + }; + + return cfg80211_reg_check_beaconing(wiphy, chandef, &config); +} /** * cfg80211_reg_can_beacon_relax - check if beaconing is allowed with relaxation @@ -8826,9 +8859,18 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, * * Context: Requires the wiphy mutex to be held. */ -bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype); +static inline bool +cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) +{ + struct cfg80211_beaconing_check_config config = { + .iftype = iftype, + .relax = true, + }; + + return cfg80211_reg_check_beaconing(wiphy, chandef, &config); +} /** * cfg80211_ch_switch_notify - update wdev channel and notify userspace -- cgit v1.2.3 From c1d8bd8d777d55f6708ca6e47c54dbe9f66f9bbb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:49 +0200 Subject: wifi: cfg80211: add regulatory flag to allow VLP AP operation Add a regulatory flag to allow VLP AP operation even on channels otherwise marked NO_IR, which may be possible in some regulatory domains/countries. Note that this requires checking also when the beacon is changed, since that may change the regulatory power type. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.63792ce19790.Ie2a02750d283b78fbf3c686b10565fb0388889e2@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++++ include/uapi/linux/nl80211.h | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 45ffeb110d36..6f992aff74ae 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -125,6 +125,8 @@ struct wiphy; * @IEEE80211_CHAN_CAN_MONITOR: This channel can be used for monitor * mode even in the presence of other (regulatory) restrictions, * even if it is otherwise disabled. + * @IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP: Allow using this channel for AP operation + * with very low power (VLP), even if otherwise set to NO_IR. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = BIT(0), @@ -152,6 +154,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = BIT(22), IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = BIT(23), IEEE80211_CHAN_CAN_MONITOR = BIT(24), + IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP = BIT(25), }; #define IEEE80211_CHAN_NO_HT40 \ @@ -8806,9 +8809,12 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy, * @relax: allow IR-relaxation conditions to apply (e.g. another * interface connected already on the same channel) * NOTE: If this is set, wiphy mutex must be held. + * @reg_power: &enum ieee80211_ap_reg_power value indicating the + * advertised/used 6 GHz regulatory power setting */ struct cfg80211_beaconing_check_config { enum nl80211_iftype iftype; + enum ieee80211_ap_reg_power reg_power; bool relax; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f917bc6c9b6f..6ae3997061b6 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4277,6 +4277,8 @@ enum nl80211_wmm_rule { * @NL80211_FREQUENCY_ATTR_CAN_MONITOR: This channel can be used in monitor * mode despite other (regulatory) restrictions, even if the channel is * otherwise completely disabled. + * @NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP: This channel can be used for a + * very low power (VLP) AP, despite being NO_IR. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4320,6 +4322,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT, NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT, NL80211_FREQUENCY_ATTR_CAN_MONITOR, + NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4529,6 +4532,8 @@ enum nl80211_sched_scan_match_attr { * Should be used together with %NL80211_RRF_DFS only. * @NL80211_RRF_NO_6GHZ_VLP_CLIENT: Client connection to VLP AP not allowed * @NL80211_RRF_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP not allowed + * @NL80211_RRF_ALLOW_6GHZ_VLP_AP: Very low power (VLP) AP can be permitted + * despite NO_IR configuration. */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -4553,6 +4558,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_DFS_CONCURRENT = 1<<21, NL80211_RRF_NO_6GHZ_VLP_CLIENT = 1<<22, NL80211_RRF_NO_6GHZ_AFC_CLIENT = 1<<23, + NL80211_RRF_ALLOW_6GHZ_VLP_AP = 1<<24, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR -- cgit v1.2.3 From 4565d2652a37e438e4cd729e2a8dfeffe34c958c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 12 Jun 2024 10:20:17 +0200 Subject: PCI/pwrctl: Add PCI power control core code Some PCI devices must be powered-on before they can be detected on the bus. Introduce a simple framework reusing the existing PCI OF infrastructure. The way this works is: a DT node representing a PCI device connected to the port can be matched against its power control platform driver. If the match succeeds, the driver is responsible for powering-up the device and calling pci_pwrctl_device_set_ready() which will trigger a PCI bus rescan as well as subscribe to PCI bus notifications. When the device is detected and created, we'll make it consume the same DT node that the platform device did. When the device is bound, we'll create a device link between it and the parent power control device. Tested-by: Amit Pundir Tested-by: Neil Armstrong # on SM8550-QRD, SM8650-QRD & SM8650-HDK Tested-by: Caleb Connolly # OnePlus 8T Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20240612082019.19161-5-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- include/linux/pci-pwrctl.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 include/linux/pci-pwrctl.h (limited to 'include') diff --git a/include/linux/pci-pwrctl.h b/include/linux/pci-pwrctl.h new file mode 100644 index 000000000000..45e9cfe740e4 --- /dev/null +++ b/include/linux/pci-pwrctl.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Linaro Ltd. + */ + +#ifndef __PCI_PWRCTL_H__ +#define __PCI_PWRCTL_H__ + +#include + +struct device; +struct device_link; + +/* + * This is a simple framework for solving the issue of PCI devices that require + * certain resources (regulators, GPIOs, clocks) to be enabled before the + * device can actually be detected on the PCI bus. + * + * The idea is to reuse the platform bus to populate OF nodes describing the + * PCI device and its resources, let these platform devices probe and enable + * relevant resources and then trigger a rescan of the PCI bus allowing for the + * same device (with a second associated struct device) to be registered with + * the PCI subsystem. + * + * To preserve a correct hierarchy for PCI power management and device reset, + * we create a device link between the power control platform device (parent) + * and the supplied PCI device (child). + */ + +/** + * struct pci_pwrctl - PCI device power control context. + * @dev: Address of the power controlling device. + * + * An object of this type must be allocated by the PCI power control device and + * passed to the pwrctl subsystem to trigger a bus rescan and setup a device + * link with the device once it's up. + */ +struct pci_pwrctl { + struct device *dev; + + /* Private: don't use. */ + struct notifier_block nb; + struct device_link *link; +}; + +int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl); +void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl); +int devm_pci_pwrctl_device_set_ready(struct device *dev, + struct pci_pwrctl *pwrctl); + +#endif /* __PCI_PWRCTL_H__ */ -- cgit v1.2.3 From 7180f8d91fcbf252de572d9ffacc945effed0060 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 11 Jun 2024 19:38:22 +0200 Subject: vfs: add rcu-based find_inode variants for iget ops This avoids one inode hash lock acquire in the common case on inode creation, in effect significantly reducing contention. On the stock kernel said lock is typically taken twice: 1. once to check if the inode happens to already be present 2. once to add it to the hash The back-to-back lock/unlock pattern is known to degrade performance significantly, which is further exacerbated if the hash is heavily populated (long chains to walk, extending hold time). Arguably hash sizing and hashing algo need to be revisited, but that's beyond the scope of this patch. With the acquire from step 1 eliminated with RCU lookup throughput increases significantly at the scale of 20 cores (benchmark results at the bottom). So happens the hash already supports RCU-based operation, but lookups on inode insertions didn't take advantage of it. This of course has its limits as the global lock is still a bottleneck. There was a patchset posted which introduced fine-grained locking[1] but it appears staled. Apart from that doubt was expressed whether a handrolled hash implementation is appropriate to begin with, suggesting replacement with rhashtables. Nobody committed to carrying [1] across the finish line or implementing anything better, thus the bandaid below. iget_locked consumers (notably ext4) get away without any changes because inode comparison method is built-in. iget5_locked consumers pass a custom callback. Since removal of locking adds more problems (inode can be changing) it's not safe to assume all filesystems happen to cope. Thus iget5_locked_rcu gets added, requiring manual conversion of interested filesystems. In order to reduce code duplication find_inode and find_inode_fast grow an argument indicating whether inode hash lock is held, which is passed down in case sleeping is necessary. They always rcu_read_lock, which is redundant but harmless. Doing it conditionally reduces readability for no real gain that I can see. RCU-alike restrictions were already put on callbacks due to the hash spinlock being held. Benchmarking: There is a real cache-busting workload scanning millions of files in parallel (it's a backup appliance), where the initial lookup is guaranteed to fail resulting in the two lock acquires on stock kernel (and one with the patch at hand). Implemented below is a synthetic benchmark providing the same behavior. [I shall note the workload is not running on Linux, instead it was causing trouble elsewhere. Benchmark below was used while addressing said problems and was found to adequately represent the real workload.] Total real time fluctuates by 1-2s. With 20 threads each walking a dedicated 1000 dirs * 1000 files directory tree to stat(2) on a 32 core + 24GB RAM vm: ext4 (needed mkfs.ext4 -N 24000000): before: 3.77s user 890.90s system 1939% cpu 46.118 total after: 3.24s user 397.73s system 1858% cpu 21.581 total (-53%) That's 20 million files to visit, while the machine can only cache about 15 million at a time (obtained from ext4_inode_cache object count in /proc/slabinfo). Since each terminal inode is only visited once per run this amounts to 0% hit ratio for the dentry cache and the hash table (there are however hits for the intermediate directories). On repeated runs the kernel caches the last ~15 mln, meaning there is ~5 mln of uncached inodes which are going to be visited first, evicting the previously cached state as it happens. Lack of hits can be trivially verified with bpftrace, like so: bpftrace -e 'kretprobe:find_inode_fast { @[kstack(), retval != 0] = count(); }'\ -c "/bin/sh walktrees /testfs 20" Best ran more than once. Expected results after "warmup": [snip] @[ __ext4_iget+275 ext4_lookup+224 __lookup_slow+130 walk_component+219 link_path_walk.part.0.constprop.0+614 path_lookupat+62 filename_lookup+204 vfs_statx+128 vfs_fstatat+131 __do_sys_newfstatat+38 do_syscall_64+87 entry_SYSCALL_64_after_hwframe+118 , 1]: 20000 @[ __ext4_iget+275 ext4_lookup+224 __lookup_slow+130 walk_component+219 path_lookupat+106 filename_lookup+204 vfs_statx+128 vfs_fstatat+131 __do_sys_newfstatat+38 do_syscall_64+87 entry_SYSCALL_64_after_hwframe+118 , 1]: 20000000 That is 20 million calls for the initial lookup and 20 million after allocating a new inode, all of them failing to return a value != 0 (i.e., they are returning NULL -- no match found). Of course aborting the benchmark in the middle and starting it again (or messing with the state in other ways) is going to alter these results. Benchmark can be found here: https://people.freebsd.org/~mjg/fstree.tgz [1] https://lore.kernel.org/all/20231206060629.2827226-1-david@fromorbit.com/ Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20240611173824.535995-2-mjguzik@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0283cf366c2a..aac51da4f90c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3029,7 +3029,12 @@ extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data); -extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); +struct inode *iget5_locked(struct super_block *, unsigned long, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), void *); +struct inode *iget5_locked_rcu(struct super_block *, unsigned long, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), void *); extern struct inode * iget_locked(struct super_block *, unsigned long); extern struct inode *find_inode_nowait(struct super_block *, unsigned long, -- cgit v1.2.3 From 66055636a146c435cd226fb5a334176304652f3c Mon Sep 17 00:00:00 2001 From: Tejas Vipin Date: Wed, 12 Jun 2024 19:05:43 +0530 Subject: drm/mipi-dsi: fix handling of ctx in mipi_dsi_msleep ctx would be better off treated as a pointer to account for most of its usage so far, and brackets should be added to account for operator precedence for correct evaluation. Fixes: f79d6d28d8fe ("drm/mipi-dsi: wrap more functions for streamline handling") Signed-off-by: Tejas Vipin Suggested-by: Douglas Anderson Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20240612133550.473279-3-tejasvipin76@gmail.com [narmstrong: fixed fixes tag] Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240612133550.473279-3-tejasvipin76@gmail.com --- include/drm/drm_mipi_dsi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index bd5a0b6d0711..71d121aeef24 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -293,7 +293,7 @@ ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params, #define mipi_dsi_msleep(ctx, delay) \ do { \ - if (!ctx.accum_err) \ + if (!(ctx)->accum_err) \ msleep(delay); \ } while (0) -- cgit v1.2.3 From 3b9c181bcde8555ca81b2394c2dc2201cefc2dd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Tue, 11 Jun 2024 10:47:15 -0700 Subject: devcoredump: Add dev_coredumpm_timeout() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add function to set a custom coredump timeout. For Xe driver usage, current 5 minutes timeout may be too short for users to search and understand what needs to be done to capture coredump to report bugs. We have plans to automate(distribute a udev script) it but at the end will be up to distros and users to pack it so having a option to increase the timeout is a safer option. v2: - replace dev_coredump_timeout_set() by dev_coredumpm_timeout() (Mukesh) v3: - make dev_coredumpm() static inline (Johannes) v5: - rename DEVCOREDUMP_TIMEOUT -> DEVCD_TIMEOUT to avoid redefinition in include/net/bluetooth/coredump.h v6: - fix definition of dev_coredumpm_timeout() when CONFIG_DEV_COREDUMP is disabled Cc: Rodrigo Vivi Cc: Mukesh Ojha Cc: Johannes Berg Cc: Jonathan Cavitt Reviewed-by: Rodrigo Vivi Reviewed-by: Jonathan Cavitt Signed-off-by: José Roberto de Souza Acked-by: Greg Kroah-Hartman Acked-by: Johannes Berg Link: https://patchwork.freedesktop.org/patch/msgid/20240611174716.72660-1-jose.souza@intel.com Signed-off-by: Rodrigo Vivi --- include/linux/devcoredump.h | 53 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h index c8f7eb6cc191..377892604ff4 100644 --- a/include/linux/devcoredump.h +++ b/include/linux/devcoredump.h @@ -12,6 +12,9 @@ #include #include +/* if data isn't read by userspace after 5 minutes then delete it */ +#define DEVCD_TIMEOUT (HZ * 60 * 5) + /* * _devcd_free_sgtable - free all the memory of the given scatterlist table * (i.e. both pages and scatterlist instances) @@ -50,16 +53,17 @@ static inline void _devcd_free_sgtable(struct scatterlist *table) kfree(delete_iter); } - #ifdef CONFIG_DEV_COREDUMP void dev_coredumpv(struct device *dev, void *data, size_t datalen, gfp_t gfp); -void dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, - ssize_t (*read)(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen), - void (*free)(void *data)); +void dev_coredumpm_timeout(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, + size_t count, void *data, + size_t datalen), + void (*free)(void *data), + unsigned long timeout); void dev_coredumpsg(struct device *dev, struct scatterlist *table, size_t datalen, gfp_t gfp); @@ -73,11 +77,13 @@ static inline void dev_coredumpv(struct device *dev, void *data, } static inline void -dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, - ssize_t (*read)(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen), - void (*free)(void *data)) +dev_coredumpm_timeout(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, + size_t count, void *data, + size_t datalen), + void (*free)(void *data), + unsigned long timeout) { free(data); } @@ -92,4 +98,29 @@ static inline void dev_coredump_put(struct device *dev) } #endif /* CONFIG_DEV_COREDUMP */ +/** + * dev_coredumpm - create device coredump with read/free methods + * @dev: the struct device for the crashed device + * @owner: the module that contains the read/free functions, use %THIS_MODULE + * @data: data cookie for the @read/@free functions + * @datalen: length of the data + * @gfp: allocation flags + * @read: function to read from the given buffer + * @free: function to free the given buffer + * + * Creates a new device coredump for the given device. If a previous one hasn't + * been read yet, the new coredump is discarded. The data lifetime is determined + * by the device coredump framework and when it is no longer needed the @free + * function will be called to free the data. + */ +static inline void dev_coredumpm(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen), + void (*free)(void *data)) +{ + dev_coredumpm_timeout(dev, owner, data, datalen, gfp, read, free, + DEVCD_TIMEOUT); +} + #endif /* __DEVCOREDUMP_H */ -- cgit v1.2.3 From e54700f7d6aa2ae0d0a0aeeebedcecd7ce1123fe Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 3 Jun 2024 20:24:30 +0530 Subject: drm/xe/bmg: Add PCI IDs Add the initial set of device IDs for Battlemage. Signed-off-by: Matt Roper Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20240603145430.1260817-1-balasubramani.vivekanandan@intel.com --- include/drm/xe_pciids.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index adb37bc541e4..644872a35c35 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -192,4 +192,11 @@ MACRO__(0x64A0, ## __VA_ARGS__), \ MACRO__(0x64B0, ## __VA_ARGS__) +#define XE_BMG_IDS(MACRO__, ...) \ + MACRO__(0xE202, ## __VA_ARGS__), \ + MACRO__(0xE20B, ## __VA_ARGS__), \ + MACRO__(0xE20C, ## __VA_ARGS__), \ + MACRO__(0xE20D, ## __VA_ARGS__), \ + MACRO__(0xE212, ## __VA_ARGS__) + #endif -- cgit v1.2.3 From e038ee6189842e9662d2fc59d09dbcf48350cf99 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Mon, 10 Jun 2024 16:41:44 +0530 Subject: block: unmap and free user mapped integrity via submitter The user mapped intergity is copied back and unpinned by bio_integrity_free which is a low-level routine. Do it via the submitter rather than doing it in the low-level block layer code, to split the submitter side from the consumer side of the bio. Signed-off-by: Anuj Gupta Signed-off-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20240610111144.14647-1-anuj20.g@samsung.com Signed-off-by: Jens Axboe --- include/linux/bio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index d5379548d684..818e93612947 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -731,6 +731,7 @@ static inline bool bioset_initialized(struct bio_set *bs) bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed); +void bio_integrity_unmap_free_user(struct bio *bio); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern bool bio_integrity_prep(struct bio *); @@ -807,6 +808,9 @@ static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf, { return -EINVAL; } +static inline void bio_integrity_unmap_free_user(struct bio *bio) +{ +} #endif /* CONFIG_BLK_DEV_INTEGRITY */ -- cgit v1.2.3 From ec209ad86324de84ef66990f0e9df0851e45e054 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Wed, 12 Jun 2024 09:58:32 -0600 Subject: bpf: verifier: Relax caller requirements for kfunc projection type args Currently, if a kfunc accepts a projection type as an argument (eg struct __sk_buff *), the caller must exactly provide exactly the same type with provable provenance. However in practice, kfuncs that accept projection types _must_ cast to the underlying type before use b/c projection type layouts are completely made up. Thus, it is ok to relax the verifier rules around implicit conversions. We will use this functionality in the next commit when we align kfuncs to user-facing types. Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/e2c025cb09ccfd4af1ec9e18284dc3cecff7514d.1718207789.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index f9e56fd12a9f..56d91daacdba 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -531,6 +531,7 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id); +bool btf_is_projection_of(const char *pname, const char *tname); bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg); -- cgit v1.2.3 From cce4c40b960673f9e020835def310f1e89d3a940 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Wed, 12 Jun 2024 09:58:33 -0600 Subject: bpf: treewide: Align kfunc signatures to prog point-of-view Previously, kfunc declarations in bpf_kfuncs.h (and others) used "user facing" types for kfuncs prototypes while the actual kfunc definitions used "kernel facing" types. More specifically: bpf_dynptr vs bpf_dynptr_kern, __sk_buff vs sk_buff, and xdp_md vs xdp_buff. It wasn't an issue before, as the verifier allows aliased types. However, since we are now generating kfunc prototypes in vmlinux.h (in addition to keeping bpf_kfuncs.h around), this conflict creates compilation errors. Fix this conflict by using "user facing" types in kfunc definitions. This results in more casts, but otherwise has no additional runtime cost. Note, similar to 5b268d1ebcdc ("bpf: Have bpf_rdonly_cast() take a const pointer"), we also make kfuncs take const arguments where appropriate in order to make the kfunc more permissive. Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/b58346a63a0e66bc9b7504da751b526b0b189a67.1718207789.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a834f4b761bc..f636b4998bf7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3265,8 +3265,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size); -int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr); +int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr); #else static inline bool bpf_sock_common_is_valid_access(int off, int size, enum bpf_access_type type, @@ -3288,8 +3288,8 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, { return 0; } -static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr) +static inline int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr) { return -EOPNOTSUPP; } -- cgit v1.2.3 From 7fc45cb68696c7213c484ec81892bc8a986fde52 Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Mon, 10 Jun 2024 03:28:39 -0700 Subject: net: mana: Allow variable size indirection table Allow variable size indirection table allocation in MANA instead of using a constant value MANA_INDIRECT_TABLE_SIZE. The size is now derived from the MANA_QUERY_VPORT_CONFIG and the indirection table is allocated dynamically. Signed-off-by: Shradha Gupta Link: https://lore.kernel.org/r/1718015319-9609-1-git-send-email-shradhagupta@linux.microsoft.com Reviewed-by: Dexuan Cui Reviewed-by: Haiyang Zhang Signed-off-by: Leon Romanovsky --- include/net/mana/gdma.h | 4 +++- include/net/mana/mana.h | 9 +++++---- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 27684135bb4d..c547756c4284 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -543,11 +543,13 @@ enum { */ #define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2) #define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3) +#define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5) #define GDMA_DRV_CAP_FLAGS1 \ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ - GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG) + GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \ + GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT) #define GDMA_DRV_CAP_FLAGS2 0 diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 561f6719fb4e..59823901b74f 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -30,8 +30,8 @@ enum TRI_STATE { }; /* Number of entries for hardware indirection table must be in power of 2 */ -#define MANA_INDIRECT_TABLE_SIZE 64 -#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1) +#define MANA_INDIRECT_TABLE_MAX_SIZE 512 +#define MANA_INDIRECT_TABLE_DEF_SIZE 64 /* The Toeplitz hash key's length in bytes: should be multiple of 8 */ #define MANA_HASH_KEY_SIZE 40 @@ -410,10 +410,11 @@ struct mana_port_context { struct mana_tx_qp *tx_qp; /* Indirection Table for RX & TX. The values are queue indexes */ - u32 indir_table[MANA_INDIRECT_TABLE_SIZE]; + u32 *indir_table; + u32 indir_table_sz; /* Indirection table containing RxObject Handles */ - mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE]; + mana_handle_t *rxobj_table; /* Hash key used by the NIC */ u8 hashkey[MANA_HASH_KEY_SIZE]; -- cgit v1.2.3 From b975d3ee5962237c1e2f5d5aeeaaf0dc2173486c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 9 Jun 2024 00:10:39 +0200 Subject: net: add and use skb_get_hash_net Years ago flow dissector gained ability to delegate flow dissection to a bpf program, scoped per netns. Unfortunately, skb_get_hash() only gets an sk_buff argument instead of both net+skb. This means the flow dissector needs to obtain the netns pointer from somewhere else. The netns is derived from skb->dev, and if that is not available, from skb->sk. If neither is set, we hit a (benign) WARN_ON_ONCE(). Trying both dev and sk covers most cases, but not all, as recently reported by Christoph Paasch. In case of nf-generated tcp reset, both sk and dev are NULL: WARNING: .. net/core/flow_dissector.c:1104 skb_flow_dissect_flow_keys include/linux/skbuff.h:1536 [inline] skb_get_hash include/linux/skbuff.h:1578 [inline] nft_trace_init+0x7d/0x120 net/netfilter/nf_tables_trace.c:320 nft_do_chain+0xb26/0xb90 net/netfilter/nf_tables_core.c:268 nft_do_chain_ipv4+0x7a/0xa0 net/netfilter/nft_chain_filter.c:23 nf_hook_slow+0x57/0x160 net/netfilter/core.c:626 __ip_local_out+0x21d/0x260 net/ipv4/ip_output.c:118 ip_local_out+0x26/0x1e0 net/ipv4/ip_output.c:127 nf_send_reset+0x58c/0x700 net/ipv4/netfilter/nf_reject_ipv4.c:308 nft_reject_ipv4_eval+0x53/0x90 net/ipv4/netfilter/nft_reject_ipv4.c:30 [..] syzkaller did something like this: table inet filter { chain input { type filter hook input priority filter; policy accept; meta nftrace set 1 tcp dport 42 reject with tcp reset } chain output { type filter hook output priority filter; policy accept; # empty chain is enough } } ... then sends a tcp packet to port 42. Initial attempt to simply set skb->dev from nf_reject_ipv4 doesn't cover all cases: skbs generated via ipv4 igmp_send_report trigger similar splat. Moreover, Pablo Neira found that nft_hash.c uses __skb_get_hash_symmetric() which would trigger same warn splat for such skbs. Lets allow callers to pass the current netns explicitly. The nf_trace infrastructure is adjusted to use the new helper. __skb_get_hash_symmetric is handled in the next patch. Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/494 Reviewed-by: Willem de Bruijn Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240608221057.16070-2-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fe7d8dbef77e..6e78019f899a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1498,7 +1498,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) __skb_set_hash(skb, hash, true, is_l4); } -void __skb_get_hash(struct sk_buff *skb); +void __skb_get_hash_net(const struct net *net, struct sk_buff *skb); u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, @@ -1578,10 +1578,18 @@ void skb_flow_dissect_hash(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); +static inline __u32 skb_get_hash_net(const struct net *net, struct sk_buff *skb) +{ + if (!skb->l4_hash && !skb->sw_hash) + __skb_get_hash_net(net, skb); + + return skb->hash; +} + static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) - __skb_get_hash(skb); + __skb_get_hash_net(NULL, skb); return skb->hash; } -- cgit v1.2.3 From d1dab4f71d372e00e2d34a9c32bf261623e3a95c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 9 Jun 2024 00:10:40 +0200 Subject: net: add and use __skb_get_hash_symmetric_net Similar to previous patch: apply same logic for __skb_get_hash_symmetric and let callers pass the netns to the dissector core. Existing function is turned into a wrapper to avoid adjusting all callers, nft_hash.c uses new function. Reviewed-by: Willem de Bruijn Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240608221057.16070-3-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6e78019f899a..813406a9bd6c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1498,8 +1498,14 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) __skb_set_hash(skb, hash, true, is_l4); } +u32 __skb_get_hash_symmetric_net(const struct net *net, const struct sk_buff *skb); + +static inline u32 __skb_get_hash_symmetric(const struct sk_buff *skb) +{ + return __skb_get_hash_symmetric_net(NULL, skb); +} + void __skb_get_hash_net(const struct net *net, struct sk_buff *skb); -u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen); -- cgit v1.2.3 From 3e453ca122d483eb519f934b6624215f0536301c Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 7 Jun 2024 17:13:53 +0200 Subject: net: ipv4,ipv6: Pass multipath hash computation through a helper The following patches will add a sysctl to control multipath hash seed. In order to centralize the hash computation, add a helper, fib_multipath_hash_from_keys(), and have all IPv4 and IPv6 route.c invocations of flow_hash_from_keys() go through this helper instead. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240607151357.421181-2-petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/ip_fib.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9b2f69ba5e49..b8b3c07e8f7b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -521,6 +521,13 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); #endif + +static inline u32 fib_multipath_hash_from_keys(const struct net *net, + struct flow_keys *keys) +{ + return flow_hash_from_keys(keys); +} + int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); void fib_select_multipath(struct fib_result *res, int hash); -- cgit v1.2.3 From 4ee2a8cace3fb9a34aea6a56426f89d26dd514f3 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 7 Jun 2024 17:13:54 +0200 Subject: net: ipv4: Add a sysctl to set multipath hash seed When calculating hashes for the purpose of multipath forwarding, both IPv4 and IPv6 code currently fall back on flow_hash_from_keys(). That uses a randomly-generated seed. That's a fine choice by default, but unfortunately some deployments may need a tighter control over the seed used. In this patch, make the seed configurable by adding a new sysctl key, net.ipv4.fib_multipath_hash_seed to control the seed. This seed is used specifically for multipath forwarding and not for the other concerns that flow_hash_from_keys() is used for, such as queue selection. Expose the knob as sysctl because other such settings, such as headers to hash, are also handled that way. Like those, the multipath hash seed is a per-netns variable. Despite being placed in the net.ipv4 namespace, the multipath seed sysctl is used for both IPv4 and IPv6, similarly to e.g. a number of TCP variables. The seed used by flow_hash_from_keys() is a 128-bit quantity. However it seems that usually the seed is a much more modest value. 32 bits seem typical (Cisco, Cumulus), some systems go even lower. For that reason, and to decouple the user interface from implementation details, go with a 32-bit quantity, which is then quadruplicated to form the siphash key. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240607151357.421181-3-petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/flow_dissector.h | 2 ++ include/net/ip_fib.h | 23 ++++++++++++++++++++++- include/net/netns/ipv4.h | 8 ++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 99626475c3f4..3e47e123934d 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -442,6 +442,8 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys) } u32 flow_hash_from_keys(struct flow_keys *keys); +u32 flow_hash_from_keys_seed(struct flow_keys *keys, + const siphash_key_t *keyval); void skb_flow_get_icmp_tci(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp, const void *data, int thoff, int hlen); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b8b3c07e8f7b..6e7984bfb986 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -520,13 +520,34 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); -#endif + +static void +fib_multipath_hash_construct_key(siphash_key_t *key, u32 mp_seed) +{ + u64 mp_seed_64 = mp_seed; + + key->key[0] = (mp_seed_64 << 32) | mp_seed_64; + key->key[1] = key->key[0]; +} +static inline u32 fib_multipath_hash_from_keys(const struct net *net, + struct flow_keys *keys) +{ + siphash_aligned_key_t hash_key; + u32 mp_seed; + + mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed; + fib_multipath_hash_construct_key(&hash_key, mp_seed); + + return flow_hash_from_keys_seed(keys, &hash_key); +} +#else static inline u32 fib_multipath_hash_from_keys(const struct net *net, struct flow_keys *keys) { return flow_hash_from_keys(keys); } +#endif int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a91bb971f901..5fcd61ada622 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,6 +40,13 @@ struct inet_timewait_death_row { struct tcp_fastopen_context; +#ifdef CONFIG_IP_ROUTE_MULTIPATH +struct sysctl_fib_multipath_hash_seed { + u32 user_seed; + u32 mp_seed; +}; +#endif + struct netns_ipv4 { /* Cacheline organization can be found documented in * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst. @@ -246,6 +253,7 @@ struct netns_ipv4 { #endif #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH + struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed; u32 sysctl_fib_multipath_hash_fields; u8 sysctl_fib_multipath_use_neigh; u8 sysctl_fib_multipath_hash_policy; -- cgit v1.2.3 From b48a1540b73a3c3a9ef59ce34176cc8180c6ce57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Sun, 9 Jun 2024 17:33:51 +0000 Subject: flow_offload: add encapsulation control flag helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds two new helper functions: flow_rule_is_supp_enc_control_flags() flow_rule_has_enc_control_flags() They are intended to be used for validating encapsulation control flags, and compliment the similar helpers without "enc_" in the name. The only difference is that they have their own error message, to make it obvious if an unsupported flag error is related to FLOW_DISSECTOR_KEY_CONTROL or FLOW_DISSECTOR_KEY_ENC_CONTROL. flow_rule_has_enc_control_flags() is for drivers supporting FLOW_DISSECTOR_KEY_ENC_CONTROL, but not supporting any encapsulation control flags. (Currently all 4 drivers fits this category) flow_rule_is_supp_enc_control_flags() is currently only used for the above helper, but should also be used by drivers once they implement at least one encapsulation control flag. There is AFAICT currently no need for an "enc_" variant of flow_rule_match_has_control_flags(), as all drivers currently supporting FLOW_DISSECTOR_KEY_ENC_CONTROL, are already calling flow_rule_match_enc_control() directly. Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Davide Caratti Link: https://lore.kernel.org/r/20240609173358.193178-2-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/net/flow_offload.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index ec9f80509f60..292cd8f4b762 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -471,6 +471,28 @@ static inline bool flow_rule_is_supp_control_flags(const u32 supp_flags, return false; } +/** + * flow_rule_is_supp_enc_control_flags() - check for supported control flags + * @supp_enc_flags: encapsulation control flags supported by driver + * @enc_ctrl_flags: encapsulation control flags present in rule + * @extack: The netlink extended ACK for reporting errors. + * + * Return: true if only supported control flags are set, false otherwise. + */ +static inline bool flow_rule_is_supp_enc_control_flags(const u32 supp_enc_flags, + const u32 enc_ctrl_flags, + struct netlink_ext_ack *extack) +{ + if (likely((enc_ctrl_flags & ~supp_enc_flags) == 0)) + return true; + + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported match on enc_control.flags %#x", + enc_ctrl_flags); + + return false; +} + /** * flow_rule_has_control_flags() - check for presence of any control flags * @ctrl_flags: control flags present in rule @@ -484,6 +506,19 @@ static inline bool flow_rule_has_control_flags(const u32 ctrl_flags, return !flow_rule_is_supp_control_flags(0, ctrl_flags, extack); } +/** + * flow_rule_has_enc_control_flags() - check for presence of any control flags + * @enc_ctrl_flags: encapsulation control flags present in rule + * @extack: The netlink extended ACK for reporting errors. + * + * Return: true if control flags are set, false otherwise. + */ +static inline bool flow_rule_has_enc_control_flags(const u32 enc_ctrl_flags, + struct netlink_ext_ack *extack) +{ + return !flow_rule_is_supp_enc_control_flags(0, enc_ctrl_flags, extack); +} + /** * flow_rule_match_has_control_flags() - match and check for any control flags * @rule: The flow_rule under evaluation. -- cgit v1.2.3 From 80bbd1c355d661678d2a25bd36e739b6925e7a4e Mon Sep 17 00:00:00 2001 From: Luo Jie Date: Wed, 5 Jun 2024 20:45:39 +0800 Subject: dt-bindings: clock: add qca8386/qca8084 clock and reset definitions QCA8386/QCA8084 includes the clock & reset controller that is accessed by MDIO bus. Two work modes are supported, qca8386 works as switch mode, qca8084 works as PHY mode. Reviewed-by: Rob Herring Signed-off-by: Luo Jie Link: https://lore.kernel.org/r/20240605124541.2711467-3-quic_luoj@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,qca8k-nsscc.h | 101 +++++++++++++++++++++++++++ include/dt-bindings/reset/qcom,qca8k-nsscc.h | 76 ++++++++++++++++++++ 2 files changed, 177 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,qca8k-nsscc.h create mode 100644 include/dt-bindings/reset/qcom,qca8k-nsscc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,qca8k-nsscc.h b/include/dt-bindings/clock/qcom,qca8k-nsscc.h new file mode 100644 index 000000000000..0ac3e4c69a1a --- /dev/null +++ b/include/dt-bindings/clock/qcom,qca8k-nsscc.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_QCA8K_NSS_CC_H +#define _DT_BINDINGS_CLK_QCOM_QCA8K_NSS_CC_H + +#define NSS_CC_SWITCH_CORE_CLK_SRC 0 +#define NSS_CC_SWITCH_CORE_CLK 1 +#define NSS_CC_APB_BRIDGE_CLK 2 +#define NSS_CC_MAC0_TX_CLK_SRC 3 +#define NSS_CC_MAC0_TX_DIV_CLK_SRC 4 +#define NSS_CC_MAC0_TX_CLK 5 +#define NSS_CC_MAC0_TX_SRDS1_CLK 6 +#define NSS_CC_MAC0_RX_CLK_SRC 7 +#define NSS_CC_MAC0_RX_DIV_CLK_SRC 8 +#define NSS_CC_MAC0_RX_CLK 9 +#define NSS_CC_MAC0_RX_SRDS1_CLK 10 +#define NSS_CC_MAC1_TX_CLK_SRC 11 +#define NSS_CC_MAC1_TX_DIV_CLK_SRC 12 +#define NSS_CC_MAC1_SRDS1_CH0_XGMII_RX_DIV_CLK_SRC 13 +#define NSS_CC_MAC1_SRDS1_CH0_RX_CLK 14 +#define NSS_CC_MAC1_TX_CLK 15 +#define NSS_CC_MAC1_GEPHY0_TX_CLK 16 +#define NSS_CC_MAC1_SRDS1_CH0_XGMII_RX_CLK 17 +#define NSS_CC_MAC1_RX_CLK_SRC 18 +#define NSS_CC_MAC1_RX_DIV_CLK_SRC 19 +#define NSS_CC_MAC1_SRDS1_CH0_XGMII_TX_DIV_CLK_SRC 20 +#define NSS_CC_MAC1_SRDS1_CH0_TX_CLK 21 +#define NSS_CC_MAC1_RX_CLK 22 +#define NSS_CC_MAC1_GEPHY0_RX_CLK 23 +#define NSS_CC_MAC1_SRDS1_CH0_XGMII_TX_CLK 24 +#define NSS_CC_MAC2_TX_CLK_SRC 25 +#define NSS_CC_MAC2_TX_DIV_CLK_SRC 26 +#define NSS_CC_MAC2_SRDS1_CH1_XGMII_RX_DIV_CLK_SRC 27 +#define NSS_CC_MAC2_SRDS1_CH1_RX_CLK 28 +#define NSS_CC_MAC2_TX_CLK 29 +#define NSS_CC_MAC2_GEPHY1_TX_CLK 30 +#define NSS_CC_MAC2_SRDS1_CH1_XGMII_RX_CLK 31 +#define NSS_CC_MAC2_RX_CLK_SRC 32 +#define NSS_CC_MAC2_RX_DIV_CLK_SRC 33 +#define NSS_CC_MAC2_SRDS1_CH1_XGMII_TX_DIV_CLK_SRC 34 +#define NSS_CC_MAC2_SRDS1_CH1_TX_CLK 35 +#define NSS_CC_MAC2_RX_CLK 36 +#define NSS_CC_MAC2_GEPHY1_RX_CLK 37 +#define NSS_CC_MAC2_SRDS1_CH1_XGMII_TX_CLK 38 +#define NSS_CC_MAC3_TX_CLK_SRC 39 +#define NSS_CC_MAC3_TX_DIV_CLK_SRC 40 +#define NSS_CC_MAC3_SRDS1_CH2_XGMII_RX_DIV_CLK_SRC 41 +#define NSS_CC_MAC3_SRDS1_CH2_RX_CLK 42 +#define NSS_CC_MAC3_TX_CLK 43 +#define NSS_CC_MAC3_GEPHY2_TX_CLK 44 +#define NSS_CC_MAC3_SRDS1_CH2_XGMII_RX_CLK 45 +#define NSS_CC_MAC3_RX_CLK_SRC 46 +#define NSS_CC_MAC3_RX_DIV_CLK_SRC 47 +#define NSS_CC_MAC3_SRDS1_CH2_XGMII_TX_DIV_CLK_SRC 48 +#define NSS_CC_MAC3_SRDS1_CH2_TX_CLK 49 +#define NSS_CC_MAC3_RX_CLK 50 +#define NSS_CC_MAC3_GEPHY2_RX_CLK 51 +#define NSS_CC_MAC3_SRDS1_CH2_XGMII_TX_CLK 52 +#define NSS_CC_MAC4_TX_CLK_SRC 53 +#define NSS_CC_MAC4_TX_DIV_CLK_SRC 54 +#define NSS_CC_MAC4_SRDS1_CH3_XGMII_RX_DIV_CLK_SRC 55 +#define NSS_CC_MAC4_SRDS1_CH3_RX_CLK 56 +#define NSS_CC_MAC4_TX_CLK 57 +#define NSS_CC_MAC4_GEPHY3_TX_CLK 58 +#define NSS_CC_MAC4_SRDS1_CH3_XGMII_RX_CLK 59 +#define NSS_CC_MAC4_RX_CLK_SRC 60 +#define NSS_CC_MAC4_RX_DIV_CLK_SRC 61 +#define NSS_CC_MAC4_SRDS1_CH3_XGMII_TX_DIV_CLK_SRC 62 +#define NSS_CC_MAC4_SRDS1_CH3_TX_CLK 63 +#define NSS_CC_MAC4_RX_CLK 64 +#define NSS_CC_MAC4_GEPHY3_RX_CLK 65 +#define NSS_CC_MAC4_SRDS1_CH3_XGMII_TX_CLK 66 +#define NSS_CC_MAC5_TX_CLK_SRC 67 +#define NSS_CC_MAC5_TX_DIV_CLK_SRC 68 +#define NSS_CC_MAC5_TX_SRDS0_CLK 69 +#define NSS_CC_MAC5_TX_CLK 70 +#define NSS_CC_MAC5_RX_CLK_SRC 71 +#define NSS_CC_MAC5_RX_DIV_CLK_SRC 72 +#define NSS_CC_MAC5_RX_SRDS0_CLK 73 +#define NSS_CC_MAC5_RX_CLK 74 +#define NSS_CC_MAC5_TX_SRDS0_CLK_SRC 75 +#define NSS_CC_MAC5_RX_SRDS0_CLK_SRC 76 +#define NSS_CC_AHB_CLK_SRC 77 +#define NSS_CC_AHB_CLK 78 +#define NSS_CC_SEC_CTRL_AHB_CLK 79 +#define NSS_CC_TLMM_CLK 80 +#define NSS_CC_TLMM_AHB_CLK 81 +#define NSS_CC_CNOC_AHB_CLK 82 +#define NSS_CC_MDIO_AHB_CLK 83 +#define NSS_CC_MDIO_MASTER_AHB_CLK 84 +#define NSS_CC_SYS_CLK_SRC 85 +#define NSS_CC_SRDS0_SYS_CLK 86 +#define NSS_CC_SRDS1_SYS_CLK 87 +#define NSS_CC_GEPHY0_SYS_CLK 88 +#define NSS_CC_GEPHY1_SYS_CLK 89 +#define NSS_CC_GEPHY2_SYS_CLK 90 +#define NSS_CC_GEPHY3_SYS_CLK 91 +#endif diff --git a/include/dt-bindings/reset/qcom,qca8k-nsscc.h b/include/dt-bindings/reset/qcom,qca8k-nsscc.h new file mode 100644 index 000000000000..c71167a3bd41 --- /dev/null +++ b/include/dt-bindings/reset/qcom,qca8k-nsscc.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_RESET_QCOM_QCA8K_NSS_CC_H +#define _DT_BINDINGS_RESET_QCOM_QCA8K_NSS_CC_H + +#define NSS_CC_SWITCH_CORE_ARES 1 +#define NSS_CC_APB_BRIDGE_ARES 2 +#define NSS_CC_MAC0_TX_ARES 3 +#define NSS_CC_MAC0_TX_SRDS1_ARES 4 +#define NSS_CC_MAC0_RX_ARES 5 +#define NSS_CC_MAC0_RX_SRDS1_ARES 6 +#define NSS_CC_MAC1_SRDS1_CH0_RX_ARES 7 +#define NSS_CC_MAC1_TX_ARES 8 +#define NSS_CC_MAC1_GEPHY0_TX_ARES 9 +#define NSS_CC_MAC1_SRDS1_CH0_XGMII_RX_ARES 10 +#define NSS_CC_MAC1_SRDS1_CH0_TX_ARES 11 +#define NSS_CC_MAC1_RX_ARES 12 +#define NSS_CC_MAC1_GEPHY0_RX_ARES 13 +#define NSS_CC_MAC1_SRDS1_CH0_XGMII_TX_ARES 14 +#define NSS_CC_MAC2_SRDS1_CH1_RX_ARES 15 +#define NSS_CC_MAC2_TX_ARES 16 +#define NSS_CC_MAC2_GEPHY1_TX_ARES 17 +#define NSS_CC_MAC2_SRDS1_CH1_XGMII_RX_ARES 18 +#define NSS_CC_MAC2_SRDS1_CH1_TX_ARES 19 +#define NSS_CC_MAC2_RX_ARES 20 +#define NSS_CC_MAC2_GEPHY1_RX_ARES 21 +#define NSS_CC_MAC2_SRDS1_CH1_XGMII_TX_ARES 22 +#define NSS_CC_MAC3_SRDS1_CH2_RX_ARES 23 +#define NSS_CC_MAC3_TX_ARES 24 +#define NSS_CC_MAC3_GEPHY2_TX_ARES 25 +#define NSS_CC_MAC3_SRDS1_CH2_XGMII_RX_ARES 26 +#define NSS_CC_MAC3_SRDS1_CH2_TX_ARES 27 +#define NSS_CC_MAC3_RX_ARES 28 +#define NSS_CC_MAC3_GEPHY2_RX_ARES 29 +#define NSS_CC_MAC3_SRDS1_CH2_XGMII_TX_ARES 30 +#define NSS_CC_MAC4_SRDS1_CH3_RX_ARES 31 +#define NSS_CC_MAC4_TX_ARES 32 +#define NSS_CC_MAC4_GEPHY3_TX_ARES 33 +#define NSS_CC_MAC4_SRDS1_CH3_XGMII_RX_ARES 34 +#define NSS_CC_MAC4_SRDS1_CH3_TX_ARES 35 +#define NSS_CC_MAC4_RX_ARES 36 +#define NSS_CC_MAC4_GEPHY3_RX_ARES 37 +#define NSS_CC_MAC4_SRDS1_CH3_XGMII_TX_ARES 38 +#define NSS_CC_MAC5_TX_ARES 39 +#define NSS_CC_MAC5_TX_SRDS0_ARES 40 +#define NSS_CC_MAC5_RX_ARES 41 +#define NSS_CC_MAC5_RX_SRDS0_ARES 42 +#define NSS_CC_AHB_ARES 43 +#define NSS_CC_SEC_CTRL_AHB_ARES 44 +#define NSS_CC_TLMM_ARES 45 +#define NSS_CC_TLMM_AHB_ARES 46 +#define NSS_CC_CNOC_AHB_ARES 47 +#define NSS_CC_MDIO_AHB_ARES 48 +#define NSS_CC_MDIO_MASTER_AHB_ARES 49 +#define NSS_CC_SRDS0_SYS_ARES 50 +#define NSS_CC_SRDS1_SYS_ARES 51 +#define NSS_CC_GEPHY0_SYS_ARES 52 +#define NSS_CC_GEPHY1_SYS_ARES 53 +#define NSS_CC_GEPHY2_SYS_ARES 54 +#define NSS_CC_GEPHY3_SYS_ARES 55 +#define NSS_CC_SEC_CTRL_ARES 56 +#define NSS_CC_SEC_CTRL_SENSE_ARES 57 +#define NSS_CC_SLEEP_ARES 58 +#define NSS_CC_DEBUG_ARES 59 +#define NSS_CC_GEPHY0_ARES 60 +#define NSS_CC_GEPHY1_ARES 61 +#define NSS_CC_GEPHY2_ARES 62 +#define NSS_CC_GEPHY3_ARES 63 +#define NSS_CC_DSP_ARES 64 +#define NSS_CC_GEPHY_FULL_ARES 65 +#define NSS_CC_GLOBAL_ARES 66 +#define NSS_CC_XPCS_ARES 67 +#endif -- cgit v1.2.3 From 525b42832bd333e3e7ccb0efceb41b47347beab5 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 6 Jun 2024 13:36:00 +0200 Subject: dt-bindings: clock: Add Qcom QCM2290 GPUCC Add device tree bindings for graphics clock controller for Qualcomm Technology Inc's QCM2290 SoCs. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240606-topic-rb1_gpu-v4-1-4bc0c19da4af@linaro.org Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,qcm2290-gpucc.h | 32 ++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,qcm2290-gpucc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,qcm2290-gpucc.h b/include/dt-bindings/clock/qcom,qcm2290-gpucc.h new file mode 100644 index 000000000000..7c76dd05278f --- /dev/null +++ b/include/dt-bindings/clock/qcom,qcm2290-gpucc.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. + * Copyright (c) 2024, Linaro Limited + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_QCM2290_H +#define _DT_BINDINGS_CLK_QCOM_GPU_CC_QCM2290_H + +/* GPU_CC clocks */ +#define GPU_CC_AHB_CLK 0 +#define GPU_CC_CRC_AHB_CLK 1 +#define GPU_CC_CX_GFX3D_CLK 2 +#define GPU_CC_CX_GMU_CLK 3 +#define GPU_CC_CX_SNOC_DVM_CLK 4 +#define GPU_CC_CXO_AON_CLK 5 +#define GPU_CC_CXO_CLK 6 +#define GPU_CC_GMU_CLK_SRC 7 +#define GPU_CC_GX_GFX3D_CLK 8 +#define GPU_CC_GX_GFX3D_CLK_SRC 9 +#define GPU_CC_PLL0 10 +#define GPU_CC_SLEEP_CLK 11 +#define GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK 12 + +/* Resets */ +#define GPU_GX_BCR 0 + +/* GDSCs */ +#define GPU_CX_GDSC 0 +#define GPU_GX_GDSC 1 + +#endif -- cgit v1.2.3 From b5c29fba72a6c950655d1cb0f6aa16b60dc83be7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 28 May 2024 12:54:58 +0800 Subject: iommu: Make iommu_sva_domain_alloc() static iommu_sva_domain_alloc() is only called in iommu-sva.c, hence make it static. On the other hand, iommu_sva_domain_alloc() should not return NULL anymore after commit <80af5a452024> ("iommu: Add ops->domain_alloc_sva()"), the removal of inline code avoids potential confusion. Fixes: 80af5a452024 ("iommu: Add ops->domain_alloc_sva()") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240528045458.81458-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 17b3f36ad843..1cd19b903354 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1527,8 +1527,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm); void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm); #else static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) @@ -1553,12 +1551,6 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) } static inline void mm_pasid_drop(struct mm_struct *mm) {} - -static inline struct iommu_domain * -iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) -{ - return NULL; -} #endif /* CONFIG_IOMMU_SVA */ #ifdef CONFIG_IOMMU_IOPF -- cgit v1.2.3 From 0e6b6dedf16800df0ff73ffe2bb5066514db29c2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Jun 2024 16:15:55 +0200 Subject: ACPI: EC: Evaluate orphan _REG under EC device After starting to install the EC address space handler at the ACPI namespace root, if there is an "orphan" _REG method in the EC device's scope, it will not be evaluated any more. This breaks EC operation regions on some systems, like Asus gu605. To address this, use a wrapper around an existing ACPICA function to look for an "orphan" _REG method in the EC device scope and evaluate it if present. Fixes: 60fa6ae6e6d0 ("ACPI: EC: Install address space handler at the namespace root") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218945 Reported-by: VitaliiT Tested-by: VitaliiT Signed-off-by: Rafael J. Wysocki --- include/acpi/acpixf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 94d0fc3bd412..80dc36f9d527 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -662,6 +662,10 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id)) +ACPI_EXTERNAL_RETURN_STATUS(acpi_status + acpi_execute_orphan_reg_method(acpi_handle device, + acpi_adr_space_type + space_id)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_remove_address_space_handler(acpi_handle device, -- cgit v1.2.3 From 1a8009e108382848d149a24dd3fc67607d054a05 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 10 Jun 2024 10:57:30 +0200 Subject: dt-bindings: interconnect: Add MediaTek EMI Interconnect bindings Add bindings for the MediaTek External Memory Interface Interconnect, which providers support system bandwidth requirements through Dynamic Voltage Frequency Scaling Resource Collector (DVFSRC) hardware. This adds bindings for MediaTek MT8183 and MT8195 SoCs. Note that this is modeled as a subnode of DVFSRC for multiple reasons: - Some SoCs have more than one interconnect on the DVFSRC (and two different kinds of EMI interconnect, and also a SMI interconnect); - Some boards will want to not enable the interconnect driver because some of those are not battery powered (so they just keep the knobs at full thrust from the bootloader and never care scaling busses); - Some DVFSRC interconnect features may depend on firmware. Reviewed-by: Rob Herring Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240610085735.147134-3-angelogioacchino.delregno@collabora.com Signed-off-by: Georgi Djakov --- include/dt-bindings/interconnect/mediatek,mt8183.h | 23 +++++++++++ include/dt-bindings/interconnect/mediatek,mt8195.h | 44 ++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 include/dt-bindings/interconnect/mediatek,mt8183.h create mode 100644 include/dt-bindings/interconnect/mediatek,mt8195.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/mediatek,mt8183.h b/include/dt-bindings/interconnect/mediatek,mt8183.h new file mode 100644 index 000000000000..1088c350258d --- /dev/null +++ b/include/dt-bindings/interconnect/mediatek,mt8183.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2021 MediaTek Inc. + * Copyright (c) 2024 Collabora Ltd. + * AngeloGioacchino Del Regno + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_MEDIATEK_MT8183_H +#define __DT_BINDINGS_INTERCONNECT_MEDIATEK_MT8183_H + +#define SLAVE_DDR_EMI 0 +#define MASTER_MCUSYS 1 +#define MASTER_MFG 2 +#define MASTER_MMSYS 3 +#define MASTER_MM_VPU 4 +#define MASTER_MM_DISP 5 +#define MASTER_MM_VDEC 6 +#define MASTER_MM_VENC 7 +#define MASTER_MM_CAM 8 +#define MASTER_MM_IMG 9 +#define MASTER_MM_MDP 10 + +#endif diff --git a/include/dt-bindings/interconnect/mediatek,mt8195.h b/include/dt-bindings/interconnect/mediatek,mt8195.h new file mode 100644 index 000000000000..33e0e6cde732 --- /dev/null +++ b/include/dt-bindings/interconnect/mediatek,mt8195.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2020 MediaTek Inc. + * Copyright (c) 2024 Collabora Ltd. + * AngeloGioacchino Del Regno + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_MEDIATEK_MT8195_H +#define __DT_BINDINGS_INTERCONNECT_MEDIATEK_MT8195_H + +#define SLAVE_DDR_EMI 0 +#define MASTER_MCUSYS 1 +#define MASTER_GPUSYS 2 +#define MASTER_MMSYS 3 +#define MASTER_MM_VPU 4 +#define MASTER_MM_DISP 5 +#define MASTER_MM_VDEC 6 +#define MASTER_MM_VENC 7 +#define MASTER_MM_CAM 8 +#define MASTER_MM_IMG 9 +#define MASTER_MM_MDP 10 +#define MASTER_VPUSYS 11 +#define MASTER_VPU_0 12 +#define MASTER_VPU_1 13 +#define MASTER_MDLASYS 14 +#define MASTER_MDLA_0 15 +#define MASTER_UFS 16 +#define MASTER_PCIE_0 17 +#define MASTER_PCIE_1 18 +#define MASTER_USB 19 +#define MASTER_DBGIF 20 +#define SLAVE_HRT_DDR_EMI 21 +#define MASTER_HRT_MMSYS 22 +#define MASTER_HRT_MM_DISP 23 +#define MASTER_HRT_MM_VDEC 24 +#define MASTER_HRT_MM_VENC 25 +#define MASTER_HRT_MM_CAM 26 +#define MASTER_HRT_MM_IMG 27 +#define MASTER_HRT_MM_MDP 28 +#define MASTER_HRT_DBGIF 29 +#define MASTER_WIFI 30 +#define MASTER_BT 31 +#define MASTER_NETSYS 32 +#endif -- cgit v1.2.3 From 163f10b2935362f0e8ef8d7fadd0b5aa33e9130f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 12 Jun 2024 08:47:07 +0200 Subject: PCI: Add INTEL_HDA_PTL to pci_ids.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit More PCI ids for Intel audio. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20240612064709.51141-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 942a587bb97e..0168c6a60148 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3112,6 +3112,7 @@ #define PCI_DEVICE_ID_INTEL_HDA_LNL_P 0xa828 #define PCI_DEVICE_ID_INTEL_S21152BB 0xb152 #define PCI_DEVICE_ID_INTEL_HDA_BMG 0xe2f7 +#define PCI_DEVICE_ID_INTEL_HDA_PTL 0xe428 #define PCI_DEVICE_ID_INTEL_HDA_CML_R 0xf0c8 #define PCI_DEVICE_ID_INTEL_HDA_RKL_S 0xf1c8 -- cgit v1.2.3 From a3cfe84cca28f205761a0450016593b0d728165e Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 6 Jun 2024 07:58:50 -0700 Subject: bpf: Add CHECKSUM_COMPLETE to bpf test progs Add special flag to validate that TC BPF program properly updates checksum information in skb. Signed-off-by: Vadim Fedorenko Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240606145851.229116-1-vadfed@meta.com --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 25ea393cf084..35bcf52dbc65 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1425,6 +1425,8 @@ enum { #define BPF_F_TEST_RUN_ON_CPU (1U << 0) /* If set, XDP frames will be transmitted after processing */ #define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) +/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */ +#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2) /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { -- cgit v1.2.3 From 2755d1f46aa25f65179964bf315d8a16b3540eab Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 10 Jun 2024 13:12:00 +0200 Subject: drm/connector: hdmi: Fix kerneldoc warnings It looks like the documentation for the HDMI-related fields recently added to both the drm_connector and drm_connector_state structures trigger some warnings because of their use of anonymous structures: $ scripts/kernel-doc -none include/drm/drm_connector.h include/drm/drm_connector.h:1138: warning: Excess struct member 'broadcast_rgb' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'infoframes' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'avi' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'hdr_drm' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'spd' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'vendor' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'is_limited_range' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'output_bpc' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'output_format' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'tmds_char_rate' description in 'drm_connector_state' include/drm/drm_connector.h:2112: warning: Excess struct member 'vendor' description in 'drm_connector' include/drm/drm_connector.h:2112: warning: Excess struct member 'product' description in 'drm_connector' include/drm/drm_connector.h:2112: warning: Excess struct member 'supported_formats' description in 'drm_connector' include/drm/drm_connector.h:2112: warning: Excess struct member 'infoframes' description in 'drm_connector' include/drm/drm_connector.h:2112: warning: Excess struct member 'lock' description in 'drm_connector' include/drm/drm_connector.h:2112: warning: Excess struct member 'audio' description in 'drm_connector' Create some intermediate structures instead of anonymous ones to silence the warnings. Reported-by: Jani Nikula Suggested-by: Jani Nikula Fixes: 54cb39e2293b ("drm/connector: hdmi: Create an HDMI sub-state") Fixes: 948f01d5e559 ("drm/connector: hdmi: Add support for output format") Reviewed-by: Dmitry Baryshkov Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240610111200.428224-1-mripard@kernel.org --- include/drm/drm_connector.h | 206 +++++++++++++++++++++++--------------------- 1 file changed, 108 insertions(+), 98 deletions(-) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index e04a8a0d1bbd..f750765d8fbc 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -929,6 +929,67 @@ struct drm_connector_hdmi_infoframe { bool set; }; +/* + * struct drm_connector_hdmi_state - HDMI state container + */ +struct drm_connector_hdmi_state { + /** + * @broadcast_rgb: Connector property to pass the + * Broadcast RGB selection value. + */ + enum drm_hdmi_broadcast_rgb broadcast_rgb; + + /** + * @infoframes: HDMI Infoframes matching that state + */ + struct { + /** + * @avi: AVI Infoframes structure matching our + * state. + */ + struct drm_connector_hdmi_infoframe avi; + + /** + * @hdr_drm: DRM (Dynamic Range and Mastering) + * Infoframes structure matching our state. + */ + struct drm_connector_hdmi_infoframe hdr_drm; + + /** + * @spd: SPD Infoframes structure matching our + * state. + */ + struct drm_connector_hdmi_infoframe spd; + + /** + * @vendor: HDMI Vendor Infoframes structure + * matching our state. + */ + struct drm_connector_hdmi_infoframe hdmi; + } infoframes; + + /** + * @is_limited_range: Is the output supposed to use a limited + * RGB Quantization Range or not? + */ + bool is_limited_range; + + /** + * @output_bpc: Bits per color channel to output. + */ + unsigned int output_bpc; + + /** + * @output_format: Pixel format to output in. + */ + enum hdmi_colorspace output_format; + + /** + * @tmds_char_rate: TMDS Character Rate, in Hz. + */ + unsigned long long tmds_char_rate; +}; + /** * struct drm_connector_state - mutable connector state */ @@ -1078,63 +1139,7 @@ struct drm_connector_state { * @hdmi: HDMI-related variable and properties. Filled by * @drm_atomic_helper_connector_hdmi_check(). */ - struct { - /** - * @broadcast_rgb: Connector property to pass the - * Broadcast RGB selection value. - */ - enum drm_hdmi_broadcast_rgb broadcast_rgb; - - /** - * @infoframes: HDMI Infoframes matching that state - */ - struct { - /** - * @avi: AVI Infoframes structure matching our - * state. - */ - struct drm_connector_hdmi_infoframe avi; - - /** - * @hdr_drm: DRM (Dynamic Range and Mastering) - * Infoframes structure matching our state. - */ - struct drm_connector_hdmi_infoframe hdr_drm; - - /** - * @spd: SPD Infoframes structure matching our - * state. - */ - struct drm_connector_hdmi_infoframe spd; - - /** - * @vendor: HDMI Vendor Infoframes structure - * matching our state. - */ - struct drm_connector_hdmi_infoframe hdmi; - } infoframes; - - /** - * @is_limited_range: Is the output supposed to use a limited - * RGB Quantization Range or not? - */ - bool is_limited_range; - - /** - * @output_bpc: Bits per color channel to output. - */ - unsigned int output_bpc; - - /** - * @output_format: Pixel format to output in. - */ - enum hdmi_colorspace output_format; - - /** - * @tmds_char_rate: TMDS Character Rate, in Hz. - */ - unsigned long long tmds_char_rate; - } hdmi; + struct drm_connector_hdmi_state hdmi; }; /** @@ -1656,6 +1661,51 @@ struct drm_cmdline_mode { bool tv_mode_specified; }; +/* + * struct drm_connector_hdmi - DRM Connector HDMI-related structure + */ +struct drm_connector_hdmi { +#define DRM_CONNECTOR_HDMI_VENDOR_LEN 8 + /** + * @vendor: HDMI Controller Vendor Name + */ + unsigned char vendor[DRM_CONNECTOR_HDMI_VENDOR_LEN] __nonstring; + +#define DRM_CONNECTOR_HDMI_PRODUCT_LEN 16 + /** + * @product: HDMI Controller Product Name + */ + unsigned char product[DRM_CONNECTOR_HDMI_PRODUCT_LEN] __nonstring; + + /** + * @supported_formats: Bitmask of @hdmi_colorspace + * supported by the controller. + */ + unsigned long supported_formats; + + /** + * @funcs: HDMI connector Control Functions + */ + const struct drm_connector_hdmi_funcs *funcs; + + /** + * @infoframes: Current Infoframes output by the connector + */ + struct { + /** + * @lock: Mutex protecting against concurrent access to + * the infoframes, most notably between KMS and ALSA. + */ + struct mutex lock; + + /** + * @audio: Current Audio Infoframes structure. Protected + * by @lock. + */ + struct drm_connector_hdmi_infoframe audio; + } infoframes; +}; + /** * struct drm_connector - central DRM connector control structure * @@ -2068,47 +2118,7 @@ struct drm_connector { /** * @hdmi: HDMI-related variable and properties. */ - struct { -#define DRM_CONNECTOR_HDMI_VENDOR_LEN 8 - /** - * @vendor: HDMI Controller Vendor Name - */ - unsigned char vendor[DRM_CONNECTOR_HDMI_VENDOR_LEN] __nonstring; - -#define DRM_CONNECTOR_HDMI_PRODUCT_LEN 16 - /** - * @product: HDMI Controller Product Name - */ - unsigned char product[DRM_CONNECTOR_HDMI_PRODUCT_LEN] __nonstring; - - /** - * @supported_formats: Bitmask of @hdmi_colorspace - * supported by the controller. - */ - unsigned long supported_formats; - - /** - * @funcs: HDMI connector Control Functions - */ - const struct drm_connector_hdmi_funcs *funcs; - - /** - * @infoframes: Current Infoframes output by the connector - */ - struct { - /** - * @lock: Mutex protecting against concurrent access to - * the infoframes, most notably between KMS and ALSA. - */ - struct mutex lock; - - /** - * @audio: Current Audio Infoframes structure. Protected - * by @lock. - */ - struct drm_connector_hdmi_infoframe audio; - } infoframes; - } hdmi; + struct drm_connector_hdmi hdmi; }; #define obj_to_connector(x) container_of(x, struct drm_connector, base) -- cgit v1.2.3 From ff985c759778986f55cbc557055fbeb84ee833eb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 11 Jun 2024 15:01:04 +0200 Subject: auxbus: make to_auxiliary_drv accept and return a constant pointer In the quest to make struct device constant, start by making to_auxiliary_drv() return a constant pointer so that drivers that call this can be fixed up before the driver core changes. As the return type previously was not constant, also fix up all callers that were assuming that the pointer was not going to be a constant one in order to not break the build. Cc: Dave Ertman Cc: Ira Weiny Cc: Rafael J. Wysocki Cc: Bingbu Cao Cc: Tianshu Qiu Cc: Mauro Carvalho Chehab Cc: Michael Chan Cc: David S. Miller Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Jesse Brandeburg Cc: Tony Nguyen Cc: Saeed Mahameed Cc: Leon Romanovsky Cc: Tariq Toukan Cc: Pierre-Louis Bossart Cc: Liam Girdwood Cc: Peter Ujfalusi Cc: Bard Liao Cc: Ranjani Sridharan Cc: Daniel Baluta Cc: Kai Vehmanen Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: Richard Cochran Cc: linux-media@vger.kernel.org Cc: netdev@vger.kernel.org Cc: intel-wired-lan@lists.osuosl.org Cc: linux-rdma@vger.kernel.org Cc: sound-open-firmware@alsa-project.org Cc: linux-sound@vger.kernel.org Acked-by: Sakari Ailus # drivers/media/pci/intel/ipu6 Acked-by: Mark Brown Reviewed-by: Martin Habets Link: https://lore.kernel.org/r/20240611130103.3262749-7-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/auxiliary_bus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index de21d9d24a95..bdff7b85f2ae 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -203,7 +203,7 @@ static inline struct auxiliary_device *to_auxiliary_dev(struct device *dev) return container_of(dev, struct auxiliary_device, dev); } -static inline struct auxiliary_driver *to_auxiliary_drv(struct device_driver *drv) +static inline const struct auxiliary_driver *to_auxiliary_drv(const struct device_driver *drv) { return container_of(drv, struct auxiliary_driver, driver); } -- cgit v1.2.3 From 288b550463cf5dd21ad34f736b8c5ccb7ff69ceb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 8 Jun 2024 17:55:24 +0200 Subject: mfd: pm8008: Rework to match new DT binding Rework the pm8008 driver to match the new devicetree binding which no longer describes internal details like interrupts and register offsets (including which of the two consecutive I2C addresses the registers belong to). Instead make the interrupt controller implementation internal and pass interrupts to the subdrivers using MFD cell resources. Note that subdrivers may either get their resources, like register block offsets, from the parent MFD or this can be included in the subdrivers directly. In the current implementation, the temperature alarm driver is generic enough to just get its base address and alarm interrupt from the parent driver, which already uses this information to implement the interrupt controller. The regulator driver, however, needs additional information like parent supplies and regulator characteristics so in that case it is easier to just augment its table with the regulator register base addresses. Similarly, the current GPIO driver already holds the number of pins and that lookup table can therefore also be extended with register offsets. Note that subdrivers can now access the two regmaps by name, even if the primary regmap is registered last so that it is returned by default when no name is provided in lookups. Finally, note that the temperature alarm and GPIO subdrivers need some minor rework before they can be used with non-SPMI devices like the PM8008. The temperature alarm MFD cell name specifically uses a "qpnp" rather than "spmi" prefix to prevent binding until the driver has been updated. Tested-by: Bryan O'Donoghue Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20240608155526.12996-11-johan+linaro@kernel.org Signed-off-by: Lee Jones --- include/dt-bindings/mfd/qcom-pm8008.h | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 include/dt-bindings/mfd/qcom-pm8008.h (limited to 'include') diff --git a/include/dt-bindings/mfd/qcom-pm8008.h b/include/dt-bindings/mfd/qcom-pm8008.h deleted file mode 100644 index eca9448df228..000000000000 --- a/include/dt-bindings/mfd/qcom-pm8008.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2021 The Linux Foundation. All rights reserved. - */ - -#ifndef __DT_BINDINGS_MFD_QCOM_PM8008_H -#define __DT_BINDINGS_MFD_QCOM_PM8008_H - -/* PM8008 IRQ numbers */ -#define PM8008_IRQ_MISC_UVLO 0 -#define PM8008_IRQ_MISC_OVLO 1 -#define PM8008_IRQ_MISC_OTST2 2 -#define PM8008_IRQ_MISC_OTST3 3 -#define PM8008_IRQ_MISC_LDO_OCP 4 -#define PM8008_IRQ_TEMP_ALARM 5 -#define PM8008_IRQ_GPIO1 6 -#define PM8008_IRQ_GPIO2 7 - -#endif -- cgit v1.2.3 From 92424801261d1564a0bb759da3cf3ccd69fdf5a2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 13 Jun 2024 13:53:08 +0200 Subject: bpf: Fix reg_set_min_max corruption of fake_reg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Juan reported that after doing some changes to buzzer [0] and implementing a new fuzzing strategy guided by coverage, they noticed the following in one of the probes: [...] 13: (79) r6 = *(u64 *)(r0 +0) ; R0=map_value(ks=4,vs=8) R6_w=scalar() 14: (b7) r0 = 0 ; R0_w=0 15: (b4) w0 = -1 ; R0_w=0xffffffff 16: (74) w0 >>= 1 ; R0_w=0x7fffffff 17: (5c) w6 &= w0 ; R0_w=0x7fffffff R6_w=scalar(smin=smin32=0,smax=umax=umax32=0x7fffffff,var_off=(0x0; 0x7fffffff)) 18: (44) w6 |= 2 ; R6_w=scalar(smin=umin=smin32=umin32=2,smax=umax=umax32=0x7fffffff,var_off=(0x2; 0x7ffffffd)) 19: (56) if w6 != 0x7ffffffd goto pc+1 REG INVARIANTS VIOLATION (true_reg2): range bounds violation u64=[0x7fffffff, 0x7ffffffd] s64=[0x7fffffff, 0x7ffffffd] u32=[0x7fffffff, 0x7ffffffd] s32=[0x7fffffff, 0x7ffffffd] var_off=(0x7fffffff, 0x0) REG INVARIANTS VIOLATION (false_reg1): range bounds violation u64=[0x7fffffff, 0x7ffffffd] s64=[0x7fffffff, 0x7ffffffd] u32=[0x7fffffff, 0x7ffffffd] s32=[0x7fffffff, 0x7ffffffd] var_off=(0x7fffffff, 0x0) REG INVARIANTS VIOLATION (false_reg2): const tnum out of sync with range bounds u64=[0x0, 0xffffffffffffffff] s64=[0x8000000000000000, 0x7fffffffffffffff] u32=[0x0, 0xffffffff] s32=[0x80000000, 0x7fffffff] var_off=(0x7fffffff, 0x0) 19: R6_w=0x7fffffff 20: (95) exit from 19 to 21: R0=0x7fffffff R6=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x7ffffffe,var_off=(0x2; 0x7ffffffd)) R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm 21: R0=0x7fffffff R6=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x7ffffffe,var_off=(0x2; 0x7ffffffd)) R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm 21: (14) w6 -= 2147483632 ; R6_w=scalar(smin=umin=umin32=2,smax=umax=0xffffffff,smin32=0x80000012,smax32=14,var_off=(0x2; 0xfffffffd)) 22: (76) if w6 s>= 0xe goto pc+1 ; R6_w=scalar(smin=umin=umin32=2,smax=umax=0xffffffff,smin32=0x80000012,smax32=13,var_off=(0x2; 0xfffffffd)) 23: (95) exit from 22 to 24: R0=0x7fffffff R6_w=14 R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm 24: R0=0x7fffffff R6_w=14 R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm 24: (14) w6 -= 14 ; R6_w=0 [...] What can be seen here is a register invariant violation on line 19. After the binary-or in line 18, the verifier knows that bit 2 is set but knows nothing about the rest of the content which was loaded from a map value, meaning, range is [2,0x7fffffff] with var_off=(0x2; 0x7ffffffd). When in line 19 the verifier analyzes the branch, it splits the register states in reg_set_min_max() into the registers of the true branch (true_reg1, true_reg2) and the registers of the false branch (false_reg1, false_reg2). Since the test is w6 != 0x7ffffffd, the src_reg is a known constant. Internally, the verifier creates a "fake" register initialized as scalar to the value of 0x7ffffffd, and then passes it onto reg_set_min_max(). Now, for line 19, it is mathematically impossible to take the false branch of this program, yet the verifier analyzes it. It is impossible because the second bit of r6 will be set due to the prior or operation and the constant in the condition has that bit unset (hex(fd) == binary(1111 1101). When the verifier first analyzes the false / fall-through branch, it will compute an intersection between the var_off of r6 and of the constant. This is because the verifier creates a "fake" register initialized to the value of the constant. The intersection result later refines both registers in regs_refine_cond_op(): [...] t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off)); reg1->var_off = tnum_with_subreg(reg1->var_off, t); reg2->var_off = tnum_with_subreg(reg2->var_off, t); [...] Since the verifier is analyzing the false branch of the conditional jump, reg1 is equal to false_reg1 and reg2 is equal to false_reg2, i.e. the reg2 is the "fake" register that was meant to hold a constant value. The resulting var_off of the intersection says that both registers now hold a known value of var_off=(0x7fffffff, 0x0) or in other words: this operation manages to make the verifier think that the "constant" value that was passed in the jump operation now holds a different value. Normally this would not be an issue since it should not influence the true branch, however, false_reg2 and true_reg2 are pointers to the same "fake" register. Meaning, the false branch can influence the results of the true branch. In line 24, the verifier assumes R6_w=0, but the actual runtime value in this case is 1. The fix is simply not passing in the same "fake" register location as inputs to reg_set_min_max(), but instead making a copy. Moving the fake_reg into the env also reduces stack consumption by 120 bytes. With this, the verifier successfully rejects invalid accesses from the test program. [0] https://github.com/google/buzzer Fixes: 67420501e868 ("bpf: generalize reg_set_min_max() to handle non-const register comparisons") Reported-by: Juan José López Jaimez Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Link: https://lore.kernel.org/r/20240613115310.25383-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 50aa87f8d77f..e4070fb02b11 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -746,6 +746,8 @@ struct bpf_verifier_env { /* Same as scratched_regs but for stack slots */ u64 scratched_stack_slots; u64 prev_log_pos, prev_insn_print_pos; + /* buffer used to temporary hold constants as scalar registers */ + struct bpf_reg_state fake_reg[2]; /* buffer used to generate temporary string representations, * e.g., in reg_type_str() to generate reg_type string */ -- cgit v1.2.3 From 9a95c5bfbf02a0a7f5983280fe284a0ff0836c34 Mon Sep 17 00:00:00 2001 From: GUO Zihua Date: Tue, 7 May 2024 01:25:41 +0000 Subject: ima: Avoid blocking in RCU read-side critical section A panic happens in ima_match_policy: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 PGD 42f873067 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 5 PID: 1286325 Comm: kubeletmonit.sh Kdump: loaded Tainted: P Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 RIP: 0010:ima_match_policy+0x84/0x450 Code: 49 89 fc 41 89 cf 31 ed 89 44 24 14 eb 1c 44 39 7b 18 74 26 41 83 ff 05 74 20 48 8b 1b 48 3b 1d f2 b9 f4 00 0f 84 9c 01 00 00 <44> 85 73 10 74 ea 44 8b 6b 14 41 f6 c5 01 75 d4 41 f6 c5 02 74 0f RSP: 0018:ff71570009e07a80 EFLAGS: 00010207 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000200 RDX: ffffffffad8dc7c0 RSI: 0000000024924925 RDI: ff3e27850dea2000 RBP: 0000000000000000 R08: 0000000000000000 R09: ffffffffabfce739 R10: ff3e27810cc42400 R11: 0000000000000000 R12: ff3e2781825ef970 R13: 00000000ff3e2785 R14: 000000000000000c R15: 0000000000000001 FS: 00007f5195b51740(0000) GS:ff3e278b12d40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 0000000626d24002 CR4: 0000000000361ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ima_get_action+0x22/0x30 process_measurement+0xb0/0x830 ? page_add_file_rmap+0x15/0x170 ? alloc_set_pte+0x269/0x4c0 ? prep_new_page+0x81/0x140 ? simple_xattr_get+0x75/0xa0 ? selinux_file_open+0x9d/0xf0 ima_file_check+0x64/0x90 path_openat+0x571/0x1720 do_filp_open+0x9b/0x110 ? page_counter_try_charge+0x57/0xc0 ? files_cgroup_alloc_fd+0x38/0x60 ? __alloc_fd+0xd4/0x250 ? do_sys_open+0x1bd/0x250 do_sys_open+0x1bd/0x250 do_syscall_64+0x5d/0x1d0 entry_SYSCALL_64_after_hwframe+0x65/0xca Commit c7423dbdbc9e ("ima: Handle -ESTALE returned by ima_filter_rule_match()") introduced call to ima_lsm_copy_rule within a RCU read-side critical section which contains kmalloc with GFP_KERNEL. This implies a possible sleep and violates limitations of RCU read-side critical sections on non-PREEMPT systems. Sleeping within RCU read-side critical section might cause synchronize_rcu() returning early and break RCU protection, allowing a UAF to happen. The root cause of this issue could be described as follows: | Thread A | Thread B | | |ima_match_policy | | | rcu_read_lock | |ima_lsm_update_rule | | | synchronize_rcu | | | | kmalloc(GFP_KERNEL)| | | sleep | ==> synchronize_rcu returns early | kfree(entry) | | | | entry = entry->next| ==> UAF happens and entry now becomes NULL (or could be anything). | | entry->action | ==> Accessing entry might cause panic. To fix this issue, we are converting all kmalloc that is called within RCU read-side critical section to use GFP_ATOMIC. Fixes: c7423dbdbc9e ("ima: Handle -ESTALE returned by ima_filter_rule_match()") Cc: stable@vger.kernel.org Signed-off-by: GUO Zihua Acked-by: John Johansen Reviewed-by: Mimi Zohar Reviewed-by: Casey Schaufler [PM: fixed missing comment, long lines, !CONFIG_IMA_LSM_RULES case] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index f804b76cde44..44488b1ab9a9 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -413,7 +413,7 @@ LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring, #ifdef CONFIG_AUDIT LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr, - void **lsmrule) + void **lsmrule, gfp_t gfp) LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule) LSM_HOOK(int, 0, audit_rule_match, u32 secid, u32 field, u32 op, void *lsmrule) LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) diff --git a/include/linux/security.h b/include/linux/security.h index 21cf70346b33..de3af33e6ff5 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2048,7 +2048,8 @@ static inline void security_key_post_create_or_update(struct key *keyring, #ifdef CONFIG_AUDIT #ifdef CONFIG_SECURITY -int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule); +int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule, + gfp_t gfp); int security_audit_rule_known(struct audit_krule *krule); int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule); void security_audit_rule_free(void *lsmrule); @@ -2056,7 +2057,7 @@ void security_audit_rule_free(void *lsmrule); #else static inline int security_audit_rule_init(u32 field, u32 op, char *rulestr, - void **lsmrule) + void **lsmrule, gfp_t gfp) { return 0; } -- cgit v1.2.3 From 79a947bd54348d4f63120aacc30505b3ad81fa59 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 6 Jun 2024 20:52:02 +0200 Subject: ACPI: NUMA: Consolidate header includes The header file acpi/acpi_numa.h is included whether CONFIG_ACPI is defined or not. Include it only once before the #ifdef/#else/#endif preprocessor directives and fix the following make includecheck warning: acpi/acpi_numa.h is included more than once Signed-off-by: Thorsten Blum Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 28c3fb2bef0d..bb18e7bf8826 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -24,6 +24,7 @@ struct irq_domain_ops; #define _LINUX #endif #include +#include #ifdef CONFIG_ACPI @@ -35,7 +36,6 @@ struct irq_domain_ops; #include #include -#include #include #include @@ -777,8 +777,6 @@ const char *acpi_get_subsystem_id(acpi_handle handle); #define acpi_dev_uid_match(adev, uid2) (adev && false) #define acpi_dev_hid_uid_match(adev, hid2, uid2) (adev && false) -#include - struct fwnode_handle; static inline bool acpi_dev_found(const char *hid) -- cgit v1.2.3 From d6161b7d8b33aa8756ab2f7eed8cb371c2e7e8ed Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Jun 2024 15:42:29 +0200 Subject: video/logo: Remove linux_serial_image comments The last user of the serial_console ASCII image was removed in v2.1.115. Signed-off-by: Geert Uytterhoeven Signed-off-by: Helge Deller --- include/linux/linux_logo.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/linux_logo.h b/include/linux/linux_logo.h index d4d5b93efe84..e37699b7e839 100644 --- a/include/linux/linux_logo.h +++ b/include/linux/linux_logo.h @@ -10,9 +10,6 @@ * Copyright (C) 2001 Greg Banks * Copyright (C) 2001 Jan-Benedict Glaw * Copyright (C) 2003 Geert Uytterhoeven - * - * Serial_console ascii image can be any size, - * but should contain %s to display the version */ #include -- cgit v1.2.3 From 633aeefafc9c2a07a76a62be6aac1d73c3e3defa Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 13 Jun 2024 14:18:26 -0700 Subject: scsi: core: Introduce the BLIST_SKIP_IO_HINTS flag Prepare for skipping the IO Advice Hints Grouping mode page for USB storage devices. Cc: Alan Stern Cc: Joao Machado Cc: Andy Shevchenko Cc: Christian Heusel Cc: stable@vger.kernel.org Fixes: 4f53138fffc2 ("scsi: sd: Translate data lifetime information") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240613211828.2077477-2-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- include/scsi/scsi_devinfo.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h index 6b548dc2c496..1d79a3b536ce 100644 --- a/include/scsi/scsi_devinfo.h +++ b/include/scsi/scsi_devinfo.h @@ -69,8 +69,10 @@ #define BLIST_RETRY_ITF ((__force blist_flags_t)(1ULL << 32)) /* Always retry ABORTED_COMMAND with ASC 0xc1 */ #define BLIST_RETRY_ASC_C1 ((__force blist_flags_t)(1ULL << 33)) +/* Do not query the IO Advice Hints Grouping mode page */ +#define BLIST_SKIP_IO_HINTS ((__force blist_flags_t)(1ULL << 34)) -#define __BLIST_LAST_USED BLIST_RETRY_ASC_C1 +#define __BLIST_LAST_USED BLIST_SKIP_IO_HINTS #define __BLIST_HIGH_UNUSED (~(__BLIST_LAST_USED | \ (__force blist_flags_t) \ -- cgit v1.2.3 From f4a1254f2a076afb0edd473589bf40f9b4d36b41 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 14 Jun 2024 01:04:29 +0100 Subject: io_uring: fix cancellation overwriting req->flags Only the current owner of a request is allowed to write into req->flags. Hence, the cancellation path should never touch it. Add a new field instead of the flag, move it into the 3rd cache line because it should always be initialised. poll_refs can move further as polling is an involved process anyway. It's a minimal patch, in the future we can and should find a better place for it and remove now unused REQ_F_CANCEL_SEQ. Fixes: 521223d7c229f ("io_uring/cancel: don't default to setting req->work.cancel_seq") Cc: stable@vger.kernel.org Reported-by: Li Shi Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/6827b129f8f0ad76fa9d1f0a773de938b240ffab.1718323430.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 7a6b190c7da7..b48570eaa449 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -648,7 +648,7 @@ struct io_kiocb { struct io_rsrc_node *rsrc_node; atomic_t refs; - atomic_t poll_refs; + bool cancel_seq_set; struct io_task_work io_task_work; /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ struct hlist_node hash_node; @@ -657,6 +657,7 @@ struct io_kiocb { /* opcode allocated if it needs to store data for async defer */ void *async_data; /* linked requests, IFF REQ_F_HARDLINK or REQ_F_LINK are set */ + atomic_t poll_refs; struct io_kiocb *link; /* custom credentials, valid IFF REQ_F_CREDS is set */ const struct cred *creds; -- cgit v1.2.3 From 88a26c3c2405626e0083a49609c5e8cd6c453d87 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Mon, 15 Jan 2024 17:58:46 +0800 Subject: dt-bindings: clock: sophgo: add pll clocks for SG2042 Add bindings for the pll clocks for Sophgo SG2042. Signed-off-by: Chen Wang Reviewed-by: Rob Herring Reviewed-by: Guo Ren --- include/dt-bindings/clock/sophgo,sg2042-pll.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/dt-bindings/clock/sophgo,sg2042-pll.h (limited to 'include') diff --git a/include/dt-bindings/clock/sophgo,sg2042-pll.h b/include/dt-bindings/clock/sophgo,sg2042-pll.h new file mode 100644 index 000000000000..2d519b3bf51c --- /dev/null +++ b/include/dt-bindings/clock/sophgo,sg2042-pll.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2023 Sophgo Technology Inc. All rights reserved. + */ + +#ifndef __DT_BINDINGS_SOPHGO_SG2042_PLL_H__ +#define __DT_BINDINGS_SOPHGO_SG2042_PLL_H__ + +#define MPLL_CLK 0 +#define FPLL_CLK 1 +#define DPLL0_CLK 2 +#define DPLL1_CLK 3 + +#endif /* __DT_BINDINGS_SOPHGO_SG2042_PLL_H__ */ -- cgit v1.2.3 From 5a7144d61d73d801540f8846d8e1b9fa52a5559c Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Wed, 31 Jan 2024 09:57:01 +0800 Subject: dt-bindings: clock: sophgo: add RP gate clocks for SG2042 Add bindings for the gate clocks of RP subsystem for Sophgo SG2042. Signed-off-by: Chen Wang Reviewed-by: Rob Herring --- include/dt-bindings/clock/sophgo,sg2042-rpgate.h | 58 ++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 include/dt-bindings/clock/sophgo,sg2042-rpgate.h (limited to 'include') diff --git a/include/dt-bindings/clock/sophgo,sg2042-rpgate.h b/include/dt-bindings/clock/sophgo,sg2042-rpgate.h new file mode 100644 index 000000000000..8b4522d5f559 --- /dev/null +++ b/include/dt-bindings/clock/sophgo,sg2042-rpgate.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2023 Sophgo Technology Inc. All rights reserved. + */ + +#ifndef __DT_BINDINGS_SOPHGO_SG2042_RPGATE_H__ +#define __DT_BINDINGS_SOPHGO_SG2042_RPGATE_H__ + +#define GATE_CLK_RXU0 0 +#define GATE_CLK_RXU1 1 +#define GATE_CLK_RXU2 2 +#define GATE_CLK_RXU3 3 +#define GATE_CLK_RXU4 4 +#define GATE_CLK_RXU5 5 +#define GATE_CLK_RXU6 6 +#define GATE_CLK_RXU7 7 +#define GATE_CLK_RXU8 8 +#define GATE_CLK_RXU9 9 +#define GATE_CLK_RXU10 10 +#define GATE_CLK_RXU11 11 +#define GATE_CLK_RXU12 12 +#define GATE_CLK_RXU13 13 +#define GATE_CLK_RXU14 14 +#define GATE_CLK_RXU15 15 +#define GATE_CLK_RXU16 16 +#define GATE_CLK_RXU17 17 +#define GATE_CLK_RXU18 18 +#define GATE_CLK_RXU19 19 +#define GATE_CLK_RXU20 20 +#define GATE_CLK_RXU21 21 +#define GATE_CLK_RXU22 22 +#define GATE_CLK_RXU23 23 +#define GATE_CLK_RXU24 24 +#define GATE_CLK_RXU25 25 +#define GATE_CLK_RXU26 26 +#define GATE_CLK_RXU27 27 +#define GATE_CLK_RXU28 28 +#define GATE_CLK_RXU29 29 +#define GATE_CLK_RXU30 30 +#define GATE_CLK_RXU31 31 +#define GATE_CLK_MP0 32 +#define GATE_CLK_MP1 33 +#define GATE_CLK_MP2 34 +#define GATE_CLK_MP3 35 +#define GATE_CLK_MP4 36 +#define GATE_CLK_MP5 37 +#define GATE_CLK_MP6 38 +#define GATE_CLK_MP7 39 +#define GATE_CLK_MP8 40 +#define GATE_CLK_MP9 41 +#define GATE_CLK_MP10 42 +#define GATE_CLK_MP11 43 +#define GATE_CLK_MP12 44 +#define GATE_CLK_MP13 45 +#define GATE_CLK_MP14 46 +#define GATE_CLK_MP15 47 + +#endif /* __DT_BINDINGS_SOPHGO_SG2042_RPGATE_H__ */ -- cgit v1.2.3 From 5911423798b2eba65d6ea0aff6505280b3eb55e6 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Fri, 24 Nov 2023 14:02:43 +0800 Subject: dt-bindings: clock: sophgo: add clkgen for SG2042 Add bindings for the clock generator of divider/mux and gates working for other subsystem than RP subsystem for Sophgo SG2042. Signed-off-by: Chen Wang Reviewed-by: Rob Herring --- include/dt-bindings/clock/sophgo,sg2042-clkgen.h | 111 +++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 include/dt-bindings/clock/sophgo,sg2042-clkgen.h (limited to 'include') diff --git a/include/dt-bindings/clock/sophgo,sg2042-clkgen.h b/include/dt-bindings/clock/sophgo,sg2042-clkgen.h new file mode 100644 index 000000000000..84f7857317a2 --- /dev/null +++ b/include/dt-bindings/clock/sophgo,sg2042-clkgen.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2023 Sophgo Technology Inc. All rights reserved. + */ + +#ifndef __DT_BINDINGS_SOPHGO_SG2042_CLKGEN_H__ +#define __DT_BINDINGS_SOPHGO_SG2042_CLKGEN_H__ + +#define DIV_CLK_MPLL_RP_CPU_NORMAL_0 0 +#define DIV_CLK_MPLL_AXI_DDR_0 1 +#define DIV_CLK_FPLL_DDR01_1 2 +#define DIV_CLK_FPLL_DDR23_1 3 +#define DIV_CLK_FPLL_RP_CPU_NORMAL_1 4 +#define DIV_CLK_FPLL_50M_A53 5 +#define DIV_CLK_FPLL_TOP_RP_CMN_DIV2 6 +#define DIV_CLK_FPLL_UART_500M 7 +#define DIV_CLK_FPLL_AHB_LPC 8 +#define DIV_CLK_FPLL_EFUSE 9 +#define DIV_CLK_FPLL_TX_ETH0 10 +#define DIV_CLK_FPLL_PTP_REF_I_ETH0 11 +#define DIV_CLK_FPLL_REF_ETH0 12 +#define DIV_CLK_FPLL_EMMC 13 +#define DIV_CLK_FPLL_SD 14 +#define DIV_CLK_FPLL_TOP_AXI0 15 +#define DIV_CLK_FPLL_TOP_AXI_HSPERI 16 +#define DIV_CLK_FPLL_AXI_DDR_1 17 +#define DIV_CLK_FPLL_DIV_TIMER1 18 +#define DIV_CLK_FPLL_DIV_TIMER2 19 +#define DIV_CLK_FPLL_DIV_TIMER3 20 +#define DIV_CLK_FPLL_DIV_TIMER4 21 +#define DIV_CLK_FPLL_DIV_TIMER5 22 +#define DIV_CLK_FPLL_DIV_TIMER6 23 +#define DIV_CLK_FPLL_DIV_TIMER7 24 +#define DIV_CLK_FPLL_DIV_TIMER8 25 +#define DIV_CLK_FPLL_100K_EMMC 26 +#define DIV_CLK_FPLL_100K_SD 27 +#define DIV_CLK_FPLL_GPIO_DB 28 +#define DIV_CLK_DPLL0_DDR01_0 29 +#define DIV_CLK_DPLL1_DDR23_0 30 + +#define GATE_CLK_RP_CPU_NORMAL_DIV0 31 +#define GATE_CLK_AXI_DDR_DIV0 32 + +#define GATE_CLK_RP_CPU_NORMAL_DIV1 33 +#define GATE_CLK_A53_50M 34 +#define GATE_CLK_TOP_RP_CMN_DIV2 35 +#define GATE_CLK_HSDMA 36 +#define GATE_CLK_EMMC_100M 37 +#define GATE_CLK_SD_100M 38 +#define GATE_CLK_TX_ETH0 39 +#define GATE_CLK_PTP_REF_I_ETH0 40 +#define GATE_CLK_REF_ETH0 41 +#define GATE_CLK_UART_500M 42 +#define GATE_CLK_EFUSE 43 + +#define GATE_CLK_AHB_LPC 44 +#define GATE_CLK_AHB_ROM 45 +#define GATE_CLK_AHB_SF 46 + +#define GATE_CLK_APB_UART 47 +#define GATE_CLK_APB_TIMER 48 +#define GATE_CLK_APB_EFUSE 49 +#define GATE_CLK_APB_GPIO 50 +#define GATE_CLK_APB_GPIO_INTR 51 +#define GATE_CLK_APB_SPI 52 +#define GATE_CLK_APB_I2C 53 +#define GATE_CLK_APB_WDT 54 +#define GATE_CLK_APB_PWM 55 +#define GATE_CLK_APB_RTC 56 + +#define GATE_CLK_AXI_PCIE0 57 +#define GATE_CLK_AXI_PCIE1 58 +#define GATE_CLK_SYSDMA_AXI 59 +#define GATE_CLK_AXI_DBG_I2C 60 +#define GATE_CLK_AXI_SRAM 61 +#define GATE_CLK_AXI_ETH0 62 +#define GATE_CLK_AXI_EMMC 63 +#define GATE_CLK_AXI_SD 64 +#define GATE_CLK_TOP_AXI0 65 +#define GATE_CLK_TOP_AXI_HSPERI 66 + +#define GATE_CLK_TIMER1 67 +#define GATE_CLK_TIMER2 68 +#define GATE_CLK_TIMER3 69 +#define GATE_CLK_TIMER4 70 +#define GATE_CLK_TIMER5 71 +#define GATE_CLK_TIMER6 72 +#define GATE_CLK_TIMER7 73 +#define GATE_CLK_TIMER8 74 +#define GATE_CLK_100K_EMMC 75 +#define GATE_CLK_100K_SD 76 +#define GATE_CLK_GPIO_DB 77 + +#define GATE_CLK_AXI_DDR_DIV1 78 +#define GATE_CLK_DDR01_DIV1 79 +#define GATE_CLK_DDR23_DIV1 80 + +#define GATE_CLK_DDR01_DIV0 81 +#define GATE_CLK_DDR23_DIV0 82 + +#define GATE_CLK_DDR01 83 +#define GATE_CLK_DDR23 84 +#define GATE_CLK_RP_CPU_NORMAL 85 +#define GATE_CLK_AXI_DDR 86 + +#define MUX_CLK_DDR01 87 +#define MUX_CLK_DDR23 88 +#define MUX_CLK_RP_CPU_NORMAL 89 +#define MUX_CLK_AXI_DDR 90 + +#endif /* __DT_BINDINGS_SOPHGO_SG2042_CLKGEN_H__ */ -- cgit v1.2.3 From 6b0d3355e5a58fd73529fa6f930a877caca3f75d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 26 May 2024 20:17:16 +0200 Subject: leds: class: Add flag to avoid automatic renaming of LED devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a mechanism for drivers to opt-out of the automatic device renaming on conflicts. Those drivers will provide their own conflict resolution. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20240526-cros_ec-kbd-led-framework-v3-2-ee577415a521@weissschuh.net Signed-off-by: Lee Jones --- include/linux/leds.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index 6300313c46b7..36663ac6c58a 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -107,6 +107,7 @@ struct led_classdev { #define LED_BRIGHT_HW_CHANGED BIT(21) #define LED_RETAIN_AT_SHUTDOWN BIT(22) #define LED_INIT_DEFAULT_TRIGGER BIT(23) +#define LED_REJECT_NAME_CONFLICT BIT(24) /* set_brightness_work / blink_timer flags, atomic, private. */ unsigned long work_flags; -- cgit v1.2.3 From 146a06a0d225cae240065233fd168fb0b95a10ff Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sat, 8 Jun 2024 11:01:13 +0200 Subject: HID: rename struct hid_bpf_ops into hid_ops Those operations are the ones from HID, not HID-BPF, and I'd like to reuse hid_bpf_ops as the user facing struct_ops API. Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-1-6ac6ade58329@kernel.org Signed-off-by: Benjamin Tissoires --- include/linux/hid_bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index eec2592dec12..a66103618e6e 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -97,7 +97,7 @@ enum hid_bpf_prog_type { struct hid_report_enum; -struct hid_bpf_ops { +struct hid_ops { struct hid_report *(*hid_get_report)(struct hid_report_enum *report_enum, const u8 *data); int (*hid_hw_raw_request)(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, @@ -110,7 +110,7 @@ struct hid_bpf_ops { const struct bus_type *bus_type; }; -extern struct hid_bpf_ops *hid_bpf_ops; +extern struct hid_ops *hid_ops; struct hid_bpf_prog_list { u16 prog_idx[HID_BPF_MAX_PROGS_PER_DEV]; -- cgit v1.2.3 From ebc0d8093e8c97de459615438edefad1a4ac352c Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sat, 8 Jun 2024 11:01:15 +0200 Subject: HID: bpf: implement HID-BPF through bpf_struct_ops We do this implementation in several steps to not have the CI failing: - first (this patch), we add struct_ops while keeping the existing infra available - then we change the selftests, the examples and the existing in-tree HID-BPF programs - then we remove the existing trace points making old HID-BPF obsolete There are a few advantages of struct_ops over tracing: - compatibility with sleepable programs (for hid_hw_raw_request() in a later patch) - a lot simpler in the kernel: it's a simple rcu protected list - we can add more parameters to the function called without much trouble - the "attach" is now generic through BPF-core: the caller just needs to set hid_id and flags before calling __load(). - all the BPF tough part is not handled in BPF-core through generic processing - hid_bpf_ctx is now only writable where it needs be Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-3-6ac6ade58329@kernel.org Signed-off-by: Benjamin Tissoires --- include/linux/hid_bpf.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index a66103618e6e..c4f4ce10b7dd 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -65,11 +65,12 @@ struct hid_bpf_ctx { * @HID_BPF_FLAG_INSERT_HEAD: insert the given program before any other program * currently attached to the device. This doesn't * guarantee that this program will always be first - * @HID_BPF_FLAG_MAX: sentinel value, not to be used by the callers */ enum hid_bpf_attach_flags { HID_BPF_FLAG_NONE = 0, HID_BPF_FLAG_INSERT_HEAD = _BITUL(0), + + /* private: internal use only */ HID_BPF_FLAG_MAX, }; @@ -112,6 +113,60 @@ struct hid_ops { extern struct hid_ops *hid_ops; +/** + * struct hid_bpf_ops - A BPF struct_ops of callbacks allowing to attach HID-BPF + * programs to a HID device + * @hid_id: the HID uniq ID to attach to. This is writeable before ``load()``, and + * cannot be changed after + * @flags: flags used while attaching the struct_ops to the device. Currently only + * available value is %0 or ``BPF_F_BEFORE``. + * Writeable only before ``load()`` + */ +struct hid_bpf_ops { + /* hid_id needs to stay first so we can easily change it + * from userspace. + */ + int hid_id; + u32 flags; + + /* private: do not show up in the docs */ + struct list_head list; + + /* public: rest should show up in the docs */ + + /** + * @hid_device_event: called whenever an event is coming in from the device + * + * It has the following arguments: + * + * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx + * + * Return: %0 on success and keep processing; a positive + * value to change the incoming size buffer; a negative + * error code to interrupt the processing of this event + * + * Context: Interrupt context. + */ + int (*hid_device_event)(struct hid_bpf_ctx *ctx, enum hid_report_type report_type); + + /** + * @hid_rdesc_fixup: called when the probe function parses the report descriptor + * of the HID device + * + * It has the following arguments: + * + * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx + * + * Return: %0 on success and keep processing; a positive + * value to change the incoming size buffer; a negative + * error code to interrupt the processing of this device + */ + int (*hid_rdesc_fixup)(struct hid_bpf_ctx *ctx); + + /* private: do not show up in the docs */ + struct hid_device *hdev; +}; + struct hid_bpf_prog_list { u16 prog_idx[HID_BPF_MAX_PROGS_PER_DEV]; u8 prog_cnt; @@ -129,6 +184,10 @@ struct hid_bpf { bool destroyed; /* prevents the assignment of any progs */ spinlock_t progs_lock; /* protects RCU update of progs */ + + struct hid_bpf_ops *rdesc_ops; + struct list_head prog_list; + struct mutex prog_list_lock; /* protects prog_list update */ }; /* specific HID-BPF link when a program is attached to a device */ -- cgit v1.2.3 From 4a86220e046da009bef0948e9f51d1d26d68f93c Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sat, 8 Jun 2024 11:01:20 +0200 Subject: HID: bpf: remove tracing HID-BPF capability We can now rely on struct_ops as we cleared the users in-tree. Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-8-6ac6ade58329@kernel.org Signed-off-by: Benjamin Tissoires --- include/linux/hid_bpf.h | 54 +------------------------------------------------ 1 file changed, 1 insertion(+), 53 deletions(-) (limited to 'include') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index c4f4ce10b7dd..447b94aa99ab 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -4,7 +4,7 @@ #define __HID_BPF_H #include -#include +#include #include struct hid_device; @@ -24,11 +24,7 @@ struct hid_device; * * All of these fields are currently read-only. * - * @index: program index in the jump table. No special meaning (a smaller index - * doesn't mean the program will be executed before another program with - * a bigger index). * @hid: the ``struct hid_device`` representing the device itself - * @report_type: used for ``hid_bpf_device_event()`` * @allocated_size: Allocated size of data. * * This is how much memory is available and can be requested @@ -47,54 +43,21 @@ struct hid_device; * @retval: Return value of the previous program. */ struct hid_bpf_ctx { - __u32 index; const struct hid_device *hid; __u32 allocated_size; - enum hid_report_type report_type; union { __s32 retval; __s32 size; }; }; -/** - * enum hid_bpf_attach_flags - flags used when attaching a HIF-BPF program - * - * @HID_BPF_FLAG_NONE: no specific flag is used, the kernel choses where to - * insert the program - * @HID_BPF_FLAG_INSERT_HEAD: insert the given program before any other program - * currently attached to the device. This doesn't - * guarantee that this program will always be first - */ -enum hid_bpf_attach_flags { - HID_BPF_FLAG_NONE = 0, - HID_BPF_FLAG_INSERT_HEAD = _BITUL(0), - - /* private: internal use only */ - HID_BPF_FLAG_MAX, -}; - -/* Following functions are tracepoints that BPF programs can attach to */ -int hid_bpf_device_event(struct hid_bpf_ctx *ctx); -int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx); - /* * Below is HID internal */ -/* internal function to call eBPF programs, not to be used by anybody */ -int __hid_bpf_tail_call(struct hid_bpf_ctx *ctx); - #define HID_BPF_MAX_PROGS_PER_DEV 64 #define HID_BPF_FLAG_MASK (((HID_BPF_FLAG_MAX - 1) << 1) - 1) -/* types of HID programs to attach to */ -enum hid_bpf_prog_type { - HID_BPF_PROG_TYPE_UNDEF = -1, - HID_BPF_PROG_TYPE_DEVICE_EVENT, /* an event is emitted from the device */ - HID_BPF_PROG_TYPE_RDESC_FIXUP, - HID_BPF_PROG_TYPE_MAX, -}; struct hid_report_enum; @@ -167,11 +130,6 @@ struct hid_bpf_ops { struct hid_device *hdev; }; -struct hid_bpf_prog_list { - u16 prog_idx[HID_BPF_MAX_PROGS_PER_DEV]; - u8 prog_cnt; -}; - /* stored in each device */ struct hid_bpf { u8 *device_data; /* allocated when a bpf program of type @@ -179,23 +137,13 @@ struct hid_bpf { * to this HID device */ u32 allocated_data; - - struct hid_bpf_prog_list __rcu *progs[HID_BPF_PROG_TYPE_MAX]; /* attached BPF progs */ bool destroyed; /* prevents the assignment of any progs */ - spinlock_t progs_lock; /* protects RCU update of progs */ - struct hid_bpf_ops *rdesc_ops; struct list_head prog_list; struct mutex prog_list_lock; /* protects prog_list update */ }; -/* specific HID-BPF link when a program is attached to a device */ -struct hid_bpf_link { - struct bpf_link link; - int hid_table_index; -}; - #ifdef CONFIG_HID_BPF u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 *size, int interrupt); -- cgit v1.2.3 From c5958697a5fa29d3ba9332205a88725afe9ed912 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sat, 8 Jun 2024 11:01:22 +0200 Subject: Documentation: HID: amend HID-BPF for struct_ops Now that we are using struct_ops, the docs need to be changed. Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-10-6ac6ade58329@kernel.org Signed-off-by: Benjamin Tissoires --- include/linux/hid_bpf.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 447b94aa99ab..1b4cc1b2c31d 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -20,11 +20,9 @@ struct hid_device; * struct hid_bpf_ctx - User accessible data for all HID programs * * ``data`` is not directly accessible from the context. We need to issue - * a call to ``hid_bpf_get_data()`` in order to get a pointer to that field. + * a call to hid_bpf_get_data() in order to get a pointer to that field. * - * All of these fields are currently read-only. - * - * @hid: the ``struct hid_device`` representing the device itself + * @hid: the &struct hid_device representing the device itself * @allocated_size: Allocated size of data. * * This is how much memory is available and can be requested @@ -41,6 +39,8 @@ struct hid_device; * ``size`` must always be less or equal than ``allocated_size`` (it is enforced * once all BPF programs have been run). * @retval: Return value of the previous program. + * + * ``hid`` and ``allocated_size`` are read-only, ``size`` and ``retval`` are read-write. */ struct hid_bpf_ctx { const struct hid_device *hid; -- cgit v1.2.3 From 33c0fb85b571b0f1bbdbf466e770eebeb29e6f41 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sat, 8 Jun 2024 11:01:28 +0200 Subject: HID: bpf: make part of struct hid_device writable It is useful to change the name, the phys and/or the uniq of a struct hid_device during .rdesc_fixup(). For example, hid-uclogic.ko changes the uniq to store the firmware version to differentiate between 2 devices sharing the same PID. In the same way, changing the device name is useful when the device export 3 nodes, all with the same name. Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-16-6ac6ade58329@kernel.org Signed-off-by: Benjamin Tissoires --- include/linux/hid_bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 1b4cc1b2c31d..65d7e0acc8c2 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -43,7 +43,7 @@ struct hid_device; * ``hid`` and ``allocated_size`` are read-only, ``size`` and ``retval`` are read-write. */ struct hid_bpf_ctx { - const struct hid_device *hid; + struct hid_device *hid; __u32 allocated_size; union { __s32 retval; -- cgit v1.2.3 From 5e5f2f92cccc29f356422d3cbc104f7f42430f22 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 14 Jun 2024 02:43:39 +0300 Subject: platform: arm64: add Lenovo Yoga C630 WOS EC driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lenovo Yoga C630 WOS is a laptop using Snapdragon 850 SoC. Like many laptops it uses an embedded controller (EC) to perform various platform operations, including, but not limited, to Type-C port control or power supply handlng. Add the driver for the EC, that creates devices for UCSI and power supply devices. Reviewed-by: Bryan O'Donoghue Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240614-yoga-ec-driver-v7-2-9f0b9b40ae76@linaro.org [ij: added #include ] Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/lenovo-yoga-c630.h | 44 ++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 include/linux/platform_data/lenovo-yoga-c630.h (limited to 'include') diff --git a/include/linux/platform_data/lenovo-yoga-c630.h b/include/linux/platform_data/lenovo-yoga-c630.h new file mode 100644 index 000000000000..5d1f9fb33cfc --- /dev/null +++ b/include/linux/platform_data/lenovo-yoga-c630.h @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022-2024, Linaro Ltd + * Authors: + * Bjorn Andersson + * Dmitry Baryshkov + */ + +#ifndef _LENOVO_YOGA_C630_DATA_H +#define _LENOVO_YOGA_C630_DATA_H + +struct yoga_c630_ec; +struct notifier_block; + +#define YOGA_C630_MOD_NAME "lenovo_yoga_c630" + +#define YOGA_C630_DEV_UCSI "ucsi" +#define YOGA_C630_DEV_PSY "psy" + +int yoga_c630_ec_read8(struct yoga_c630_ec *ec, u8 addr); +int yoga_c630_ec_read16(struct yoga_c630_ec *ec, u8 addr); + +int yoga_c630_ec_register_notify(struct yoga_c630_ec *ec, struct notifier_block *nb); +void yoga_c630_ec_unregister_notify(struct yoga_c630_ec *ec, struct notifier_block *nb); + +#define YOGA_C630_UCSI_WRITE_SIZE 8 +#define YOGA_C630_UCSI_CCI_SIZE 4 +#define YOGA_C630_UCSI_DATA_SIZE 16 +#define YOGA_C630_UCSI_READ_SIZE (YOGA_C630_UCSI_CCI_SIZE + YOGA_C630_UCSI_DATA_SIZE) + +u16 yoga_c630_ec_ucsi_get_version(struct yoga_c630_ec *ec); +int yoga_c630_ec_ucsi_write(struct yoga_c630_ec *ec, + const u8 req[YOGA_C630_UCSI_WRITE_SIZE]); +int yoga_c630_ec_ucsi_read(struct yoga_c630_ec *ec, + u8 resp[YOGA_C630_UCSI_READ_SIZE]); + +#define LENOVO_EC_EVENT_USB 0x20 +#define LENOVO_EC_EVENT_UCSI 0x21 +#define LENOVO_EC_EVENT_HPD 0x22 +#define LENOVO_EC_EVENT_BAT_STATUS 0x24 +#define LENOVO_EC_EVENT_BAT_INFO 0x25 +#define LENOVO_EC_EVENT_BAT_ADPT_STATUS 0x37 + +#endif -- cgit v1.2.3 From 1652b0bafeaa8281ca9a805d81e13d7647bd2f44 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:08 +0200 Subject: block: remove unused queue limits API Remove all APIs that are unused now that sd and sr have been converted to the atomic queue limits API. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: John Garry Reviewed-by: Nitesh Shetty Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-14-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 24c36929920b..ad995e7a7698 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -332,8 +332,6 @@ struct queue_limits { typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void *data); -void disk_set_zoned(struct gendisk *disk); - #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); @@ -638,18 +636,6 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) return sector >> ilog2(disk->queue->limits.chunk_sectors); } -static inline void disk_set_max_open_zones(struct gendisk *disk, - unsigned int max_open_zones) -{ - disk->queue->limits.max_open_zones = max_open_zones; -} - -static inline void disk_set_max_active_zones(struct gendisk *disk, - unsigned int max_active_zones) -{ - disk->queue->limits.max_active_zones = max_active_zones; -} - static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return bdev->bd_disk->queue->limits.max_open_zones; @@ -929,24 +915,14 @@ static inline void queue_limits_cancel_update(struct request_queue *q) /* * Access functions for manipulating queue properties */ -extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); void blk_queue_max_secure_erase_sectors(struct request_queue *q, unsigned int max_sectors); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, unsigned int max_write_same_sectors); -extern void blk_queue_logical_block_size(struct request_queue *, unsigned int); -extern void blk_queue_max_zone_append_sectors(struct request_queue *q, - unsigned int max_zone_append_sectors); -extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); -void blk_queue_zone_write_granularity(struct request_queue *q, - unsigned int size); -extern void blk_queue_alignment_offset(struct request_queue *q, - unsigned int alignment); void disk_update_readahead(struct gendisk *disk); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); -extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_stacking_limits(struct queue_limits *lim); -- cgit v1.2.3 From 73e3715ed14844067c5c598e72777641004a7f60 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:09 +0200 Subject: block: add special APIs for run-time disabling of discard and friends A few drivers optimistically try to support discard, write zeroes and secure erase and disable the features from the I/O completion handler if the hardware can't support them. This disable can't be done using the atomic queue limits API because the I/O completion handlers can't take sleeping locks or freeze the queue. Keep the existing clearing of the relevant field to zero, but replace the old blk_queue_max_* APIs with new disable APIs that force the value to 0. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Reviewed-by: John Garry Reviewed-by: Nitesh Shetty Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-15-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ad995e7a7698..ac8e0cb2353a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -912,15 +912,31 @@ static inline void queue_limits_cancel_update(struct request_queue *q) mutex_unlock(&q->limits_lock); } +/* + * These helpers are for drivers that have sloppy feature negotiation and might + * have to disable DISCARD, WRITE_ZEROES or SECURE_DISCARD from the I/O + * completion handler when the device returned an indicator that the respective + * feature is not actually supported. They are racy and the driver needs to + * cope with that. Try to avoid this scheme if you can. + */ +static inline void blk_queue_disable_discard(struct request_queue *q) +{ + q->limits.max_discard_sectors = 0; +} + +static inline void blk_queue_disable_secure_erase(struct request_queue *q) +{ + q->limits.max_secure_erase_sectors = 0; +} + +static inline void blk_queue_disable_write_zeroes(struct request_queue *q) +{ + q->limits.max_write_zeroes_sectors = 0; +} + /* * Access functions for manipulating queue properties */ -void blk_queue_max_secure_erase_sectors(struct request_queue *q, - unsigned int max_sectors); -extern void blk_queue_max_discard_sectors(struct request_queue *q, - unsigned int max_discard_sectors); -extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, - unsigned int max_write_same_sectors); void disk_update_readahead(struct gendisk *disk); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); -- cgit v1.2.3 From e9f5f44ad3725335d9c559c3c22cd3726152a7b1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2024 10:48:15 +0200 Subject: block: remove the blk_integrity_profile structure Block layer integrity configuration is a bit complex right now, as it indirects through operation vectors for a simple two-dimensional configuration: a) the checksum type of none, ip checksum, crc, crc64 b) the presence or absence of a reference tag Remove the integrity profile, and instead add a separate csum_type flag which replaces the existing ip-checksum field and a new flag that indicates the presence of the reference tag. This removes up to two layers of indirect calls, remove the need to offload the no-op verification of non-PI metadata to a workqueue and generally simplifies the code. The downside is that block/t10-pi.c now has to be built into the kernel when CONFIG_BLK_DEV_INTEGRITY is supported. Given that both nvme and SCSI require t10-pi.ko, it is loaded for all usual configurations that enabled CONFIG_BLK_DEV_INTEGRITY already, though. Signed-off-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240613084839.1044015-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-integrity.h | 35 ++++++++++------------------------- include/linux/blkdev.h | 9 ++++++++- include/linux/t10-pi.h | 8 -------- 3 files changed, 18 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 7428cb43952d..56ce1ae35580 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -10,7 +10,7 @@ enum blk_integrity_flags { BLK_INTEGRITY_VERIFY = 1 << 0, BLK_INTEGRITY_GENERATE = 1 << 1, BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, - BLK_INTEGRITY_IP_CHECKSUM = 1 << 3, + BLK_INTEGRITY_REF_TAG = 1 << 3, }; struct blk_integrity_iter { @@ -19,22 +19,10 @@ struct blk_integrity_iter { sector_t seed; unsigned int data_size; unsigned short interval; - unsigned char tuple_size; - unsigned char pi_offset; const char *disk_name; }; -typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *); -typedef void (integrity_prepare_fn) (struct request *); -typedef void (integrity_complete_fn) (struct request *, unsigned int); - -struct blk_integrity_profile { - integrity_processing_fn *generate_fn; - integrity_processing_fn *verify_fn; - integrity_prepare_fn *prepare_fn; - integrity_complete_fn *complete_fn; - const char *name; -}; +const char *blk_integrity_profile_name(struct blk_integrity *bi); #ifdef CONFIG_BLK_DEV_INTEGRITY void blk_integrity_register(struct gendisk *, struct blk_integrity *); @@ -44,14 +32,17 @@ int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, struct scatterlist *); int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); -static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +static inline bool +blk_integrity_queue_supports_integrity(struct request_queue *q) { - struct blk_integrity *bi = &disk->queue->integrity; + return q->integrity.tuple_size; +} - if (!bi->profile) +static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +{ + if (!blk_integrity_queue_supports_integrity(disk->queue)) return NULL; - - return bi; + return &disk->queue->integrity; } static inline struct blk_integrity * @@ -60,12 +51,6 @@ bdev_get_integrity(struct block_device *bdev) return blk_get_integrity(bdev->bd_disk); } -static inline bool -blk_integrity_queue_supports_integrity(struct request_queue *q) -{ - return q->integrity.profile; -} - static inline unsigned short queue_max_integrity_segments(const struct request_queue *q) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ac8e0cb2353a..bdd33388e1ce 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -105,9 +105,16 @@ enum { struct disk_events; struct badblocks; +enum blk_integrity_checksum { + BLK_INTEGRITY_CSUM_NONE = 0, + BLK_INTEGRITY_CSUM_IP = 1, + BLK_INTEGRITY_CSUM_CRC = 2, + BLK_INTEGRITY_CSUM_CRC64 = 3, +} __packed ; + struct blk_integrity { - const struct blk_integrity_profile *profile; unsigned char flags; + enum blk_integrity_checksum csum_type; unsigned char tuple_size; unsigned char pi_offset; unsigned char interval_exp; diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 248f4ac95642..d2bafb76badf 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -48,11 +48,6 @@ static inline u32 t10_pi_ref_tag(struct request *rq) return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT) & 0xffffffff; } -extern const struct blk_integrity_profile t10_pi_type1_crc; -extern const struct blk_integrity_profile t10_pi_type1_ip; -extern const struct blk_integrity_profile t10_pi_type3_crc; -extern const struct blk_integrity_profile t10_pi_type3_ip; - struct crc64_pi_tuple { __be64 guard_tag; __be16 app_tag; @@ -79,7 +74,4 @@ static inline u64 ext_pi_ref_tag(struct request *rq) return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT)); } -extern const struct blk_integrity_profile ext_pi_type1_crc64; -extern const struct blk_integrity_profile ext_pi_type3_crc64; - #endif -- cgit v1.2.3 From 3c3e85ddffae93eba1a257eb6939bf5dc1e93b9e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2024 10:48:20 +0200 Subject: block: bypass the STABLE_WRITES flag for protection information Currently registering a checksum-enabled (aka PI) integrity profile sets the QUEUE_FLAG_STABLE_WRITE flag, and unregistering it clears the flag. This can incorrectly clear the flag when the driver requires stable writes even without PI, e.g. in case of iSCSI or NVMe/TCP with data digest enabled. Fix this by looking at the csum_type directly in bdev_stable_writes and not setting the queue flag. Also remove the blk_queue_stable_writes helper as the only user in nvme wants to only look at the actual QUEUE_FLAG_STABLE_WRITE flag as it inherits the integrity configuration by other means. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240613084839.1044015-11-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bdd33388e1ce..f9089750919c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -571,8 +571,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) -#define blk_queue_stable_writes(q) \ - test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ @@ -1300,8 +1298,14 @@ static inline bool bdev_synchronous(struct block_device *bdev) static inline bool bdev_stable_writes(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_STABLE_WRITES, - &bdev_get_queue(bdev)->queue_flags); + struct request_queue *q = bdev_get_queue(bdev); + +#ifdef CONFIG_BLK_DEV_INTEGRITY + /* BLK_INTEGRITY_CSUM_NONE is not available in blkdev.h */ + if (q->integrity.csum_type != 0) + return true; +#endif + return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); } static inline bool bdev_write_cache(struct block_device *bdev) -- cgit v1.2.3 From 9f4aa46f2a7401025d8561495cf8740f773310fc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2024 10:48:21 +0200 Subject: block: invert the BLK_INTEGRITY_{GENERATE,VERIFY} flags Invert the flags so that user set values will be able to persist revalidating the integrity information once we switch the integrity information to queue_limits. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240613084839.1044015-12-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-integrity.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 56ce1ae35580..bafa01d4e7f9 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -7,8 +7,8 @@ struct request; enum blk_integrity_flags { - BLK_INTEGRITY_VERIFY = 1 << 0, - BLK_INTEGRITY_GENERATE = 1 << 1, + BLK_INTEGRITY_NOVERIFY = 1 << 0, + BLK_INTEGRITY_NOGENERATE = 1 << 1, BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, BLK_INTEGRITY_REF_TAG = 1 << 3, }; -- cgit v1.2.3 From c6e56cf6b2e79a463af21286ba951714ed20828c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2024 10:48:22 +0200 Subject: block: move integrity information into queue_limits Move the integrity information into the queue limits so that it can be set atomically with other queue limits, and that the sysfs changes to the read_verify and write_generate flags are properly synchronized. This also allows to provide a more useful helper to stack the integrity fields, although it still is separate from the main stacking function as not all stackable devices want to inherit the integrity settings. Even with that it greatly simplifies the code in md and dm. Note that the integrity field is moved as-is into the queue limits. While there are good arguments for removing the separate blk_integrity structure, this would cause a lot of churn and might better be done at a later time if desired. However the integrity field in the queue_limits structure is now unconditional so that various ifdefs can be avoided or replaced with IS_ENABLED(). Given that tiny size of it that seems like a worthwhile trade off. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240613084839.1044015-13-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-integrity.h | 27 +++++++++++---------------- include/linux/blkdev.h | 12 ++++-------- include/linux/t10-pi.h | 12 ++---------- 3 files changed, 17 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index bafa01d4e7f9..d201140d77a3 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -11,6 +11,7 @@ enum blk_integrity_flags { BLK_INTEGRITY_NOGENERATE = 1 << 1, BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, BLK_INTEGRITY_REF_TAG = 1 << 3, + BLK_INTEGRITY_STACKED = 1 << 4, }; struct blk_integrity_iter { @@ -23,11 +24,15 @@ struct blk_integrity_iter { }; const char *blk_integrity_profile_name(struct blk_integrity *bi); +bool queue_limits_stack_integrity(struct queue_limits *t, + struct queue_limits *b); +static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t, + struct block_device *bdev) +{ + return queue_limits_stack_integrity(t, &bdev->bd_disk->queue->limits); +} #ifdef CONFIG_BLK_DEV_INTEGRITY -void blk_integrity_register(struct gendisk *, struct blk_integrity *); -void blk_integrity_unregister(struct gendisk *); -int blk_integrity_compare(struct gendisk *, struct gendisk *); int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, struct scatterlist *); int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); @@ -35,14 +40,14 @@ int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) { - return q->integrity.tuple_size; + return q->limits.integrity.tuple_size; } static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) { if (!blk_integrity_queue_supports_integrity(disk->queue)) return NULL; - return &disk->queue->integrity; + return &disk->queue->limits.integrity; } static inline struct blk_integrity * @@ -119,17 +124,6 @@ blk_integrity_queue_supports_integrity(struct request_queue *q) { return false; } -static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) -{ - return 0; -} -static inline void blk_integrity_register(struct gendisk *d, - struct blk_integrity *b) -{ -} -static inline void blk_integrity_unregister(struct gendisk *d) -{ -} static inline unsigned short queue_max_integrity_segments(const struct request_queue *q) { @@ -157,4 +151,5 @@ static inline struct bio_vec *rq_integrity_vec(struct request *rq) return NULL; } #endif /* CONFIG_BLK_DEV_INTEGRITY */ + #endif /* _LINUX_BLK_INTEGRITY_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f9089750919c..0c247a716885 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -334,6 +334,8 @@ struct queue_limits { * due to possible offsets. */ unsigned int dma_alignment; + + struct blk_integrity integrity; }; typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, @@ -419,10 +421,6 @@ struct request_queue { struct queue_limits limits; -#ifdef CONFIG_BLK_DEV_INTEGRITY - struct blk_integrity integrity; -#endif /* CONFIG_BLK_DEV_INTEGRITY */ - #ifdef CONFIG_PM struct device *dev; enum rpm_status rpm_status; @@ -1300,11 +1298,9 @@ static inline bool bdev_stable_writes(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); -#ifdef CONFIG_BLK_DEV_INTEGRITY - /* BLK_INTEGRITY_CSUM_NONE is not available in blkdev.h */ - if (q->integrity.csum_type != 0) + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE) return true; -#endif return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); } diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index d2bafb76badf..1773610010eb 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -39,12 +39,8 @@ struct t10_pi_tuple { static inline u32 t10_pi_ref_tag(struct request *rq) { - unsigned int shift = ilog2(queue_logical_block_size(rq->q)); + unsigned int shift = rq->q->limits.integrity.interval_exp; -#ifdef CONFIG_BLK_DEV_INTEGRITY - if (rq->q->integrity.interval_exp) - shift = rq->q->integrity.interval_exp; -#endif return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT) & 0xffffffff; } @@ -65,12 +61,8 @@ static inline u64 lower_48_bits(u64 n) static inline u64 ext_pi_ref_tag(struct request *rq) { - unsigned int shift = ilog2(queue_logical_block_size(rq->q)); + unsigned int shift = rq->q->limits.integrity.interval_exp; -#ifdef CONFIG_BLK_DEV_INTEGRITY - if (rq->q->integrity.interval_exp) - shift = rq->q->integrity.interval_exp; -#endif return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT)); } -- cgit v1.2.3 From 0a5d3258d7c97295a89d22e54733b54aacb62562 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 3 Jun 2024 22:23:15 -0700 Subject: compiler_types.h: Define __retain for __attribute__((__retain__)) Some code includes the __used macro to prevent functions and data from being optimized out. This macro implements __attribute__((__used__)), which operates at the compiler and IR-level, and so still allows a linker to remove objects intended to be kept. Compilers supporting __attribute__((__retain__)) can address this gap by setting the flag SHF_GNU_RETAIN on the section of a function/variable, indicating to the linker the object should be retained. This attribute is available since gcc 11, clang 13, and binutils 2.36. Provide a __retain macro implementing __attribute__((__retain__)), whose first user will be the '__bpf_kfunc' tag. [ Additional remark from discussion: Why is CONFIG_LTO_CLANG added here? The __used macro permits garbage collection at section level, so CLANG_LTO_CLANG without CONFIG_LD_DEAD_CODE_DATA_ELIMINATION should not change final section dynamics? The conditional guard was included to ensure consistent behaviour between __retain and other features forcing split sections. In particular, the same guard is used in vmlinux.lds.h to merge split sections where needed. For example, using __retain in LLVM builds without CONFIG_LTO was failing CI tests on kernel-patches/bpf because the kernel didn't boot properly. And in further testing, the kernel had no issues loading BPF kfunc modules with such split sections, so the module (partial) linking scripts were left alone. ] Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Cc: Yonghong Song Link: https://lore.kernel.org/bpf/ZlmGoT9KiYLZd91S@krava/T/ Link: https://lore.kernel.org/bpf/b31bca5a5e6765a0f32cc8c19b1d9cdbfaa822b5.1717477560.git.Tony.Ambardar@gmail.com --- include/linux/compiler_types.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 93600de3800b..f14c275950b5 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -143,6 +143,29 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __preserve_most #endif +/* + * Annotating a function/variable with __retain tells the compiler to place + * the object in its own section and set the flag SHF_GNU_RETAIN. This flag + * instructs the linker to retain the object during garbage-cleanup or LTO + * phases. + * + * Note that the __used macro is also used to prevent functions or data + * being optimized out, but operates at the compiler/IR-level and may still + * allow unintended removal of objects during linking. + * + * Optional: only supported since gcc >= 11, clang >= 13 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-retain-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#retain + */ +#if __has_attribute(__retain__) && \ + (defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || \ + defined(CONFIG_LTO_CLANG)) +# define __retain __attribute__((__retain__)) +#else +# define __retain +#endif + /* Compiler specific macros. */ #ifdef __clang__ #include -- cgit v1.2.3 From 7bdcedd5c8fb88e7176b93812b139eca5fe0aa46 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 3 Jun 2024 22:23:16 -0700 Subject: bpf: Harden __bpf_kfunc tag against linker kfunc removal BPF kfuncs are often not directly referenced and may be inadvertently removed by optimization steps during kernel builds, thus the __bpf_kfunc tag mitigates against this removal by including the __used macro. However, this macro alone does not prevent removal during linking, and may still yield build warnings (e.g. on mips64el): [...] LD vmlinux BTFIDS vmlinux WARN: resolve_btfids: unresolved symbol bpf_verify_pkcs7_signature WARN: resolve_btfids: unresolved symbol bpf_lookup_user_key WARN: resolve_btfids: unresolved symbol bpf_lookup_system_key WARN: resolve_btfids: unresolved symbol bpf_key_put WARN: resolve_btfids: unresolved symbol bpf_iter_task_next WARN: resolve_btfids: unresolved symbol bpf_iter_css_task_new WARN: resolve_btfids: unresolved symbol bpf_get_file_xattr WARN: resolve_btfids: unresolved symbol bpf_ct_insert_entry WARN: resolve_btfids: unresolved symbol bpf_cgroup_release WARN: resolve_btfids: unresolved symbol bpf_cgroup_from_id WARN: resolve_btfids: unresolved symbol bpf_cgroup_acquire WARN: resolve_btfids: unresolved symbol bpf_arena_free_pages NM System.map SORTTAB vmlinux OBJCOPY vmlinux.32 [...] Update the __bpf_kfunc tag to better guard against linker optimization by including the new __retain compiler macro, which fixes the warnings above. Verify the __retain macro with readelf by checking object flags for 'R': $ readelf -Wa kernel/trace/bpf_trace.o Section Headers: [Nr] Name Type Address Off Size ES Flg Lk Inf Al [...] [178] .text.bpf_key_put PROGBITS 00000000 6420 0050 00 AXR 0 0 8 [...] Key to Flags: [...] R (retain), D (mbind), p (processor specific) Fixes: 57e7c169cd6a ("bpf: Add __bpf_kfunc tag for marking kernel functions as kfuncs") Reported-by: kernel test robot Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Tested-by: Jiri Olsa Reviewed-by: Jiri Olsa Cc: Yonghong Song Closes: https://lore.kernel.org/r/202401211357.OCX9yllM-lkp@intel.com/ Link: https://lore.kernel.org/bpf/ZlmGoT9KiYLZd91S@krava/T/ Link: https://lore.kernel.org/bpf/e9c64e9b5c073dabd457ff45128aabcab7630098.1717477560.git.Tony.Ambardar@gmail.com --- include/linux/btf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index f9e56fd12a9f..7c3e40c3295e 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -82,7 +82,7 @@ * as to avoid issues such as the compiler inlining or eliding either a static * kfunc, or a global kfunc in an LTO build. */ -#define __bpf_kfunc __used noinline +#define __bpf_kfunc __used __retain noinline #define __bpf_kfunc_start_defs() \ __diag_push(); \ -- cgit v1.2.3 From 98d7ca374ba4b39e7535613d40e159f09ca14da2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 12 Jun 2024 18:38:13 -0700 Subject: bpf: Track delta between "linked" registers. Compilers can generate the code r1 = r2 r1 += 0x1 if r2 < 1000 goto ... use knowledge of r2 range in subsequent r1 operations So remember constant delta between r2 and r1 and update r1 after 'if' condition. Unfortunately LLVM still uses this pattern for loops with 'can_loop' construct: for (i = 0; i < 1000 && can_loop; i++) The "undo" pass was introduced in LLVM https://reviews.llvm.org/D121937 to prevent this optimization, but it cannot cover all cases. Instead of fighting middle end optimizer in BPF backend teach the verifier about this pattern. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240613013815.953-3-alexei.starovoitov@gmail.com --- include/linux/bpf_verifier.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 50aa87f8d77f..2b54e25d2364 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -73,7 +73,10 @@ enum bpf_iter_state { struct bpf_reg_state { /* Ordering of fields matters. See states_equal() */ enum bpf_reg_type type; - /* Fixed part of pointer offset, pointer types only */ + /* + * Fixed part of pointer offset, pointer types only. + * Or constant delta between "linked" scalars with the same ID. + */ s32 off; union { /* valid when type == PTR_TO_PACKET */ @@ -167,6 +170,13 @@ struct bpf_reg_state { * Similarly to dynptrs, we use ID to track "belonging" of a reference * to a specific instance of bpf_iter. */ + /* + * Upper bit of ID is used to remember relationship between "linked" + * registers. Example: + * r1 = r2; both will have r1->id == r2->id == N + * r1 += 10; r1->id == N | BPF_ADD_CONST and r1->off == 10 + */ +#define BPF_ADD_CONST (1U << 31) u32 id; /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned * from a pointer-cast helper, bpf_sk_fullsock() and -- cgit v1.2.3 From 894376385a2d80a96816449e4991587a9a5ef0dd Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Thu, 13 Jun 2024 13:20:33 +0000 Subject: KVM: arm64: Add support for FFA_PARTITION_INFO_GET Handle the FFA_PARTITION_INFO_GET host call inside the pKVM hypervisor and copy the response message back to the host buffers. Signed-off-by: Sebastian Ene Reviewed-by: Sudeep Holla Tested-by: Sudeep Holla Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240613132035.1070360-3-sebastianene@google.com Signed-off-by: Oliver Upton --- include/linux/arm_ffa.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index c82d56768101..c6d18f50f671 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -212,6 +212,9 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; } extern const struct bus_type ffa_bus_type; +/* The FF-A 1.0 partition structure lacks the uuid[4] */ +#define FFA_1_0_PARTITON_INFO_SZ (8) + /* FFA transport related */ struct ffa_partition_info { u16 id; -- cgit v1.2.3 From 51104c19d8570eb23208e08eac0e9ae09ced1c15 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 12 Jun 2024 12:59:18 -0700 Subject: kunit: test: Add vm_mmap() allocation resource manager For tests that need to allocate using vm_mmap() (e.g. usercopy and execve), provide the interface to have the allocation tracked by KUnit itself. This requires bringing up a placeholder userspace mm. This combines my earlier attempt at this with Mark Rutland's version[1]. Normally alloc_mm() and arch_pick_mmap_layout() aren't exported for modules, so export these only for KUnit testing. Link: https://lore.kernel.org/lkml/20230321122514.1743889-2-mark.rutland@arm.com/ [1] Co-developed-by: Mark Rutland Signed-off-by: Mark Rutland Reviewed-by: David Gow Signed-off-by: Kees Cook Signed-off-by: Shuah Khan --- include/kunit/test.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index e32b4cb7afa2..ec61cad6b71d 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -480,6 +480,23 @@ static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp return kunit_kmalloc_array(test, n, size, gfp | __GFP_ZERO); } +/** + * kunit_vm_mmap() - Allocate KUnit-tracked vm_mmap() area + * @test: The test context object. + * @file: struct file pointer to map from, if any + * @addr: desired address, if any + * @len: how many bytes to allocate + * @prot: mmap PROT_* bits + * @flag: mmap flags + * @offset: offset into @file to start mapping from. + * + * See vm_mmap() for more information. + */ +unsigned long kunit_vm_mmap(struct kunit *test, struct file *file, + unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flag, + unsigned long offset); + void kunit_cleanup(struct kunit *test); void __printf(2, 3) kunit_log_append(struct string_stream *log, const char *fmt, ...); -- cgit v1.2.3 From e575d3a6dd22123888defb622b1742aa2d45b942 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Fri, 14 Jun 2024 00:00:31 +0300 Subject: net/mlx5: Correct TASR typo into TSAR TSAR is the correct spelling (Transmit Scheduling ARbiter). Signed-off-by: Cosmin Ratiu Reviewed-by: Gal Pressman Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240613210036.1125203-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 17acd0f3ca8e..466dcda40bb5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3914,7 +3914,7 @@ enum { }; enum { - ELEMENT_TYPE_CAP_MASK_TASR = 1 << 0, + ELEMENT_TYPE_CAP_MASK_TSAR = 1 << 0, ELEMENT_TYPE_CAP_MASK_VPORT = 1 << 1, ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2, ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3, -- cgit v1.2.3 From 296eaab825060d0f25e89b2f85ab9fc26128cea8 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Fri, 14 Jun 2024 00:00:36 +0300 Subject: net/mlx5e: Support SWP-mode offload L4 csum calculation Calculate the pseudo-header checksum for both IPSec transport mode and IPSec tunnel mode for mlx5 devices that do not implement a pure hardware checksum offload for L4 checksum calculation. Introduce a capability bit that identifies such mlx5 devices. Signed-off-by: Rahul Rameshbabu Reviewed-by: Gal Pressman Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240613210036.1125203-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 466dcda40bb5..66b921c81c0f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1093,7 +1093,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 tunnel_stateless_ip_over_ip_tx[0x1]; u8 reserved_at_2e[0x2]; u8 max_vxlan_udp_ports[0x8]; - u8 reserved_at_38[0x6]; + u8 swp_csum_l4_partial[0x1]; + u8 reserved_at_39[0x5]; u8 max_geneve_opt_len[0x1]; u8 tunnel_stateless_geneve_rx[0x1]; -- cgit v1.2.3 From 6c3282a6b296385bee2c383442c39f507b0d51dd Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 13 Jun 2024 11:36:06 +0100 Subject: net: stmmac: add select_pcs() platform method Allow platform drivers to provide their logic to select an appropriate PCS. Tested-by: Romain Gantois Reviewed-by: Romain Gantois Signed-off-by: Russell King (Oracle) Link: https://lore.kernel.org/r/E1sHhoM-00Fesu-8E@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 8f0f156d50d3..9c54f82901a1 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -13,7 +13,7 @@ #define __STMMAC_PLATFORM_DATA #include -#include +#include #define MTL_MAX_RX_QUEUES 8 #define MTL_MAX_TX_QUEUES 8 @@ -271,6 +271,8 @@ struct plat_stmmacenet_data { void (*dump_debug_regs)(void *priv); int (*pcs_init)(struct stmmac_priv *priv); void (*pcs_exit)(struct stmmac_priv *priv); + struct phylink_pcs *(*select_pcs)(struct stmmac_priv *priv, + phy_interface_t interface); void *bsp_priv; struct clk *stmmac_clk; struct clk *pclk; -- cgit v1.2.3 From e7da16abf030b39c3598574d5457f324a6f0e4a7 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:33 +0900 Subject: firewire: core: record card index in tracepoinrts events derived from async_outbound_complete_template The asynchronous transaction is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index d695a560673f..ca6ea9bd1eba 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -71,10 +71,11 @@ DECLARE_EVENT_CLASS(async_outbound_initiate_template, // The value of status is one of ack codes and rcodes specific to Linux FireWire subsystem. DECLARE_EVENT_CLASS(async_outbound_complete_template, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), - TP_ARGS(transaction, generation, scode, status, timestamp), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp), TP_STRUCT__entry( __field(u64, transaction) + __field(u8, card_index) __field(u8, generation) __field(u8, scode) __field(u8, status) @@ -82,14 +83,16 @@ DECLARE_EVENT_CLASS(async_outbound_complete_template, ), TP_fast_assign( __entry->transaction = transaction; + __entry->card_index = card_index; __entry->generation = generation; __entry->scode = scode; __entry->status = status; __entry->timestamp = timestamp; ), TP_printk( - "transaction=0x%llx generation=%u scode=%u status=%u timestamp=0x%04x", + "transaction=0x%llx card_index=%u generation=%u scode=%u status=%u timestamp=0x%04x", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, __entry->status, @@ -144,8 +147,8 @@ DEFINE_EVENT(async_outbound_initiate_template, async_request_outbound_initiate, ); DEFINE_EVENT(async_outbound_complete_template, async_request_outbound_complete, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), - TP_ARGS(transaction, generation, scode, status, timestamp) + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp) ); DEFINE_EVENT(async_inbound_template, async_response_inbound, @@ -194,8 +197,8 @@ DEFINE_EVENT_PRINT(async_outbound_initiate_template, async_response_outbound_ini ); DEFINE_EVENT(async_outbound_complete_template, async_response_outbound_complete, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), - TP_ARGS(transaction, generation, scode, status, timestamp) + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp) ); #undef ASYNC_HEADER_GET_DESTINATION -- cgit v1.2.3 From 64e02b64fb1d832a9a5254055a829db64750421a Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:34 +0900 Subject: firewire: core: record card index in tracepoinrts events derived from async_outbound_initiate_template The asynchronous transaction is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index ca6ea9bd1eba..a3d9916cbad1 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -36,10 +36,11 @@ #define QUADLET_SIZE 4 DECLARE_EVENT_CLASS(async_outbound_initiate_template, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, header, data, data_count), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, header, data, data_count), TP_STRUCT__entry( __field(u64, transaction) + __field(u8, card_index) __field(u8, generation) __field(u8, scode) __array(u32, header, ASYNC_HEADER_QUADLET_COUNT) @@ -47,6 +48,7 @@ DECLARE_EVENT_CLASS(async_outbound_initiate_template, ), TP_fast_assign( __entry->transaction = transaction; + __entry->card_index = card_index; __entry->generation = generation; __entry->scode = scode; memcpy(__entry->header, header, QUADLET_SIZE * ASYNC_HEADER_QUADLET_COUNT); @@ -54,8 +56,9 @@ DECLARE_EVENT_CLASS(async_outbound_initiate_template, ), // This format is for the request subaction. TP_printk( - "transaction=0x%llx generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s", + "transaction=0x%llx card_index=%u generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, ASYNC_HEADER_GET_DESTINATION(__entry->header), @@ -142,8 +145,8 @@ DECLARE_EVENT_CLASS(async_inbound_template, ); DEFINE_EVENT(async_outbound_initiate_template, async_request_outbound_initiate, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, header, data, data_count) + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, header, data, data_count) ); DEFINE_EVENT(async_outbound_complete_template, async_request_outbound_complete, @@ -178,11 +181,12 @@ DEFINE_EVENT_PRINT(async_inbound_template, async_request_inbound, ); DEFINE_EVENT_PRINT(async_outbound_initiate_template, async_response_outbound_initiate, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, header, data, data_count), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, header, data, data_count), TP_printk( - "transaction=0x%llx generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s", + "transaction=0x%llx card_index=%u generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, ASYNC_HEADER_GET_DESTINATION(__entry->header), -- cgit v1.2.3 From 65ec7ebefe7de01281cce1f552ebd4dd00386665 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:35 +0900 Subject: firewire: core: record card index in tracepoinrts events derived from async_inbound_template The asynchronous transaction is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index a3d9916cbad1..b72f613cfa02 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -105,10 +105,11 @@ DECLARE_EVENT_CLASS(async_outbound_complete_template, // The value of status is one of ack codes and rcodes specific to Linux FireWire subsystem. DECLARE_EVENT_CLASS(async_inbound_template, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, status, timestamp, header, data, data_count), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp, header, data, data_count), TP_STRUCT__entry( __field(u64, transaction) + __field(u8, card_index) __field(u8, generation) __field(u8, scode) __field(u8, status) @@ -118,6 +119,7 @@ DECLARE_EVENT_CLASS(async_inbound_template, ), TP_fast_assign( __entry->transaction = transaction; + __entry->card_index = card_index; __entry->generation = generation; __entry->scode = scode; __entry->status = status; @@ -127,8 +129,9 @@ DECLARE_EVENT_CLASS(async_inbound_template, ), // This format is for the response subaction. TP_printk( - "transaction=0x%llx generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s", + "transaction=0x%llx card_index=%u generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, __entry->status, @@ -155,16 +158,17 @@ DEFINE_EVENT(async_outbound_complete_template, async_request_outbound_complete, ); DEFINE_EVENT(async_inbound_template, async_response_inbound, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, status, timestamp, header, data, data_count) + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp, header, data, data_count) ); DEFINE_EVENT_PRINT(async_inbound_template, async_request_inbound, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, status, timestamp, header, data, data_count), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp, header, data, data_count), TP_printk( - "transaction=0x%llx generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s", + "transaction=0x%llx card_index=%u generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, __entry->status, -- cgit v1.2.3 From 3cb44a72a39835b368ab78d739819330089aa2bf Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:36 +0900 Subject: firewire: core: record card index in async_phy_outbound_initiate tracepoints event The asynchronous transaction is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-5-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index b72f613cfa02..a59dc26b2a53 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -217,23 +217,26 @@ DEFINE_EVENT(async_outbound_complete_template, async_response_outbound_complete, #undef ASYNC_HEADER_GET_RCODE TRACE_EVENT(async_phy_outbound_initiate, - TP_PROTO(u64 packet, unsigned int generation, u32 first_quadlet, u32 second_quadlet), - TP_ARGS(packet, generation, first_quadlet, second_quadlet), + TP_PROTO(u64 packet, unsigned int card_index, unsigned int generation, u32 first_quadlet, u32 second_quadlet), + TP_ARGS(packet, card_index, generation, first_quadlet, second_quadlet), TP_STRUCT__entry( __field(u64, packet) + __field(u8, card_index) __field(u8, generation) __field(u32, first_quadlet) __field(u32, second_quadlet) ), TP_fast_assign( __entry->packet = packet; + __entry->card_index = card_index; __entry->generation = generation; __entry->first_quadlet = first_quadlet; __entry->second_quadlet = second_quadlet ), TP_printk( - "packet=0x%llx generation=%u first_quadlet=0x%08x second_quadlet=0x%08x", + "packet=0x%llx card_index=%u generation=%u first_quadlet=0x%08x second_quadlet=0x%08x", __entry->packet, + __entry->card_index, __entry->generation, __entry->first_quadlet, __entry->second_quadlet -- cgit v1.2.3 From 810f2aa83563020d834425018303f2aaecef1e6e Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:37 +0900 Subject: firewire: core: record card index in async_phy_outbound_complete tracepoints event The asynchronous transmission of phy packet is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-6-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index a59dc26b2a53..61c7a2461fbc 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -244,23 +244,26 @@ TRACE_EVENT(async_phy_outbound_initiate, ); TRACE_EVENT(async_phy_outbound_complete, - TP_PROTO(u64 packet, unsigned int generation, unsigned int status, unsigned int timestamp), - TP_ARGS(packet, generation, status, timestamp), + TP_PROTO(u64 packet, unsigned int card_index, unsigned int generation, unsigned int status, unsigned int timestamp), + TP_ARGS(packet, card_index, generation, status, timestamp), TP_STRUCT__entry( __field(u64, packet) + __field(u8, card_index) __field(u8, generation) __field(u8, status) __field(u16, timestamp) ), TP_fast_assign( __entry->packet = packet; + __entry->card_index = card_index; __entry->generation = generation; __entry->status = status; __entry->timestamp = timestamp; ), TP_printk( - "packet=0x%llx generation=%u status=%u timestamp=0x%04x", + "packet=0x%llx card_index=%u generation=%u status=%u timestamp=0x%04x", __entry->packet, + __entry->card_index, __entry->generation, __entry->status, __entry->timestamp -- cgit v1.2.3 From abbb4bd96d7f871b10bd7058f7284ffaf4e8257f Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:38 +0900 Subject: firewire: core: record card index in async_phy_inbound tracepoints event The asynchronous transmission of phy packet is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-7-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index 61c7a2461fbc..e5524fc71880 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -271,10 +271,11 @@ TRACE_EVENT(async_phy_outbound_complete, ); TRACE_EVENT(async_phy_inbound, - TP_PROTO(u64 packet, unsigned int generation, unsigned int status, unsigned int timestamp, u32 first_quadlet, u32 second_quadlet), - TP_ARGS(packet, generation, status, timestamp, first_quadlet, second_quadlet), + TP_PROTO(u64 packet, unsigned int card_index, unsigned int generation, unsigned int status, unsigned int timestamp, u32 first_quadlet, u32 second_quadlet), + TP_ARGS(packet, card_index, generation, status, timestamp, first_quadlet, second_quadlet), TP_STRUCT__entry( __field(u64, packet) + __field(u8, card_index) __field(u8, generation) __field(u8, status) __field(u16, timestamp) @@ -290,8 +291,9 @@ TRACE_EVENT(async_phy_inbound, __entry->second_quadlet = second_quadlet ), TP_printk( - "packet=0x%llx generation=%u status=%u timestamp=0x%04x first_quadlet=0x%08x second_quadlet=0x%08x", + "packet=0x%llx card_index=%u generation=%u status=%u timestamp=0x%04x first_quadlet=0x%08x second_quadlet=0x%08x", __entry->packet, + __entry->card_index, __entry->generation, __entry->status, __entry->timestamp, -- cgit v1.2.3 From 7507dbc46b780e9fa2ec3d4db7cd9ce07e722927 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:39 +0900 Subject: firewire: core: record card index in tracepoinrts events derived from bus_reset_arrange_template The asynchronous transmission of phy packet is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-8-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index e5524fc71880..e6485051f546 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -303,36 +303,39 @@ TRACE_EVENT(async_phy_inbound, ); DECLARE_EVENT_CLASS(bus_reset_arrange_template, - TP_PROTO(unsigned int generation, bool short_reset), - TP_ARGS(generation, short_reset), + TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset), + TP_ARGS(card_index, generation, short_reset), TP_STRUCT__entry( + __field(u8, card_index) __field(u8, generation) __field(bool, short_reset) ), TP_fast_assign( + __entry->card_index = card_index; __entry->generation = generation; __entry->short_reset = short_reset; ), TP_printk( - "generation=%u short_reset=%s", + "card_index=%u generation=%u short_reset=%s", + __entry->card_index, __entry->generation, __entry->short_reset ? "true" : "false" ) ); DEFINE_EVENT(bus_reset_arrange_template, bus_reset_initiate, - TP_PROTO(unsigned int generation, bool short_reset), - TP_ARGS(generation, short_reset) + TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset), + TP_ARGS(card_index, generation, short_reset) ); DEFINE_EVENT(bus_reset_arrange_template, bus_reset_schedule, - TP_PROTO(unsigned int generation, bool short_reset), - TP_ARGS(generation, short_reset) + TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset), + TP_ARGS(card_index, generation, short_reset) ); DEFINE_EVENT(bus_reset_arrange_template, bus_reset_postpone, - TP_PROTO(unsigned int generation, bool short_reset), - TP_ARGS(generation, short_reset) + TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset), + TP_ARGS(card_index, generation, short_reset) ); TRACE_EVENT(bus_reset_handle, -- cgit v1.2.3 From 893098b2af3ea12bab2f505aa825662b379df67d Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:40 +0900 Subject: firewire: core: record card index in bus_reset_handle tracepoints event The bus reset event occurs in the bus managed by one of 1394 OHCI controller in Linux system, however the existing tracepoints events has the lack of data about it to distinguish the issued hardware from the others. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-9-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index e6485051f546..5ccc0d91b220 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -339,22 +339,25 @@ DEFINE_EVENT(bus_reset_arrange_template, bus_reset_postpone, ); TRACE_EVENT(bus_reset_handle, - TP_PROTO(unsigned int generation, unsigned int node_id, bool bm_abdicate, u32 *self_ids, unsigned int self_id_count), - TP_ARGS(generation, node_id, bm_abdicate, self_ids, self_id_count), + TP_PROTO(unsigned int card_index, unsigned int generation, unsigned int node_id, bool bm_abdicate, u32 *self_ids, unsigned int self_id_count), + TP_ARGS(card_index, generation, node_id, bm_abdicate, self_ids, self_id_count), TP_STRUCT__entry( + __field(u8, card_index) __field(u8, generation) __field(u8, node_id) __field(bool, bm_abdicate) __dynamic_array(u32, self_ids, self_id_count) ), TP_fast_assign( + __entry->card_index = card_index; __entry->generation = generation; __entry->node_id = node_id; __entry->bm_abdicate = bm_abdicate; memcpy(__get_dynamic_array(self_ids), self_ids, __get_dynamic_array_len(self_ids)); ), TP_printk( - "generation=%u node_id=0x%04x bm_abdicate=%s self_ids=%s", + "card_index=%u generation=%u node_id=0x%04x bm_abdicate=%s self_ids=%s", + __entry->card_index, __entry->generation, __entry->node_id, __entry->bm_abdicate ? "true" : "false", -- cgit v1.2.3 From 384a746bb55960aa5ffb3a67de08f11fc2f51042 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 5 Jun 2024 11:17:10 +0200 Subject: Revert "mm: init_mlocked_on_free_v3" There was insufficient review and no agreement that this is the right approach. There are serious flaws with the implementation that make processes using mlock() not even work with simple fork() [1] and we get reliable crashes when rebooting. Further, simply because we might be unmapping a single PTE of a large mlocked folio, we shouldn't zero out the whole folio. ... especially because the code can also *corrupt* urelated memory because kernel_init_pages(page, folio_nr_pages(folio)); Could end up writing outside of the actual folio if we work with a tail page. Let's revert it. Once there is agreement that this is the right approach, the issues were fixed and there was reasonable review and proper testing, we can consider it again. [1] https://lkml.kernel.org/r/4da9da2f-73e4-45fd-b62f-a8a513314057@redhat.com Link: https://lkml.kernel.org/r/20240605091710.38961-1-david@redhat.com Fixes: ba42b524a040 ("mm: init_mlocked_on_free_v3") Signed-off-by: David Hildenbrand Reported-by: David Wang <00107082@163.com> Closes: https://lore.kernel.org/lkml/20240528151340.4282-1-00107082@163.com/ Reported-by: Lance Yang Closes: https://lkml.kernel.org/r/20240601140917.43562-1-ioworker0@gmail.com Acked-by: Lance Yang Cc: York Jasper Niebuhr Cc: Matthew Wilcox (Oracle) Cc: Kees Cook Signed-off-by: Andrew Morton --- include/linux/mm.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9849dfda44d4..9a5652c5fadd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3776,14 +3776,7 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free); static inline bool want_init_on_free(void) { return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON, - &init_on_free); -} - -DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free); -static inline bool want_init_mlocked_on_free(void) -{ - return static_branch_maybe(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, - &init_mlocked_on_free); + &init_on_free); } extern bool _debug_pagealloc_enabled_early; -- cgit v1.2.3 From a273559e9eb68cb58c57803d76a1622b8324a878 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 1 Jun 2024 16:38:40 -0700 Subject: lib/alloc_tag: fix RCU imbalance in pgalloc_tag_get() put_page_tag_ref() should be called only when get_page_tag_ref() returns a valid reference because only in that case get_page_tag_ref() enters RCU read section while put_page_tag_ref() will call rcu_read_unlock() even if the provided reference is NULL. Fix pgalloc_tag_get() which does not follow this rule causing RCU imbalance. Add a warning in put_page_tag_ref() to catch any future mistakes. Link: https://lkml.kernel.org/r/20240601233840.617458-1-surenb@google.com Fixes: cc92eba1c88b ("mm: fix non-compound multi-order memory accounting in __free_pages") Signed-off-by: Suren Baghdasaryan Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202405271029.6d2f9c4c-lkp@intel.com Acked-by: Vlastimil Babka Cc: Kent Overstreet Cc: Kees Cook Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 86ba5d33e43b..9cacadbd61f8 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -37,6 +37,9 @@ static inline union codetag_ref *get_page_tag_ref(struct page *page) static inline void put_page_tag_ref(union codetag_ref *ref) { + if (WARN_ON(!ref)) + return; + page_ext_put(page_ext_from_codetag_ref(ref)); } @@ -102,9 +105,11 @@ static inline struct alloc_tag *pgalloc_tag_get(struct page *page) union codetag_ref *ref = get_page_tag_ref(page); alloc_tag_sub_check(ref); - if (ref && ref->ct) - tag = ct_to_alloc_tag(ref->ct); - put_page_tag_ref(ref); + if (ref) { + if (ref->ct) + tag = ct_to_alloc_tag(ref->ct); + put_page_tag_ref(ref); + } } return tag; -- cgit v1.2.3 From 6a50c9b512f7734bc356f4bd47885a6f7c98491a Mon Sep 17 00:00:00 2001 From: Ran Xiaokai Date: Fri, 7 Jun 2024 17:40:48 +0800 Subject: mm: huge_memory: fix misused mapping_large_folio_support() for anon folios When I did a large folios split test, a WARNING "[ 5059.122759][ T166] Cannot split file folio to non-0 order" was triggered. But the test cases are only for anonmous folios. while mapping_large_folio_support() is only reasonable for page cache folios. In split_huge_page_to_list_to_order(), the folio passed to mapping_large_folio_support() maybe anonmous folio. The folio_test_anon() check is missing. So the split of the anonmous THP is failed. This is also the same for shmem_mapping(). We'd better add a check for both. But the shmem_mapping() in __split_huge_page() is not involved, as for anonmous folios, the end parameter is set to -1, so (head[i].index >= end) is always false. shmem_mapping() is not called. Also add a VM_WARN_ON_ONCE() in mapping_large_folio_support() for anon mapping, So we can detect the wrong use more easily. THP folios maybe exist in the pagecache even the file system doesn't support large folio, it is because when CONFIG_TRANSPARENT_HUGEPAGE is enabled, khugepaged will try to collapse read-only file-backed pages to THP. But the mapping does not actually support multi order large folios properly. Using /sys/kernel/debug/split_huge_pages to verify this, with this patch, large anon THP is successfully split and the warning is ceased. Link: https://lkml.kernel.org/r/202406071740485174hcFl7jRxncsHDtI-Pz-o@zte.com.cn Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages") Reviewed-by: Barry Song Reviewed-by: Zi Yan Acked-by: David Hildenbrand Signed-off-by: Ran Xiaokai Cc: Michal Hocko Cc: xu xin Cc: Yang Yang Cc: Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ee633712bba0..59f1df0cde5a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -381,6 +381,10 @@ static inline void mapping_set_large_folios(struct address_space *mapping) */ static inline bool mapping_large_folio_support(struct address_space *mapping) { + /* AS_LARGE_FOLIO_SUPPORT is only reasonable for pagecache folios */ + VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON, + "Anonymous mapping always supports large folio"); + return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } -- cgit v1.2.3 From 01c8f9806bde438ca1c8cbbc439f0a14a6694f6c Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Tue, 11 Jun 2024 15:32:29 +0200 Subject: kcov: don't lose track of remote references during softirqs In kcov_remote_start()/kcov_remote_stop(), we swap the previous KCOV metadata of the current task into a per-CPU variable. However, the kcov_mode_enabled(mode) check is not sufficient in the case of remote KCOV coverage: current->kcov_mode always remains KCOV_MODE_DISABLED for remote KCOV objects. If the original task that has invoked the KCOV_REMOTE_ENABLE ioctl happens to get interrupted and kcov_remote_start() is called, it ultimately leads to kcov_remote_stop() NOT restoring the original KCOV reference. So when the task exits, all registered remote KCOV handles remain active forever. The most uncomfortable effect (at least for syzkaller) is that the bug prevents the reuse of the same /sys/kernel/debug/kcov descriptor. If we obtain it in the parent process and then e.g. drop some capabilities and continuously fork to execute individual programs, at some point current->kcov of the forked process is lost, kcov_task_exit() takes no action, and all KCOV_REMOTE_ENABLE ioctls calls from subsequent forks fail. And, yes, the efficiency is also affected if we keep on losing remote kcov objects. a) kcov_remote_map keeps on growing forever. b) (If I'm not mistaken), we're also not freeing the memory referenced by kcov->area. Fix it by introducing a special kcov_mode that is assigned to the task that owns a KCOV remote object. It makes kcov_mode_enabled() return true and yet does not trigger coverage collection in __sanitizer_cov_trace_pc() and write_comp_data(). [nogikh@google.com: replace WRITE_ONCE() with an ordinary assignment] Link: https://lkml.kernel.org/r/20240614171221.2837584-1-nogikh@google.com Link: https://lkml.kernel.org/r/20240611133229.527822-1-nogikh@google.com Fixes: 5ff3b30ab57d ("kcov: collect coverage from interrupts") Signed-off-by: Aleksandr Nogikh Reviewed-by: Dmitry Vyukov Reviewed-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Arnd Bergmann Cc: Marco Elver Cc: Signed-off-by: Andrew Morton --- include/linux/kcov.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/kcov.h b/include/linux/kcov.h index b851ba415e03..3b479a3d235a 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -21,6 +21,8 @@ enum kcov_mode { KCOV_MODE_TRACE_PC = 2, /* Collecting comparison operands mode. */ KCOV_MODE_TRACE_CMP = 3, + /* The process owns a KCOV remote reference. */ + KCOV_MODE_REMOTE = 4, }; #define KCOV_IN_CTXSW (1 << 30) -- cgit v1.2.3 From 4604b3888f614a353c7afa17bdc8e7393bb1f9a1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 Jun 2024 21:51:33 +0300 Subject: hwrng: core - Remove list.h from the hw_random.h The 'struct list' type is defined in types.h, no need to include list.h for that. Signed-off-by: Andy Shevchenko Signed-off-by: Herbert Xu --- include/linux/hw_random.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 136e9842120e..b424555753b1 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -13,9 +13,8 @@ #define LINUX_HWRANDOM_H_ #include -#include -#include #include +#include /** * struct hwrng - Hardware Random Number Generator driver -- cgit v1.2.3 From 0eb3bed57a06a20c612012127f4405d48fa4a50f Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Fri, 7 Jun 2024 18:34:17 -0400 Subject: crypto: ecc - Add comment to ecc_digits_from_bytes about input byte array Add comment to ecc_digits_from_bytes kdoc that the first byte is expected to hold the most significant bits of the large integer that is converted into an array of digits. Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- include/crypto/internal/ecc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h index f7e75e1e71f3..0717a53ae732 100644 --- a/include/crypto/internal/ecc.h +++ b/include/crypto/internal/ecc.h @@ -63,6 +63,9 @@ static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigit * @nbytes Size of input byte array * @out Output digits array * @ndigits: Number of digits to create from byte array + * + * The first byte in the input byte array is expected to hold the most + * significant bits of the large integer. */ void ecc_digits_from_bytes(const u8 *in, unsigned int nbytes, u64 *out, unsigned int ndigits); -- cgit v1.2.3 From 8043832e2a123fd9372007a29192f2f3ba328cd6 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Fri, 14 Jun 2024 11:05:43 +0300 Subject: memblock: use numa_valid_node() helper to check for invalid node ID Introduce numa_valid_node(nid) that verifies that nid is a valid node ID and use that instead of comparing nid parameter with either NUMA_NO_NODE or MAX_NUMNODES. This makes the checks for valid node IDs consistent and more robust and allows to get rid of multiple WARNings. Suggested-by: Linus Torvalds Signed-off-by: Mike Rapoport (IBM) --- include/linux/numa.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/numa.h b/include/linux/numa.h index 1d43371fafd2..eb19503604fe 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -15,6 +15,11 @@ #define NUMA_NO_NODE (-1) #define NUMA_NO_MEMBLK (-1) +static inline bool numa_valid_node(int nid) +{ + return nid >= 0 && nid < MAX_NUMNODES; +} + /* optionally keep NUMA memory info available post init */ #ifdef CONFIG_NUMA_KEEP_MEMINFO #define __initdata_or_meminfo -- cgit v1.2.3 From d98995b4bf981519dde4af0a081c393d62474039 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 3 Jun 2024 13:26:37 +0300 Subject: net/mlx5: Reimplement write combining test The test of write combining was added before in mlx5_ib driver. It opens UD QP and posts NOP WQEs, and uses BlueFlame doorbell. When BlueFlame is used, WQEs get written directly to a PCI BAR of the device (in addition to memory) so that the device handles them without having to access memory. In this test, the WQEs written in memory are different from the ones written to the BlueFlame which request CQE update. By checking the completion reports posted on CQ, we can know if BlueFlame succeeds or not. The write combining must be supported if BlueFlame succeeds as its register is written using write combining. This patch reimplements the test in the same way, but using a pair of SQ and CQ only. It is moved to mlx5_core as a general feature used by both mlx5_core and mlx5_ib. Besides, save write combine test result of the PCI function, so that its thousands of child functions such as SF can query without paying the time and resource penalty by itself. The test function is called only after failing to get the cached result. With this enhancement, all thousands of SFs of the PF attached to same driver no longer need to perform WC check explicitly, which is already done in the system. This saves several commands per SF, thereby speeds up SF creation and also saves completion EQ creation. Signed-off-by: Jianbo Liu Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/4ff5a8cc4c5b5b0d98397baa45a5019bcdbf096e.1717409369.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 779cfdf2e9d6..0d31f77396fc 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -766,6 +766,12 @@ struct mlx5_hca_cap { u32 max[MLX5_UN_SZ_DW(hca_cap_union)]; }; +enum mlx5_wc_state { + MLX5_WC_STATE_UNINITIALIZED, + MLX5_WC_STATE_UNSUPPORTED, + MLX5_WC_STATE_SUPPORTED, +}; + struct mlx5_core_dev { struct device *device; enum mlx5_coredev_type coredev_type; @@ -824,6 +830,9 @@ struct mlx5_core_dev { #endif u64 num_ipsec_offloads; struct mlx5_sd *sd; + enum mlx5_wc_state wc_state; + /* sync write combining state */ + struct mutex wc_state_lock; }; struct mlx5_db { @@ -1375,4 +1384,6 @@ static inline bool mlx5_is_macsec_roce_supported(struct mlx5_core_dev *mdev) enum { MLX5_OCTWORD = 16, }; + +bool mlx5_wc_support_get(struct mlx5_core_dev *mdev); #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From b339e0a39dc37726712b9f0485d78fe4306d1667 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 13 Jun 2024 21:00:04 +0300 Subject: RDMA/mlx5: Add Qcounters req_transport_retries_exceeded/req_rnr_retries_exceeded The req_transport_retries_exceeded counter shows the number of times requester detected transport retries exceed error. The req_rnr_retries_exceeded counter show the number of times the requester detected RNR NAKs retries exceed error. Signed-off-by: Patrisious Haddad Link: https://lore.kernel.org/r/250466af94f4989d638fab168e246035530e912f.1718301543.git.leon@kernel.org Reviewed-by: Simon Horman Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5df52e15f7d6..09d9d87d62c6 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5629,7 +5629,11 @@ struct mlx5_ifc_query_q_counter_out_bits { u8 local_ack_timeout_err[0x20]; - u8 reserved_at_320[0xa0]; + u8 reserved_at_320[0x60]; + + u8 req_rnr_retries_exceeded[0x20]; + + u8 reserved_at_3a0[0x20]; u8 resp_local_length_error[0x20]; -- cgit v1.2.3 From 81cc927d9c5eefd4a1b08e16b0ab2263f36d03f7 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 23 May 2024 17:45:17 -0400 Subject: io_uring: Drop per-ctx dummy_ubuf Commit 19a63c402170 ("io_uring/rsrc: keep one global dummy_ubuf") replaced it with a global static object but this stayed behind. Fixes: 19a63c402170 ("io_uring/rsrc: keep one global dummy_ubuf") Signed-off-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20240523214517.31803-1-krisman@suse.de Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index b48570eaa449..93c9044ec3fe 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -373,7 +373,6 @@ struct io_ring_ctx { struct io_restriction restrictions; /* slow path rsrc auxilary data, used by update/register */ - struct io_mapped_ubuf *dummy_ubuf; struct io_rsrc_data *file_data; struct io_rsrc_data *buf_data; -- cgit v1.2.3 From 200f3abd14db55f9aadcb74f4e7a678f1c469ba1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 3 Jun 2024 11:51:19 -0600 Subject: io_uring/eventfd: move eventfd handling to separate file This is pretty nicely abstracted already, but let's move it to a separate file rather than have it in the main io_uring file. With that, we can also move the io_ev_fd struct and enum out of global scope. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 93c9044ec3fe..850e30be9322 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -211,14 +211,6 @@ struct io_submit_state { struct blk_plug plug; }; -struct io_ev_fd { - struct eventfd_ctx *cq_ev_fd; - unsigned int eventfd_async: 1; - struct rcu_head rcu; - atomic_t refs; - atomic_t ops; -}; - struct io_alloc_cache { void **entries; unsigned int nr_cached; -- cgit v1.2.3 From 3474d1b93f897ab33ce160e759afd47d5f412de4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 13 Jun 2024 19:28:27 +0000 Subject: io_uring/io-wq: make io_wq_work flags atomic The work flags can be set/accessed from different tasks, both the originator of the request, and the io-wq workers. While modifications aren't concurrent, it still makes KMSAN unhappy. There's no real downside to just making the flag reading/manipulation use proper atomics here. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 850e30be9322..1052a68fd68d 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -50,7 +50,7 @@ struct io_wq_work_list { struct io_wq_work { struct io_wq_work_node list; - unsigned flags; + atomic_t flags; /* place it here instead of io_kiocb as it fills padding and saves 4B */ int cancel_seq; }; -- cgit v1.2.3 From c3042a5403ef2be622023fcc3b11fc1aa08ba7fa Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 14 Jun 2024 09:03:44 +0000 Subject: block: Drop locking annotation for limits_lock Currently compiling block/blk-settings.c with C=1 gives the following warning: block/blk-settings.c:262:9: warning: context imbalance in 'queue_limits_commit_update' - wrong count at exit request_queue.limits_lock is a mutex. Sparse locking annotation for mutexes are currently not supported - see [0] - so drop that locking annotation. [0] https://lore.kernel.org/lkml/cover.1579893447.git.jbi.octave@gmail.com/T/#mbb8bda6c0a7ca7ce19f46df976a8e3b489745488 Fixes: d690cb8ae14bd ("block: add an API to atomically update queue limits") Signed-off-by: John Garry Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240614090345.655716-3-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0c247a716885..83af6ac5aa48 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -893,7 +893,6 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset, */ static inline struct queue_limits queue_limits_start_update(struct request_queue *q) - __acquires(q->limits_lock) { mutex_lock(&q->limits_lock); return q->limits; -- cgit v1.2.3 From 1ccfd1a4c809d99891142c1bb9851daa42e40f0d Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 6 Jun 2024 08:51:54 +0900 Subject: firewire: core: arrangement header inclusion for tracepoints events It is a bit inconvenient to put the relative path to local header from tree-wide header. This commit delegates the selection to include headers into users. Link: https://lore.kernel.org/r/20240605235155.116468-11-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index 5ccc0d91b220..1a18533778f1 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -11,7 +11,7 @@ #include -#include "../../../drivers/firewire/packet-header-definitions.h" +// Some macros are defined in 'drivers/firewire/packet-header-definitions.h'. // The content of TP_printk field is preprocessed, then put to the module binary. #define ASYNC_HEADER_GET_DESTINATION(header) \ -- cgit v1.2.3 From 677ceae190732ee844f940d1e4860af4022d2be3 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 6 Jun 2024 08:51:55 +0900 Subject: firewire: core: add tracepoints event for self_id_sequence It is helpful to trace the content of self ID sequence when the core function building bus topology. This commit adds a tracepoints event fot the purpose. It seems not to achieve printing variable length of array in print time without any storage, thus the structure of event includes a superfluous array to store the state of port. Additionally, there is no helper function to print symbol array, thus the state of port is printed as raw value. Link: https://lore.kernel.org/r/20240605235155.116468-12-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 59 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index 1a18533778f1..1d0d63b0f8e7 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -366,6 +366,65 @@ TRACE_EVENT(bus_reset_handle, ) ); +// Some macros are defined in 'drivers/firewire/phy-packet-definitions.h'. + +// The content of TP_printk field is preprocessed, then put to the module binary. + +#define PHY_PACKET_SELF_ID_GET_PHY_ID(quads) \ + ((((const u32 *)quads)[0] & SELF_ID_PHY_ID_MASK) >> SELF_ID_PHY_ID_SHIFT) + +#define PHY_PACKET_SELF_ID_GET_LINK_ACTIVE(quads) \ + ((((const u32 *)quads)[0] & SELF_ID_ZERO_LINK_ACTIVE_MASK) >> SELF_ID_ZERO_LINK_ACTIVE_SHIFT) + +#define PHY_PACKET_SELF_ID_GET_GAP_COUNT(quads) \ + ((((const u32 *)quads)[0] & SELF_ID_ZERO_GAP_COUNT_MASK) >> SELF_ID_ZERO_GAP_COUNT_SHIFT) + +#define PHY_PACKET_SELF_ID_GET_SCODE(quads) \ + ((((const u32 *)quads)[0] & SELF_ID_ZERO_SCODE_MASK) >> SELF_ID_ZERO_SCODE_SHIFT) + +#define PHY_PACKET_SELF_ID_GET_CONTENDER(quads) \ + ((((const u32 *)quads)[0] & SELF_ID_ZERO_CONTENDER_MASK) >> SELF_ID_ZERO_CONTENDER_SHIFT) + +#define PHY_PACKET_SELF_ID_GET_POWER_CLASS(quads) \ + ((((const u32 *)quads)[0] & SELF_ID_ZERO_POWER_CLASS_MASK) >> SELF_ID_ZERO_POWER_CLASS_SHIFT) + +#define PHY_PACKET_SELF_ID_GET_INITIATED_RESET(quads) \ + ((((const u32 *)quads)[0] & SELF_ID_ZERO_INITIATED_RESET_MASK) >> SELF_ID_ZERO_INITIATED_RESET_SHIFT) + +void copy_port_status(u8 *port_status, unsigned int port_capacity, const u32 *self_id_sequence, + unsigned int quadlet_count); + +TRACE_EVENT(self_id_sequence, + TP_PROTO(const u32 *self_id_sequence, unsigned int quadlet_count, unsigned int generation), + TP_ARGS(self_id_sequence, quadlet_count, generation), + TP_STRUCT__entry( + __field(u8, generation) + __dynamic_array(u8, port_status, self_id_sequence_get_port_capacity(quadlet_count)) + __dynamic_array(u32, self_id_sequence, quadlet_count) + ), + TP_fast_assign( + __entry->generation = generation; + copy_port_status(__get_dynamic_array(port_status), __get_dynamic_array_len(port_status), + self_id_sequence, quadlet_count); + memcpy(__get_dynamic_array(self_id_sequence), self_id_sequence, + __get_dynamic_array_len(self_id_sequence)); + ), + TP_printk( + "generation=%u phy_id=0x%02x link_active=%s gap_count=%u scode=%u contender=%s power_class=%u initiated_reset=%s port_status=%s self_id_sequence=%s", + __entry->generation, + PHY_PACKET_SELF_ID_GET_PHY_ID(__get_dynamic_array(self_id_sequence)), + PHY_PACKET_SELF_ID_GET_LINK_ACTIVE(__get_dynamic_array(self_id_sequence)) ? "true" : "false", + PHY_PACKET_SELF_ID_GET_GAP_COUNT(__get_dynamic_array(self_id_sequence)), + PHY_PACKET_SELF_ID_GET_SCODE(__get_dynamic_array(self_id_sequence)), + PHY_PACKET_SELF_ID_GET_CONTENDER(__get_dynamic_array(self_id_sequence)) ? "true" : "false", + PHY_PACKET_SELF_ID_GET_POWER_CLASS(__get_dynamic_array(self_id_sequence)), + PHY_PACKET_SELF_ID_GET_INITIATED_RESET(__get_dynamic_array(self_id_sequence)) ? "true" : "false", + __print_array(__get_dynamic_array(port_status), __get_dynamic_array_len(port_status), 1), + __print_array(__get_dynamic_array(self_id_sequence), + __get_dynamic_array_len(self_id_sequence) / QUADLET_SIZE, QUADLET_SIZE) + ) +); + #undef QUADLET_SIZE #endif // _FIREWIRE_TRACE_EVENT_H -- cgit v1.2.3 From 2fd22faf0e9f120c39e6f47e5622b0039bb10817 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 14 Jun 2024 09:42:51 +0900 Subject: firewire: core: record card index in tracepoints event for self ID sequence This patch is for for-next branch. The selfIDComplete event occurs in the bus managed by one of 1394 OHCI controller in Linux system, while the existing tracepoints events has the lack of data about it to distinguish the issued hardware from the others. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240614004251.460649-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index 1d0d63b0f8e7..d237a30d197b 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -395,14 +395,16 @@ void copy_port_status(u8 *port_status, unsigned int port_capacity, const u32 *se unsigned int quadlet_count); TRACE_EVENT(self_id_sequence, - TP_PROTO(const u32 *self_id_sequence, unsigned int quadlet_count, unsigned int generation), - TP_ARGS(self_id_sequence, quadlet_count, generation), + TP_PROTO(unsigned int card_index, const u32 *self_id_sequence, unsigned int quadlet_count, unsigned int generation), + TP_ARGS(card_index, self_id_sequence, quadlet_count, generation), TP_STRUCT__entry( + __field(u8, card_index) __field(u8, generation) __dynamic_array(u8, port_status, self_id_sequence_get_port_capacity(quadlet_count)) __dynamic_array(u32, self_id_sequence, quadlet_count) ), TP_fast_assign( + __entry->card_index = card_index; __entry->generation = generation; copy_port_status(__get_dynamic_array(port_status), __get_dynamic_array_len(port_status), self_id_sequence, quadlet_count); @@ -410,7 +412,8 @@ TRACE_EVENT(self_id_sequence, __get_dynamic_array_len(self_id_sequence)); ), TP_printk( - "generation=%u phy_id=0x%02x link_active=%s gap_count=%u scode=%u contender=%s power_class=%u initiated_reset=%s port_status=%s self_id_sequence=%s", + "card_index=%u generation=%u phy_id=0x%02x link_active=%s gap_count=%u scode=%u contender=%s power_class=%u initiated_reset=%s port_status=%s self_id_sequence=%s", + __entry->card_index, __entry->generation, PHY_PACKET_SELF_ID_GET_PHY_ID(__get_dynamic_array(self_id_sequence)), PHY_PACKET_SELF_ID_GET_LINK_ACTIVE(__get_dynamic_array(self_id_sequence)) ? "true" : "false", -- cgit v1.2.3 From 1c75adb22d49ca9389333ca5e6939052a7203111 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 13 Jun 2024 22:59:09 +0200 Subject: ASoC: SOF: mediatek: Constify struct mtk_adsp_ipc_ops 'struct mtk_adsp_ipc_ops' is not modified in these drivers. Constifying this structure moves some data to a read-only section, so increase overall security. In order to do it, "struct mtk_adsp_ipc" also needs to be adjusted to this new const qualifier. On a x86_64, with allmodconfig: Before: ====== text data bss dec hex filename 15533 2383 0 17916 45fc sound/soc/sof/mediatek/mt8195/mt8195.o After: ===== text data bss dec hex filename 15557 2367 0 17924 4604 sound/soc/sof/mediatek/mt8195/mt8195.o Signed-off-by: Christophe JAILLET Link: https://msgid.link/r/a45d6b2b5ec040ea0fc78fca662c2dca3f13a49f.1718312321.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- include/linux/firmware/mediatek/mtk-adsp-ipc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/firmware/mediatek/mtk-adsp-ipc.h b/include/linux/firmware/mediatek/mtk-adsp-ipc.h index 5b1d16fa3f56..6e86799a7dc4 100644 --- a/include/linux/firmware/mediatek/mtk-adsp-ipc.h +++ b/include/linux/firmware/mediatek/mtk-adsp-ipc.h @@ -40,7 +40,7 @@ struct mtk_adsp_chan { struct mtk_adsp_ipc { struct mtk_adsp_chan chans[MTK_ADSP_MBOX_NUM]; struct device *dev; - struct mtk_adsp_ipc_ops *ops; + const struct mtk_adsp_ipc_ops *ops; void *private_data; }; -- cgit v1.2.3 From f22b4b55edb507a2b30981e133b66b642be4d13f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 13 Jun 2024 14:33:16 -0700 Subject: net: make for_each_netdev_dump() a little more bug-proof I find the behavior of xa_for_each_start() slightly counter-intuitive. It doesn't end the iteration by making the index point after the last element. IOW calling xa_for_each_start() again after it "finished" will run the body of the loop for the last valid element, instead of doing nothing. This works fine for netlink dumps if they terminate correctly (i.e. coalesce or carefully handle NLM_DONE), but as we keep getting reminded legacy dumps are unlikely to go away. Fixing this generically at the xa_for_each_start() level seems hard - there is no index reserved for "end of iteration". ifindexes are 31b wide, tho, and iterator is ulong so for for_each_netdev_dump() it's safe to go to the next element. Signed-off-by: Jakub Kicinski Reviewed-by: Przemek Kitszel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f148a01dd1d1..85111502cf8f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3021,7 +3021,8 @@ int call_netdevice_notifiers_info(unsigned long val, #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) #define for_each_netdev_dump(net, d, ifindex) \ - xa_for_each_start(&(net)->dev_by_index, (ifindex), (d), (ifindex)) + for (; (d = xa_find(&(net)->dev_by_index, &ifindex, \ + ULONG_MAX, XA_PRESENT)); ifindex++) static inline struct net_device *next_net_device(struct net_device *dev) { -- cgit v1.2.3 From d25a92ccae6bed02327b63d138e12e7806830f78 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Fri, 14 Jun 2024 02:00:30 +0800 Subject: net/smc: Introduce IPPROTO_SMC This patch allows to create smc socket via AF_INET, similar to the following code, /* create v4 smc sock */ v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); /* create v6 smc sock */ v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); There are several reasons why we believe it is appropriate here: 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) address. There is no AF_SMC address at all. 2. Create smc socket in the AF_INET(6) path, which allows us to reuse the infrastructure of AF_INET(6) path, such as common ebpf hooks. Otherwise, smc have to implement it again in AF_SMC path. Signed-off-by: D. Wythe Reviewed-by: Wenjia Zhang Reviewed-by: Dust Li Tested-by: Niklas Schnelle Tested-by: Wenjia Zhang Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index e682ab628dfa..d358add1611c 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -81,6 +81,8 @@ enum { #define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_SMC = 256, /* Shared Memory Communications */ +#define IPPROTO_SMC IPPROTO_SMC IPPROTO_MPTCP = 262, /* Multipath TCP connection */ #define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX -- cgit v1.2.3 From a43b9ec091b1b1924ea18883a715e5aadba2543e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 29 May 2024 10:19:20 -0700 Subject: peci, hwmon: Switch to new Intel CPU model defines Update peci subsystem to use the same vendor-family-model combined definition that core x86 code uses. Signed-off-by: Tony Luck Acked-by: Guenter Roeck Reviewed-by: Iwona Winiarska Link: https://lore.kernel.org/r/20240529171920.62571-1-tony.luck@intel.com Signed-off-by: Iwona Winiarska --- include/linux/peci-cpu.h | 24 ++++++++++++++++++++++++ include/linux/peci.h | 6 ++---- 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/peci-cpu.h b/include/linux/peci-cpu.h index ff8ae9c26c80..601cdd086bf6 100644 --- a/include/linux/peci-cpu.h +++ b/include/linux/peci-cpu.h @@ -6,6 +6,30 @@ #include +/* Copied from x86 */ +#define X86_VENDOR_INTEL 0 + +/* Copied from x86 */ +#define VFM_MODEL_BIT 0 +#define VFM_FAMILY_BIT 8 +#define VFM_VENDOR_BIT 16 +#define VFM_RSVD_BIT 24 + +#define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT) +#define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT) +#define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT) + +#define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT) +#define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT) +#define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT) + +#define VFM_MAKE(_vendor, _family, _model) ( \ + ((_model) << VFM_MODEL_BIT) | \ + ((_family) << VFM_FAMILY_BIT) | \ + ((_vendor) << VFM_VENDOR_BIT) \ +) +/* End of copied code */ + #include "../../arch/x86/include/asm/intel-family.h" #define PECI_PCS_PKG_ID 0 /* Package Identifier Read */ diff --git a/include/linux/peci.h b/include/linux/peci.h index 90e241458ef6..3e0bc37591d6 100644 --- a/include/linux/peci.h +++ b/include/linux/peci.h @@ -59,8 +59,7 @@ static inline struct peci_controller *to_peci_controller(void *d) * struct peci_device - PECI device * @dev: device object to register PECI device to the device model * @info: PECI device characteristics - * @info.family: device family - * @info.model: device model + * @info.x86_vfm: device vendor-family-model * @info.peci_revision: PECI revision supported by the PECI device * @info.socket_id: the socket ID represented by the PECI device * @addr: address used on the PECI bus connected to the parent controller @@ -73,8 +72,7 @@ static inline struct peci_controller *to_peci_controller(void *d) struct peci_device { struct device dev; struct { - u16 family; - u8 model; + u32 x86_vfm; u8 peci_revision; u8 socket_id; } info; -- cgit v1.2.3 From f45a6051d582f613f4abc383ae238661f7813302 Mon Sep 17 00:00:00 2001 From: Costa Shulyupin Date: Mon, 25 Mar 2024 18:38:10 +0200 Subject: cpu/hotplug: Fix typo in comment Signed-off-by: Costa Shulyupin Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325163810.669459-1-costa.shul@redhat.com --- include/linux/cpuhotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 7a5785f405b6..7f6c820c12eb 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -27,7 +27,7 @@ * startup callbacks sequentially from CPUHP_OFFLINE + 1 to CPUHP_ONLINE * during a CPU online operation. During a CPU offline operation the * installed teardown callbacks are invoked in the reverse order from - * CPU_ONLINE - 1 down to CPUHP_OFFLINE. + * CPUHP_ONLINE - 1 down to CPUHP_OFFLINE. * * The state space has three sections: PREPARE, STARTING and ONLINE. * -- cgit v1.2.3 From 299d623f5c9ab48e53255cf6b510627f1ef26dfe Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:03 +0200 Subject: irqdomain: Introduce irq_domain_instantiate() The existing irq_domain_add_*() functions used to instantiate an IRQ domain are wrappers built on top of __irq_domain_add() and describe the domain properties using a bunch of parameters. Adding more parameters and wrappers to hide new parameters in the existing code lead to more and more code without any relevant value and without any flexibility. Introduce irq_domain_instantiate() where the interrupt domain properties are given using a irq_domain_info structure instead of the bunch of parameters to allow flexibility and easy evolution. irq_domain_instantiate() performs the same operation as the one done by __irq_domain_add(). For compatibility reason with existing code, keep __irq_domain_add() but convert it to irq_domain_instantiate(). [ tglx: Fixed up struct initializer coding style ] Suggested-by: Thomas Gleixner Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-3-herve.codina@bootlin.com --- include/linux/irqdomain.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 21ecf582a0fe..ab8939c8724d 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -257,6 +257,27 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) } void irq_domain_free_fwnode(struct fwnode_handle *fwnode); +/** + * struct irq_domain_info - Domain information structure + * @fwnode: firmware node for the interrupt controller + * @size: Size of linear map; 0 for radix mapping only + * @hwirq_max: Maximum number of interrupts supported by controller + * @direct_max: Maximum value of direct maps; + * Use ~0 for no limit; 0 for no direct mapping + * @ops: Domain operation callbacks + * @host_data: Controller private data pointer + */ +struct irq_domain_info { + struct fwnode_handle *fwnode; + unsigned int size; + irq_hw_number_t hwirq_max; + int direct_max; + const struct irq_domain_ops *ops; + void *host_data; +}; + +struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info); + struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, -- cgit v1.2.3 From 922ac2cf9fe444c4aff165b9f7e158a9b651647d Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:05 +0200 Subject: irqdomain: Constify parameter in is_fwnode_irqchip() The fwnode parameter has no reason to be a pointer to an un-const struct fwnode_handle. Indeed, struct fwnode_handle is not supposed to be modified by the function. Be consistent with other function performing the same kind of operation such as is_of_node(), is_acpi_device_node() or is_software_node(): constify the fwnode parameter. Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-5-herve.codina@bootlin.com --- include/linux/irqdomain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index ab8939c8724d..a3b43e357009 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -314,7 +314,7 @@ static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) extern const struct fwnode_operations irqchip_fwnode_ops; -static inline bool is_fwnode_irqchip(struct fwnode_handle *fwnode) +static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode) { return fwnode && fwnode->ops == &irqchip_fwnode_ops; } -- cgit v1.2.3 From 757398541c30a5e898169763b43f08dab71ea3bd Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:08 +0200 Subject: irqdomain: Handle additional domain flags in irq_domain_instantiate() In order to use irq_domain_instantiate() from several places such as irq_domain_create_hierarchy(), irq_domain_instantiate() needs to handle additional domain flags. Add the required infrastructure. Suggested-by: Thomas Gleixner Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-8-herve.codina@bootlin.com --- include/linux/irqdomain.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index a3b43e357009..4683b66eded9 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -260,6 +260,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode); /** * struct irq_domain_info - Domain information structure * @fwnode: firmware node for the interrupt controller + * @domain_flags: Additional flags to add to the domain flags * @size: Size of linear map; 0 for radix mapping only * @hwirq_max: Maximum number of interrupts supported by controller * @direct_max: Maximum value of direct maps; @@ -269,6 +270,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode); */ struct irq_domain_info { struct fwnode_handle *fwnode; + unsigned int domain_flags; unsigned int size; irq_hw_number_t hwirq_max; int direct_max; -- cgit v1.2.3 From 419e3778ff295c00aa158d9f2854a70b47ba1136 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:09 +0200 Subject: irqdomain: Handle domain hierarchy parent in irq_domain_instantiate() To use irq_domain_instantiate() from irq_domain_create_hierarchy(), irq_domain_instantiate() needs to handle the domain hierarchy parent. Add the required functionality. Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-9-herve.codina@bootlin.com --- include/linux/irqdomain.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 4683b66eded9..e52fd5e5494c 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -276,6 +276,12 @@ struct irq_domain_info { int direct_max; const struct irq_domain_ops *ops; void *host_data; +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + /** + * @parent: Pointer to the parent irq domain used in a hierarchy domain + */ + struct irq_domain *parent; +#endif }; struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info); -- cgit v1.2.3 From 0b21add71bd9cfa2bd6677a0300e15fd4c4b84ed Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:12 +0200 Subject: irqdomain: Handle domain bus token in irq_domain_create() irq_domain_update_bus_token() is the only way to set the domain bus token. This is sub-optimal as irq_domain_update_bus_token() can be called only once the domain is created and needs to revert some operations, change the domain name and redo the operations. In order to avoid this revert/change/redo sequence, take the domain bus into account token during the domain creation. Suggested-by: Thomas Gleixner Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-12-herve.codina@bootlin.com --- include/linux/irqdomain.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index e52fd5e5494c..52bed23e5c61 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -265,6 +265,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode); * @hwirq_max: Maximum number of interrupts supported by controller * @direct_max: Maximum value of direct maps; * Use ~0 for no limit; 0 for no direct mapping + * @bus_token: Domain bus token * @ops: Domain operation callbacks * @host_data: Controller private data pointer */ @@ -274,6 +275,7 @@ struct irq_domain_info { unsigned int size; irq_hw_number_t hwirq_max; int direct_max; + enum irq_domain_bus_token bus_token; const struct irq_domain_ops *ops; void *host_data; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY -- cgit v1.2.3 From 44b68de9b8e3dfde12308e8567548799d7ded0de Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:13 +0200 Subject: irqdomain: Introduce init() and exit() hooks The current API does not allow additional initialization before the domain is published. This can lead to a race condition between consumers and supplier as a domain can be available for consumers before being fully ready. Introduce the init() hook to allow additional initialization before plublishing the domain. Also introduce the exit() hook to revert operations done in init() on domain removal. Suggested-by: Thomas Gleixner Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-13-herve.codina@bootlin.com --- include/linux/irqdomain.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 52bed23e5c61..2c927edc4d43 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -141,6 +141,7 @@ struct irq_domain_chip_generic; * purposes related to the irq domain. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains * @msi_parent_ops: Pointer to MSI parent domain methods for per device domain init + * @exit: Function called when the domain is destroyed * * Revmap data, used internally by the irq domain code: * @revmap_size: Size of the linear map table @revmap[] @@ -169,6 +170,7 @@ struct irq_domain { #ifdef CONFIG_GENERIC_MSI_IRQ const struct msi_parent_ops *msi_parent_ops; #endif + void (*exit)(struct irq_domain *d); /* reverse map data. The linear map gets appended to the irq_domain */ irq_hw_number_t hwirq_max; @@ -268,6 +270,10 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode); * @bus_token: Domain bus token * @ops: Domain operation callbacks * @host_data: Controller private data pointer + * @init: Function called when the domain is created. + * Allow to do some additional domain initialisation. + * @exit: Function called when the domain is destroyed. + * Allow to do some additional cleanup operation. */ struct irq_domain_info { struct fwnode_handle *fwnode; @@ -284,6 +290,8 @@ struct irq_domain_info { */ struct irq_domain *parent; #endif + int (*init)(struct irq_domain *d); + void (*exit)(struct irq_domain *d); }; struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info); -- cgit v1.2.3 From e25f553a92973eaf59ff3a00fe7f61ab01b2877f Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:14 +0200 Subject: genirq/generic_chip: Introduce irq_domain_{alloc,remove}_generic_chips() The existing __irq_alloc_domain_generic_chips() uses a bunch of parameters to describe the generic chips that need to be allocated. Adding more parameters and wrappers to hide new parameters in the existing code leads to more and more code without any relevant values and without any flexibility. Introduce irq_domain_alloc_generic_chips() where the generic chips description is done using the irq_domain_chip_generic_info structure instead of the bunch of parameters to allow flexibility and easy evolution. Also introduce irq_domain_remove_generic_chips() to revert the operations done by irq_domain_alloc_generic_chips(). Suggested-by: Thomas Gleixner Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-14-herve.codina@bootlin.com --- include/linux/irq.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index a217e1029c1d..58264b236cbf 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1117,6 +1117,27 @@ struct irq_domain_chip_generic { struct irq_chip_generic *gc[]; }; +/** + * struct irq_domain_chip_generic_info - Generic chip information structure + * @name: Name of the generic interrupt chip + * @handler: Interrupt handler used by the generic interrupt chip + * @irqs_per_chip: Number of interrupts each chip handles (max 32) + * @num_ct: Number of irq_chip_type instances associated with each + * chip + * @irq_flags_to_clear: IRQ_* bits to clear in the mapping function + * @irq_flags_to_set: IRQ_* bits to set in the mapping function + * @gc_flags: Generic chip specific setup flags + */ +struct irq_domain_chip_generic_info { + const char *name; + irq_flow_handler_t handler; + unsigned int irqs_per_chip; + unsigned int num_ct; + unsigned int irq_flags_to_clear; + unsigned int irq_flags_to_set; + enum irq_gc_flags gc_flags; +}; + /* Generic chip callback functions */ void irq_gc_noop(struct irq_data *d); void irq_gc_mask_disable_reg(struct irq_data *d); @@ -1153,6 +1174,10 @@ int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc, struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq); +int irq_domain_alloc_generic_chips(struct irq_domain *d, + const struct irq_domain_chip_generic_info *info); +void irq_domain_remove_generic_chips(struct irq_domain *d); + int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, int num_ct, const char *name, irq_flow_handler_t handler, -- cgit v1.2.3 From fea922ee9f8ffd3c2a8e8dfbc68de42905a3982a Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:15 +0200 Subject: genirq/generic_chip: Introduce init() and exit() hooks Most of generic chip drivers need to perform some more additional initializations on the generic chips allocated before they can be fully ready. These additional initializations need to be performed before the IRQ domain is published to avoid a race condition between IRQ consumers and suppliers. Introduce the init() hook to perform these initializations at the right place just after the generic chip creation. Also introduce the exit() hook to allow reverting operations done by the init() hook just before the generic chip is destroyed. Suggested-by: Thomas Gleixner Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-15-herve.codina@bootlin.com --- include/linux/irq.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 58264b236cbf..712bcee56efc 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1106,6 +1106,7 @@ enum irq_gc_flags { * @irq_flags_to_set: IRQ* flags to set on irq setup * @irq_flags_to_clear: IRQ* flags to clear on irq setup * @gc_flags: Generic chip specific setup flags + * @exit: Function called on each chip when they are destroyed. * @gc: Array of pointers to generic interrupt chips */ struct irq_domain_chip_generic { @@ -1114,6 +1115,7 @@ struct irq_domain_chip_generic { unsigned int irq_flags_to_clear; unsigned int irq_flags_to_set; enum irq_gc_flags gc_flags; + void (*exit)(struct irq_chip_generic *gc); struct irq_chip_generic *gc[]; }; @@ -1127,6 +1129,10 @@ struct irq_domain_chip_generic { * @irq_flags_to_clear: IRQ_* bits to clear in the mapping function * @irq_flags_to_set: IRQ_* bits to set in the mapping function * @gc_flags: Generic chip specific setup flags + * @init: Function called on each chip when they are created. + * Allow to do some additional chip initialisation. + * @exit: Function called on each chip when they are destroyed. + * Allow to do some chip cleanup operation. */ struct irq_domain_chip_generic_info { const char *name; @@ -1136,6 +1142,8 @@ struct irq_domain_chip_generic_info { unsigned int irq_flags_to_clear; unsigned int irq_flags_to_set; enum irq_gc_flags gc_flags; + int (*init)(struct irq_chip_generic *gc); + void (*exit)(struct irq_chip_generic *gc); }; /* Generic chip callback functions */ -- cgit v1.2.3 From e6f67ce32e8e6dcbadf42dc435fbc9002cabf1f9 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:16 +0200 Subject: irqdomain: Add support for generic irq chips creation before publishing a domain The current API functions create an irq_domain and also publish this newly created to domain. Once an irq_domain is published, consumers can request IRQ in order to use them. Some interrupt controller drivers have to perform some more operations with the created irq_domain in order to have it ready to be used. For instance: - Allocate generic irq chips with irq_alloc_domain_generic_chips() - Retrieve the generic irq chips with irq_get_domain_generic_chip() - Initialize retrieved chips: set register base address and offsets, set several hooks such as irq_mask, irq_unmask, ... With the newly introduced irq_domain_alloc_generic_chips(), an interrupt controller driver can use the irq_domain_chip_generic_info structure and set the init() hook to perform its generic chips initialization. In order to avoid a window where the domain is published but not yet ready to be used, handle the generic chip creation (i.e the irq_domain_alloc_generic_chips() call) before the domain is published. Suggested-by: Thomas Gleixner Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-16-herve.codina@bootlin.com --- include/linux/irqdomain.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 2c927edc4d43..5540b22a755c 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -210,6 +210,9 @@ enum { /* Irq domain is a MSI device domain */ IRQ_DOMAIN_FLAG_MSI_DEVICE = (1 << 9), + /* Irq domain must destroy generic chips when removed */ + IRQ_DOMAIN_FLAG_DESTROY_GC = (1 << 10), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the @@ -259,6 +262,9 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) } void irq_domain_free_fwnode(struct fwnode_handle *fwnode); + +struct irq_domain_chip_generic_info; + /** * struct irq_domain_info - Domain information structure * @fwnode: firmware node for the interrupt controller @@ -270,6 +276,8 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode); * @bus_token: Domain bus token * @ops: Domain operation callbacks * @host_data: Controller private data pointer + * @dgc_info: Geneneric chip information structure pointer used to + * create generic chips for the domain if not NULL. * @init: Function called when the domain is created. * Allow to do some additional domain initialisation. * @exit: Function called when the domain is destroyed. @@ -290,6 +298,7 @@ struct irq_domain_info { */ struct irq_domain *parent; #endif + struct irq_domain_chip_generic_info *dgc_info; int (*init)(struct irq_domain *d); void (*exit)(struct irq_domain *d); }; -- cgit v1.2.3 From 0c5b29a6dc7b463b6072da8cef43800008728ff3 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:17 +0200 Subject: irqdomain: Add a resource managed version of irq_domain_instantiate() Add a devres version of irq_domain_instantiate(). Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-17-herve.codina@bootlin.com --- include/linux/irqdomain.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 5540b22a755c..8820317582c4 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -304,6 +304,8 @@ struct irq_domain_info { }; struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info); +struct irq_domain *devm_irq_domain_instantiate(struct device *dev, + const struct irq_domain_info *info); struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, irq_hw_number_t hwirq_max, int direct_max, -- cgit v1.2.3 From 7c53626cd11820a11f9cb2c54c02d47fc062a265 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:18 +0200 Subject: irqdomain: Convert __irq_domain_add() wrappers to irq_domain_instantiate() __irq_domain_add() wrappers use directly __irq_domain_add(). With the introduction of irq_domain_instantiate(), __irq_domain_add() becomes obsolete. In order to fully remove __irq_domain_add(), convert wrappers to irq_domain_instantiate() [ tglx: Fixup struct initializers ] Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-18-herve.codina@bootlin.com --- include/linux/irqdomain.h | 58 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 8820317582c4..33a968fbdda2 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -400,7 +400,17 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(of_node_to_fwnode(of_node), size, size, 0, ops, host_data); + struct irq_domain_info info = { + .fwnode = of_node_to_fwnode(of_node), + .size = size, + .hwirq_max = size, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; } #ifdef CONFIG_IRQ_DOMAIN_NOMAP @@ -409,7 +419,17 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(of_node_to_fwnode(of_node), 0, max_irq, max_irq, ops, host_data); + struct irq_domain_info info = { + .fwnode = of_node_to_fwnode(of_node), + .hwirq_max = max_irq, + .direct_max = max_irq, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; } extern unsigned int irq_create_direct_mapping(struct irq_domain *host); @@ -419,7 +439,16 @@ static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(of_node_to_fwnode(of_node), 0, ~0, 0, ops, host_data); + struct irq_domain_info info = { + .fwnode = of_node_to_fwnode(of_node), + .hwirq_max = ~0U, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; } static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode, @@ -427,14 +456,33 @@ static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle * const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(fwnode, size, size, 0, ops, host_data); + struct irq_domain_info info = { + .fwnode = fwnode, + .size = size, + .hwirq_max = size, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; } static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(fwnode, 0, ~0, 0, ops, host_data); + struct irq_domain_info info = { + .fwnode = fwnode, + .hwirq_max = ~0, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; } extern void irq_domain_remove(struct irq_domain *host); -- cgit v1.2.3 From 0b4b172b760efabf8a77ea17644d333fbb444d39 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:21 +0200 Subject: irqdomain: Remove __irq_domain_add() __irq_domain_add() has been replaced by irq_domain_instanciate() and so, it is no more used. Simply remove it. Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-21-herve.codina@bootlin.com --- include/linux/irqdomain.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 33a968fbdda2..02cd486ac354 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -184,7 +184,7 @@ enum { /* Irq domain is hierarchical */ IRQ_DOMAIN_FLAG_HIERARCHY = (1 << 0), - /* Irq domain name was allocated in __irq_domain_add() */ + /* Irq domain name was allocated internally */ IRQ_DOMAIN_NAME_ALLOCATED = (1 << 1), /* Irq domain is an IPI domain with virq per cpu */ @@ -307,10 +307,6 @@ struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info); struct irq_domain *devm_irq_domain_instantiate(struct device *dev, const struct irq_domain_info *info); -struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, - irq_hw_number_t hwirq_max, int direct_max, - const struct irq_domain_ops *ops, - void *host_data); struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, unsigned int size, unsigned int first_irq, -- cgit v1.2.3 From 1037e4c53e851682ff8d1ab656567a4d5a333c93 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 14 Jun 2024 12:58:48 +0300 Subject: cpu/hotplug: Add support for declaring CPU offlining not supported The ACPI MADT mailbox wakeup method doesn't allow to offline a CPU after it has been woken up. Currently, offlining is prevented based on the confidential computing attribute which is set for Intel TDX. But TDX is not the only possible user of the wake up method. The MADT wakeup can be implemented outside of a confidential computing environment. Offline support is a property of the wakeup method, not the CoCo implementation. Introduce cpu_hotplug_disable_offlining() that can be called to indicate that CPU offlining should be disabled. This function is going to replace CC_ATTR_HOTPLUG_DISABLED for ACPI MADT wakeup method. Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Tested-by: Tao Liu Link: https://lore.kernel.org/r/20240614095904.1345461-4-kirill.shutemov@linux.intel.com --- include/linux/cpuhplock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/cpuhplock.h b/include/linux/cpuhplock.h index 431560bbd045..f7aa20f62b87 100644 --- a/include/linux/cpuhplock.h +++ b/include/linux/cpuhplock.h @@ -21,6 +21,7 @@ void cpus_read_lock(void); void cpus_read_unlock(void); int cpus_read_trylock(void); void lockdep_assert_cpus_held(void); +void cpu_hotplug_disable_offlining(void); void cpu_hotplug_disable(void); void cpu_hotplug_enable(void); void clear_tasks_mm_cpumask(int cpu); @@ -36,6 +37,7 @@ static inline void cpus_read_lock(void) { } static inline void cpus_read_unlock(void) { } static inline int cpus_read_trylock(void) { return true; } static inline void lockdep_assert_cpus_held(void) { } +static inline void cpu_hotplug_disable_offlining(void) { } static inline void cpu_hotplug_disable(void) { } static inline void cpu_hotplug_enable(void) { } static inline int remove_cpu(unsigned int cpu) { return -EPERM; } -- cgit v1.2.3 From 66e48e491d1e1a0f243ebfcb9639b23de1a5db5e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 14 Jun 2024 12:58:49 +0300 Subject: cpu/hotplug, x86/acpi: Disable CPU offlining for ACPI MADT wakeup ACPI MADT doesn't allow to offline a CPU after it has been woken up. Currently, CPU hotplug is prevented based on the confidential computing attribute which is set for Intel TDX. But TDX is not the only possible user of the wake up method. Any platform that uses ACPI MADT wakeup method cannot offline CPU. Disable CPU offlining on ACPI MADT wakeup enumeration. This has no visible effects for users: currently, TDX guest is the only platform that uses the ACPI MADT wakeup method. Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Tao Liu Link: https://lore.kernel.org/r/20240614095904.1345461-5-kirill.shutemov@linux.intel.com --- include/linux/cc_platform.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index 60693a145894..caa4b4430634 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -81,16 +81,6 @@ enum cc_attr { */ CC_ATTR_GUEST_SEV_SNP, - /** - * @CC_ATTR_HOTPLUG_DISABLED: Hotplug is not supported or disabled. - * - * The platform/OS is running as a guest/virtual machine does not - * support CPU hotplug feature. - * - * Examples include TDX Guest. - */ - CC_ATTR_HOTPLUG_DISABLED, - /** * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host. * -- cgit v1.2.3 From 6630cbce7cd7785f76b1055f33a71199ef28510b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 14 Jun 2024 12:58:59 +0300 Subject: x86/acpi: Rename fields in the acpi_madt_multiproc_wakeup structure In order to support MADT wakeup structure version 1, provide more appropriate names for the fields in the structure. Rename 'mailbox_version' to 'version'. This field signifies the version of the structure and the related protocols, rather than the version of the mailbox. This field has not been utilized in the code thus far. Rename 'base_address' to 'mailbox_address' to clarify the kind of address it represents. In version 1, the structure includes the reset vector address. Clear and distinct naming helps to prevent any confusion. Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Kai Huang Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Tao Liu Link: https://lore.kernel.org/r/20240614095904.1345461-15-kirill.shutemov@linux.intel.com --- include/acpi/actbl2.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index ae747c89d92c..fa63362469aa 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -1194,9 +1194,9 @@ struct acpi_madt_generic_translator { struct acpi_madt_multiproc_wakeup { struct acpi_subtable_header header; - u16 mailbox_version; + u16 version; u32 reserved; /* reserved - must be zero */ - u64 base_address; + u64 mailbox_address; }; #define ACPI_MULTIPROC_WAKEUP_MB_OS_SIZE 2032 -- cgit v1.2.3 From 1ceebe2e46720de02af4cf626dc847ecc4a263fd Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 14 Jun 2024 12:59:03 +0300 Subject: x86/acpi: Add support for CPU offlining for ACPI MADT wakeup method MADT Multiprocessor Wakeup structure version 1 brings support for CPU offlining: BIOS provides a reset vector where the CPU has to jump to for offlining itself. The new TEST mailbox command can be used to test whether the CPU offlined itself which means the BIOS has control over the CPU and can online it again via the ACPI MADT wakeup method. Add CPU offlining support for the ACPI MADT wakeup method by implementing custom cpu_die(), play_dead() and stop_this_cpu() SMP operations. CPU offlining makes it possible to hand over secondary CPUs over kexec, not limiting the second kernel to a single CPU. The change conforms to the approved ACPI spec change proposal. See the Link. Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Thomas Gleixner Acked-by: Kai Huang Acked-by: Rafael J. Wysocki Tested-by: Tao Liu Link: https://lore.kernel.org/all/13356251.uLZWGnKmhe@kreacher Link: https://lore.kernel.org/r/20240614095904.1345461-19-kirill.shutemov@linux.intel.com --- include/acpi/actbl2.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index fa63362469aa..e27958ef8264 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -1197,8 +1197,20 @@ struct acpi_madt_multiproc_wakeup { u16 version; u32 reserved; /* reserved - must be zero */ u64 mailbox_address; + u64 reset_vector; }; +/* Values for Version field above */ + +enum acpi_madt_multiproc_wakeup_version { + ACPI_MADT_MP_WAKEUP_VERSION_NONE = 0, + ACPI_MADT_MP_WAKEUP_VERSION_V1 = 1, + ACPI_MADT_MP_WAKEUP_VERSION_RESERVED = 2, /* 2 and greater are reserved */ +}; + +#define ACPI_MADT_MP_WAKEUP_SIZE_V0 16 +#define ACPI_MADT_MP_WAKEUP_SIZE_V1 24 + #define ACPI_MULTIPROC_WAKEUP_MB_OS_SIZE 2032 #define ACPI_MULTIPROC_WAKEUP_MB_FIRMWARE_SIZE 2048 @@ -1211,7 +1223,8 @@ struct acpi_madt_multiproc_wakeup_mailbox { u8 reserved_firmware[ACPI_MULTIPROC_WAKEUP_MB_FIRMWARE_SIZE]; /* reserved for firmware use */ }; -#define ACPI_MP_WAKE_COMMAND_WAKEUP 1 +#define ACPI_MP_WAKE_COMMAND_WAKEUP 1 +#define ACPI_MP_WAKE_COMMAND_TEST 2 /* 17: CPU Core Interrupt Controller (ACPI 6.5) */ -- cgit v1.2.3 From d4a7d067e061c95c6387cf537258082074a4d299 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Jun 2024 14:57:34 +0200 Subject: ASoC: soc-dai.h: Constify DAI ops auto_selectable_formats The core ASoC code does not modify contents of the 'auto_selectable_formats' array passed in 'struct snd_soc_dai_ops', so make it const for code safety. Reviewed-by: Herve Codina Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240617125735.582963-1-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 15ef268c9845..279223c4ef5e 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -361,7 +361,7 @@ struct snd_soc_dai_ops { * see * snd_soc_dai_get_fmt() */ - u64 *auto_selectable_formats; + const u64 *auto_selectable_formats; int num_auto_selectable_formats; /* probe ordering - for components with runtime dependencies */ -- cgit v1.2.3 From 614dc0fb76327dbd81abd4612fbc2e4ba8f205e6 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 5 Jun 2024 10:18:52 -0500 Subject: sev-guest: configfs-tsm: Allow the privlevel_floor attribute to be updated With the introduction of an SVSM, Linux will be running at a non-zero VMPL. Any request for an attestation report at a higher privilege VMPL than what Linux is currently running will result in an error. Allow for the privlevel_floor attribute to be updated dynamically. [ bp: Trim commit message. ] Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/5a736be9384aebd98a0b7c929660f8a97cbdc366.1717600736.git.thomas.lendacky@amd.com --- include/linux/tsm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tsm.h b/include/linux/tsm.h index de8324a2223c..50c5769657d8 100644 --- a/include/linux/tsm.h +++ b/include/linux/tsm.h @@ -54,7 +54,7 @@ struct tsm_report { */ struct tsm_ops { const char *name; - const unsigned int privlevel_floor; + unsigned int privlevel_floor; int (*report_new)(struct tsm_report *report, void *data); }; -- cgit v1.2.3 From 0e6a35b93745bb2d8b921fd0520ef730489d41a2 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 5 Jun 2024 10:18:53 -0500 Subject: fs/configfs: Add a callback to determine attribute visibility In order to support dynamic decisions as to whether an attribute should be created, add a callback that returns a bool to indicate whether the attribute should be displayed. If no callback is registered, the attribute is displayed by default. Co-developed-by: Dan Williams Signed-off-by: Dan Williams Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/e555c8740a263fab9f83b2cbb44da1af49a2813c.1717600736.git.thomas.lendacky@amd.com --- include/linux/configfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 2606711adb18..c771e9d0d0b9 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -216,6 +216,9 @@ struct configfs_group_operations { struct config_group *(*make_group)(struct config_group *group, const char *name); void (*disconnect_notify)(struct config_group *group, struct config_item *item); void (*drop_item)(struct config_group *group, struct config_item *item); + bool (*is_visible)(struct config_item *item, struct configfs_attribute *attr, int n); + bool (*is_bin_visible)(struct config_item *item, struct configfs_bin_attribute *attr, + int n); }; struct configfs_subsystem { -- cgit v1.2.3 From 20dfee95936413708701eb151f419597fdd9d948 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 5 Jun 2024 10:18:54 -0500 Subject: x86/sev: Take advantage of configfs visibility support in TSM The TSM attestation report support provides multiple configfs attribute types (both for standard and binary attributes) to allow for additional attributes to be displayed for SNP as compared to TDX. With the ability to hide attributes via configfs, consolidate the multiple attribute groups into a single standard attribute group and a single binary attribute group. Modify the TDX support to hide the attributes that were previously "hidden" as a result of registering the selective attribute groups. Co-developed-by: Dan Williams Signed-off-by: Dan Williams Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Kuppuswamy Sathyanarayanan Link: https://lore.kernel.org/r/8873c45d0c8abc35aaf01d7833a55788a6905727.1717600736.git.thomas.lendacky@amd.com --- include/linux/tsm.h | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/tsm.h b/include/linux/tsm.h index 50c5769657d8..30d9d270b446 100644 --- a/include/linux/tsm.h +++ b/include/linux/tsm.h @@ -42,12 +42,40 @@ struct tsm_report { u8 *auxblob; }; +/** + * enum tsm_attr_index - index used to reference report attributes + * @TSM_REPORT_GENERATION: index of the report generation number attribute + * @TSM_REPORT_PROVIDER: index of the provider name attribute + * @TSM_REPORT_PRIVLEVEL: index of the desired privilege level attribute + * @TSM_REPORT_PRIVLEVEL_FLOOR: index of the minimum allowed privileg level attribute + */ +enum tsm_attr_index { + TSM_REPORT_GENERATION, + TSM_REPORT_PROVIDER, + TSM_REPORT_PRIVLEVEL, + TSM_REPORT_PRIVLEVEL_FLOOR, +}; + +/** + * enum tsm_bin_attr_index - index used to reference binary report attributes + * @TSM_REPORT_INBLOB: index of the binary report input attribute + * @TSM_REPORT_OUTBLOB: index of the binary report output attribute + * @TSM_REPORT_AUXBLOB: index of the binary auxiliary data attribute + */ +enum tsm_bin_attr_index { + TSM_REPORT_INBLOB, + TSM_REPORT_OUTBLOB, + TSM_REPORT_AUXBLOB, +}; + /** * struct tsm_ops - attributes and operations for tsm instances * @name: tsm id reflected in /sys/kernel/config/tsm/report/$report/provider * @privlevel_floor: convey base privlevel for nested scenarios * @report_new: Populate @report with the report blob and auxblob * (optional), return 0 on successful population, or -errno otherwise + * @report_attr_visible: show or hide a report attribute entry + * @report_bin_attr_visible: show or hide a report binary attribute entry * * Implementation specific ops, only one is expected to be registered at * a time i.e. only one of "sev-guest", "tdx-guest", etc. @@ -56,14 +84,10 @@ struct tsm_ops { const char *name; unsigned int privlevel_floor; int (*report_new)(struct tsm_report *report, void *data); + bool (*report_attr_visible)(int n); + bool (*report_bin_attr_visible)(int n); }; -extern const struct config_item_type tsm_report_default_type; - -/* publish @privlevel, @privlevel_floor, and @auxblob attributes */ -extern const struct config_item_type tsm_report_extra_type; - -int tsm_register(const struct tsm_ops *ops, void *priv, - const struct config_item_type *type); +int tsm_register(const struct tsm_ops *ops, void *priv); int tsm_unregister(const struct tsm_ops *ops); #endif /* __TSM_H */ -- cgit v1.2.3 From 627dc671518b7f004ce04c45e8711f8dca94a57c Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 5 Jun 2024 10:18:55 -0500 Subject: x86/sev: Extend the config-fs attestation support for an SVSM When an SVSM is present, the guest can also request attestation reports from it. These SVSM attestation reports can be used to attest the SVSM and any services running within the SVSM. Extend the config-fs attestation support to provide such. This involves creating four new config-fs attributes: - 'service-provider' (input) This attribute is used to determine whether the attestation request should be sent to the specified service provider or to the SEV firmware. The SVSM service provider is represented by the value 'svsm'. - 'service_guid' (input) Used for requesting the attestation of a single service within the service provider. A null GUID implies that the SVSM_ATTEST_SERVICES call should be used to request the attestation report. A non-null GUID implies that the SVSM_ATTEST_SINGLE_SERVICE call should be used. - 'service_manifest_version' (input) Used with the SVSM_ATTEST_SINGLE_SERVICE call, the service version represents a specific service manifest version be used for the attestation report. - 'manifestblob' (output) Used to return the service manifest associated with the attestation report. Only display these new attributes when running under an SVSM. [ bp: Massage. - s/svsm_attestation_call/svsm_attest_call/g ] Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/965015dce3c76bb8724839d50c5dea4e4b5d598f.1717600736.git.thomas.lendacky@amd.com --- include/linux/tsm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/tsm.h b/include/linux/tsm.h index 30d9d270b446..11b0c525be30 100644 --- a/include/linux/tsm.h +++ b/include/linux/tsm.h @@ -4,6 +4,7 @@ #include #include +#include #define TSM_INBLOB_MAX 64 #define TSM_OUTBLOB_MAX SZ_32K @@ -19,11 +20,17 @@ * @privlevel: optional privilege level to associate with @outblob * @inblob_len: sizeof @inblob * @inblob: arbitrary input data + * @service_provider: optional name of where to obtain the tsm report blob + * @service_guid: optional service-provider service guid to attest + * @service_manifest_version: optional service-provider service manifest version requested */ struct tsm_desc { unsigned int privlevel; size_t inblob_len; u8 inblob[TSM_INBLOB_MAX]; + char *service_provider; + guid_t service_guid; + unsigned int service_manifest_version; }; /** @@ -33,6 +40,8 @@ struct tsm_desc { * @outblob: generated evidence to provider to the attestation agent * @auxblob_len: sizeof(@auxblob) * @auxblob: (optional) auxiliary data to the report (e.g. certificate data) + * @manifestblob_len: sizeof(@manifestblob) + * @manifestblob: (optional) manifest data associated with the report */ struct tsm_report { struct tsm_desc desc; @@ -40,6 +49,8 @@ struct tsm_report { u8 *outblob; size_t auxblob_len; u8 *auxblob; + size_t manifestblob_len; + u8 *manifestblob; }; /** @@ -48,12 +59,18 @@ struct tsm_report { * @TSM_REPORT_PROVIDER: index of the provider name attribute * @TSM_REPORT_PRIVLEVEL: index of the desired privilege level attribute * @TSM_REPORT_PRIVLEVEL_FLOOR: index of the minimum allowed privileg level attribute + * @TSM_REPORT_SERVICE_PROVIDER: index of the service provider identifier attribute + * @TSM_REPORT_SERVICE_GUID: index of the service GUID attribute + * @TSM_REPORT_SERVICE_MANIFEST_VER: index of the service manifest version attribute */ enum tsm_attr_index { TSM_REPORT_GENERATION, TSM_REPORT_PROVIDER, TSM_REPORT_PRIVLEVEL, TSM_REPORT_PRIVLEVEL_FLOOR, + TSM_REPORT_SERVICE_PROVIDER, + TSM_REPORT_SERVICE_GUID, + TSM_REPORT_SERVICE_MANIFEST_VER, }; /** @@ -61,11 +78,13 @@ enum tsm_attr_index { * @TSM_REPORT_INBLOB: index of the binary report input attribute * @TSM_REPORT_OUTBLOB: index of the binary report output attribute * @TSM_REPORT_AUXBLOB: index of the binary auxiliary data attribute + * @TSM_REPORT_MANIFESTBLOB: index of the binary manifest data attribute */ enum tsm_bin_attr_index { TSM_REPORT_INBLOB, TSM_REPORT_OUTBLOB, TSM_REPORT_AUXBLOB, + TSM_REPORT_MANIFESTBLOB, }; /** -- cgit v1.2.3 From 8cb2dbf94e44bcde4cff0223f2f900f8fb9083a4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Jun 2024 20:31:31 +0200 Subject: irqdomain: Make build work for CONFIG_GENERIC_IRQ_CHIP=n ld: kernel/irq/irqdomain.o: in function `irq_domain_instantiate': kernel/irq/irqdomain.c:296:(.text+0x10dd): undefined reference to `irq_domain_alloc_generic_chips' ld: kernel/irq/irqdomain.c:313:(.text+0x1218): undefined reference to `irq_domain_remove_generic_chips' ld: kernel/irq/irqdomain.o: in function `irq_domain_remove': kernel/irq/irqdomain.c:349:(.text+0x1ddf): undefined reference to `irq_domain_remove_generic_chips' Provide the required stubs. Fixes: e6f67ce32e8e ("irqdomain: Add support for generic irq chips creation before publishing a domain") Reported-by: Borislav Betkov Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 712bcee56efc..1f5dbf1f92c9 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1182,9 +1182,19 @@ int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc, struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq); +#ifdef CONFIG_GENERIC_IRQ_CHIP int irq_domain_alloc_generic_chips(struct irq_domain *d, const struct irq_domain_chip_generic_info *info); void irq_domain_remove_generic_chips(struct irq_domain *d); +#else +static inline int +irq_domain_alloc_generic_chips(struct irq_domain *d, + const struct irq_domain_chip_generic_info *info) +{ + return -EINVAL; +} +static inline void irq_domain_remove_generic_chips(struct irq_domain *d) { } +#endif /* CONFIG_GENERIC_IRQ_CHIP */ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, int num_ct, const char *name, -- cgit v1.2.3 From efb459303dd5dd6e198a0d58322dc04c3356dc23 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Wed, 12 Jun 2024 17:04:02 +0200 Subject: net: Move dev_set_hwtstamp_phylib to net/core/dev.h This declaration was added to the header to be called from ethtool. ethtool is separated from core for code organization but it is not really a separate entity, it controls very core things. As ethtool is an internal stuff it is not wise to have it in netdevice.h. Move the declaration to net/core/dev.h instead. Remove the EXPORT_SYMBOL_GPL call as ethtool can not be built as a module. Reviewed-by: Willem de Bruijn Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240612-feature_ptp_netnext-v15-2-b2a086257b63@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 85111502cf8f..c83b390191d4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3904,9 +3904,6 @@ int generic_hwtstamp_get_lower(struct net_device *dev, int generic_hwtstamp_set_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg, struct netlink_ext_ack *extack); -int dev_set_hwtstamp_phylib(struct net_device *dev, - struct kernel_hwtstamp_config *cfg, - struct netlink_ext_ack *extack); int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, -- cgit v1.2.3 From 41474d25bec56900e3a018907784b0abfe5a6a9e Mon Sep 17 00:00:00 2001 From: Nick Hollinghurst Date: Fri, 16 Feb 2024 18:48:55 +0000 Subject: drm: Add DRM_MODE_TV_MODE_MONOCHROME Add this as a value for enum_drm_connector_tv_mode, represented by the string "Mono", to generate video with no colour encoding or bursts. Define it to have no pedestal (since only NTSC-M calls for a pedestal). Change default mode creation to acommodate the new tv_mode value which comprises both 525-line and 625-line formats. Acked-by: Daniel Vetter Signed-off-by: Nick Hollinghurst Signed-off-by: Dave Stevenson Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240216184857.245372-2-dave.stevenson@raspberrypi.com --- include/drm/drm_connector.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index f750765d8fbc..c754651044d4 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -201,6 +201,13 @@ enum drm_connector_tv_mode { */ DRM_MODE_TV_MODE_SECAM, + /** + * @DRM_MODE_TV_MODE_MONOCHROME: Use timings appropriate to + * the DRM mode, including equalizing pulses for a 525-line + * or 625-line mode, with no pedestal or color encoding. + */ + DRM_MODE_TV_MODE_MONOCHROME, + /** * @DRM_MODE_TV_MODE_MAX: Number of analog TV output modes. * -- cgit v1.2.3 From 777b8afb8179155353ec14b1d8153122410aba29 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 15 Jun 2024 20:00:27 +0800 Subject: net: phy: introduce core support for phy-mode = "10g-qxgmii" 10G-QXGMII is a MAC-to-PHY interface defined by the USXGMII multiport specification. It uses the same signaling as USXGMII, but it multiplexes 4 ports over the link, resulting in a maximum speed of 2.5G per port. Some in-tree SoCs like the NXP LS1028A use "usxgmii" when they mean either the single-port USXGMII or the quad-port 10G-QXGMII variant, and they could get away just fine with that thus far. But there is a need to distinguish between the 2 as far as SerDes drivers are concerned. Signed-off-by: Vladimir Oltean Signed-off-by: Luo Jie Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Signed-off-by: Paolo Abeni --- include/linux/phy.h | 4 ++++ include/linux/phylink.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index e6e83304558e..205fccfc0f60 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -128,6 +128,7 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_10GKR: 10GBASE-KR - with Clause 73 AN * @PHY_INTERFACE_MODE_QUSGMII: Quad Universal SGMII * @PHY_INTERFACE_MODE_1000BASEKX: 1000Base-KX - with Clause 73 AN + * @PHY_INTERFACE_MODE_10G_QXGMII: 10G-QXGMII - 4 ports over 10G USXGMII * @PHY_INTERFACE_MODE_MAX: Book keeping * * Describes the interface between the MAC and PHY. @@ -168,6 +169,7 @@ typedef enum { PHY_INTERFACE_MODE_10GKR, PHY_INTERFACE_MODE_QUSGMII, PHY_INTERFACE_MODE_1000BASEKX, + PHY_INTERFACE_MODE_10G_QXGMII, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -289,6 +291,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "100base-x"; case PHY_INTERFACE_MODE_QUSGMII: return "qusgmii"; + case PHY_INTERFACE_MODE_10G_QXGMII: + return "10g-qxgmii"; default: return "unknown"; } diff --git a/include/linux/phylink.h b/include/linux/phylink.h index a30a692acc32..2381e07429a2 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -654,6 +654,7 @@ static inline int phylink_get_link_timer_ns(phy_interface_t interface) case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_QSGMII: case PHY_INTERFACE_MODE_USXGMII: + case PHY_INTERFACE_MODE_10G_QXGMII: return 1600000; case PHY_INTERFACE_MODE_1000BASEX: -- cgit v1.2.3 From c2bc958b2b03e361f14df99983bc64a39a7323a3 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 17 Jun 2024 13:06:27 +0200 Subject: fbdev: vesafb: Detect VGA compatibility from screen info's VESA attributes Test the vesa_attributes field in struct screen_info for compatibility with VGA hardware. Vesafb currently tests bit 1 in screen_info's capabilities field which indicates a 64-bit lfb address and is unrelated to VGA compatibility. Section 4.4 of the Vesa VBE 2.0 specifications defines that bit 5 in the mode's attributes field signals VGA compatibility. The mode is compatible with VGA hardware if the bit is clear. In that case, the driver can access VGA state of the VBE's underlying hardware. The vesafb driver uses this feature to program the color LUT in palette modes. Without, colors might be incorrect. The problem got introduced in commit 89ec4c238e7a ("[PATCH] vesafb: Fix incorrect logo colors in x86_64"). It incorrectly stores the mode attributes in the screen_info's capabilities field and updates vesafb accordingly. Later, commit 5e8ddcbe8692 ("Video mode probing support for the new x86 setup code") fixed the screen_info, but did not update vesafb. Color output still tends to work, because bit 1 in capabilities is usually 0. Besides fixing the bug in vesafb, this commit introduces a helper that reads the correct bit from screen_info. Signed-off-by: Thomas Zimmermann Fixes: 5e8ddcbe8692 ("Video mode probing support for the new x86 setup code") Reviewed-by: Javier Martinez Canillas Cc: # v2.6.23+ Signed-off-by: Helge Deller --- include/linux/screen_info.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 75303c126285..6a4a3cec4638 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -49,6 +49,16 @@ static inline u64 __screen_info_lfb_size(const struct screen_info *si, unsigned return lfb_size; } +static inline bool __screen_info_vbe_mode_nonvga(const struct screen_info *si) +{ + /* + * VESA modes typically run on VGA hardware. Set bit 5 signals that this + * is not the case. Drivers can then not make use of VGA resources. See + * Sec 4.4 of the VBE 2.0 spec. + */ + return si->vesa_attributes & BIT(5); +} + static inline unsigned int __screen_info_video_type(unsigned int type) { switch (type) { -- cgit v1.2.3 From 2fbafecb0f05818e25f6c926c6f9ad9ef597429c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Jun 2024 15:03:19 +0200 Subject: ASoC: Constify of_phandle_args in snd_soc_dai_driver ASoC core code does not modify contents of 'of_phandle_args' in 'struct snd_soc_dai_driver', so the pointer can be made as a pointer to const. This makes code safer, serves as clear annotation of core's intentions and allows putting pointed structures in rodata (if ever applicable). Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-1-8004f346ee38@linaro.org Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 15ef268c9845..e6c61c79c483 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -413,7 +413,7 @@ struct snd_soc_dai_driver { unsigned int id; unsigned int base; struct snd_soc_dobj dobj; - struct of_phandle_args *dai_args; + const struct of_phandle_args *dai_args; /* ops */ const struct snd_soc_dai_ops *ops; -- cgit v1.2.3 From 020b37d06f97de289940805bc821190d5858eda0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Jun 2024 15:03:20 +0200 Subject: ASoC: Constify of_phandle_args in snd_soc_dai_link_component ASoC core code does not modify contents of 'of_phandle_args' in 'struct snd_soc_dai_link_component', so the pointer can be made as a pointer to const. This makes code safer, serves as clear annotation of core's intentions and allows putting pointed structures in rodata (if ever applicable). Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-2-8004f346ee38@linaro.org Signed-off-by: Mark Brown --- include/sound/soc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 33671437ee89..f02a51694ab4 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -675,7 +675,7 @@ struct snd_soc_dai_link_component { const char *name; struct device_node *of_node; const char *dai_name; - struct of_phandle_args *dai_args; + const struct of_phandle_args *dai_args; }; /* -- cgit v1.2.3 From f3ac3da7e4d0957b3402fb31a4ca480e739e086f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Jun 2024 15:03:21 +0200 Subject: ASoC: Constify passed data to core function Several ASoC functions receive pointers to data which is not modified, e.g. pointers to 'snd_soc_dai', 'snd_soc_pcm_runtime', 'snd_pcm_hw_params' and 'snd_soc_dai_link'. All these pointers can be made as a pointer to const. This makes code safer, serves as clear annotation of function's intentions (no ownership passed to the function, no modifications) and allows putting pointed structures in rodata (if ever applicable). Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-3-8004f346ee38@linaro.org Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 18 ++++++++++-------- include/sound/soc.h | 17 +++++++++-------- 2 files changed, 19 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index e6c61c79c483..3c45b418270e 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -180,8 +180,8 @@ int snd_soc_dai_set_pll(struct snd_soc_dai *dai, int snd_soc_dai_set_bclk_ratio(struct snd_soc_dai *dai, unsigned int ratio); /* Digital Audio interface formatting */ -int snd_soc_dai_get_fmt_max_priority(struct snd_soc_pcm_runtime *rtd); -u64 snd_soc_dai_get_fmt(struct snd_soc_dai *dai, int priority); +int snd_soc_dai_get_fmt_max_priority(const struct snd_soc_pcm_runtime *rtd); +u64 snd_soc_dai_get_fmt(const struct snd_soc_dai *dai, int priority); int snd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt); int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai, @@ -202,7 +202,7 @@ int snd_soc_dai_get_channel_map(struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot); -int snd_soc_dai_is_dummy(struct snd_soc_dai *dai); +int snd_soc_dai_is_dummy(const struct snd_soc_dai *dai); int snd_soc_dai_hw_params(struct snd_soc_dai *dai, struct snd_pcm_substream *substream, @@ -218,7 +218,7 @@ void snd_soc_dai_suspend(struct snd_soc_dai *dai); void snd_soc_dai_resume(struct snd_soc_dai *dai); int snd_soc_dai_compress_new(struct snd_soc_dai *dai, struct snd_soc_pcm_runtime *rtd, int num); -bool snd_soc_dai_stream_valid(struct snd_soc_dai *dai, int stream); +bool snd_soc_dai_stream_valid(const struct snd_soc_dai *dai, int stream); void snd_soc_dai_link_set_capabilities(struct snd_soc_dai_link *dai_link); void snd_soc_dai_action(struct snd_soc_dai *dai, int stream, int action); @@ -232,7 +232,7 @@ static inline void snd_soc_dai_deactivate(struct snd_soc_dai *dai, { snd_soc_dai_action(dai, stream, -1); } -int snd_soc_dai_active(struct snd_soc_dai *dai); +int snd_soc_dai_active(const struct snd_soc_dai *dai); int snd_soc_pcm_dai_probe(struct snd_soc_pcm_runtime *rtd, int order); int snd_soc_pcm_dai_remove(struct snd_soc_pcm_runtime *rtd, int order); @@ -271,7 +271,7 @@ int snd_soc_dai_compr_get_metadata(struct snd_soc_dai *dai, struct snd_compr_stream *cstream, struct snd_compr_metadata *metadata); -const char *snd_soc_dai_name_get(struct snd_soc_dai *dai); +const char *snd_soc_dai_name_get(const struct snd_soc_dai *dai); struct snd_soc_dai_ops { /* DAI driver callbacks */ @@ -518,7 +518,8 @@ static inline void snd_soc_dai_init_dma_data(struct snd_soc_dai *dai, void *play snd_soc_dai_dma_data_set_capture(dai, capture); } -static inline unsigned int snd_soc_dai_tdm_mask_get(struct snd_soc_dai *dai, int stream) +static inline unsigned int snd_soc_dai_tdm_mask_get(const struct snd_soc_dai *dai, + int stream) { return dai->stream[stream].tdm_mask; } @@ -529,7 +530,8 @@ static inline void snd_soc_dai_tdm_mask_set(struct snd_soc_dai *dai, int stream, dai->stream[stream].tdm_mask = tdm_mask; } -static inline unsigned int snd_soc_dai_stream_active(struct snd_soc_dai *dai, int stream) +static inline unsigned int snd_soc_dai_stream_active(const struct snd_soc_dai *dai, + int stream) { /* see snd_soc_dai_action() for setup */ return dai->stream[stream].active; diff --git a/include/sound/soc.h b/include/sound/soc.h index f02a51694ab4..a8e66bbf932b 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -534,10 +534,10 @@ static inline int snd_soc_set_dmi_name(struct snd_soc_card *card, /* Utility functions to get clock rates from various things */ int snd_soc_calc_frame_size(int sample_size, int channels, int tdm_slots); -int snd_soc_params_to_frame_size(struct snd_pcm_hw_params *params); +int snd_soc_params_to_frame_size(const struct snd_pcm_hw_params *params); int snd_soc_calc_bclk(int fs, int sample_size, int channels, int tdm_slots); -int snd_soc_params_to_bclk(struct snd_pcm_hw_params *parms); -int snd_soc_tdm_params_to_bclk(struct snd_pcm_hw_params *params, +int snd_soc_params_to_bclk(const struct snd_pcm_hw_params *parms); +int snd_soc_tdm_params_to_bclk(const struct snd_pcm_hw_params *params, int tdm_width, int tdm_slots, int slot_multiple); /* set runtime hw params */ @@ -837,7 +837,8 @@ struct snd_soc_dai_link { #endif }; -static inline int snd_soc_link_num_ch_map(struct snd_soc_dai_link *link) { +static inline int snd_soc_link_num_ch_map(const struct snd_soc_dai_link *link) +{ return max(link->num_cpus, link->num_codecs); } @@ -1299,7 +1300,7 @@ struct soc_enum { #endif }; -static inline bool snd_soc_volsw_is_stereo(struct soc_mixer_control *mc) +static inline bool snd_soc_volsw_is_stereo(const struct soc_mixer_control *mc) { if (mc->reg == mc->rreg && mc->shift == mc->rshift) return false; @@ -1311,7 +1312,7 @@ static inline bool snd_soc_volsw_is_stereo(struct soc_mixer_control *mc) return true; } -static inline unsigned int snd_soc_enum_val_to_item(struct soc_enum *e, +static inline unsigned int snd_soc_enum_val_to_item(const struct soc_enum *e, unsigned int val) { unsigned int i; @@ -1326,7 +1327,7 @@ static inline unsigned int snd_soc_enum_val_to_item(struct soc_enum *e, return 0; } -static inline unsigned int snd_soc_enum_item_to_val(struct soc_enum *e, +static inline unsigned int snd_soc_enum_item_to_val(const struct soc_enum *e, unsigned int item) { if (!e->values) @@ -1401,7 +1402,7 @@ unsigned int snd_soc_daifmt_parse_clock_provider_raw(struct device_node *np, snd_soc_daifmt_clock_provider_from_bitmap( \ snd_soc_daifmt_parse_clock_provider_as_bitmap(np, prefix)) -int snd_soc_get_stream_cpu(struct snd_soc_dai_link *dai_link, int stream); +int snd_soc_get_stream_cpu(const struct snd_soc_dai_link *dai_link, int stream); int snd_soc_get_dlc(const struct of_phandle_args *args, struct snd_soc_dai_link_component *dlc); int snd_soc_of_get_dlc(struct device_node *of_node, -- cgit v1.2.3 From 785d64c4941221044940ab199e6625af17296470 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Jun 2024 15:03:22 +0200 Subject: ASoC: Constify DAI passed to get_channel_map get_channel_map() is supposed to obtain map of channels without modifying the state of the given DAI, so make the pointer to 'struct snd_soc_dai' as pointing to const. Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-4-8004f346ee38@linaro.org Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 3c45b418270e..f22969298de1 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -198,7 +198,7 @@ int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute, int direction); -int snd_soc_dai_get_channel_map(struct snd_soc_dai *dai, +int snd_soc_dai_get_channel_map(const struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot); @@ -307,7 +307,7 @@ struct snd_soc_dai_ops { int (*set_channel_map)(struct snd_soc_dai *dai, unsigned int tx_num, const unsigned int *tx_slot, unsigned int rx_num, const unsigned int *rx_slot); - int (*get_channel_map)(struct snd_soc_dai *dai, + int (*get_channel_map)(const struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot); int (*set_tristate)(struct snd_soc_dai *dai, int tristate); -- cgit v1.2.3 From de267e7a6ea8e6fa29af2287adfc9fc9d87e6dc9 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Jun 2024 15:03:23 +0200 Subject: ASoC: Constify return of snd_soc_dai_get_pcm_stream() Returned 'struct snd_soc_pcm_stream' by snd_soc_dai_get_pcm_stream() is not modified by the users, so it can be changed as pointer to const. This is a necessary step towards making the 'dai->driver' a pointer to const. Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-5-8004f346ee38@linaro.org Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index f22969298de1..bd249710d716 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -473,7 +473,7 @@ struct snd_soc_dai { unsigned int probed:1; }; -static inline struct snd_soc_pcm_stream * +static inline const struct snd_soc_pcm_stream * snd_soc_dai_get_pcm_stream(const struct snd_soc_dai *dai, int stream) { return (stream == SNDRV_PCM_STREAM_PLAYBACK) ? -- cgit v1.2.3 From d6a711a898672dd873aab3844f754a3ca40723a5 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Tue, 18 Jun 2024 15:29:51 +0200 Subject: spi: Fix OCTAL mode support Add OCTAL mode support. Issue detected using "--octal" spidev_test's option. Signed-off-by: Patrice Chotard Link: https://msgid.link/r/20240618132951.2743935-4-patrice.chotard@foss.st.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e8e1e798924f..98fdef6e28f2 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -1085,12 +1085,13 @@ struct spi_transfer { unsigned dummy_data:1; unsigned cs_off:1; unsigned cs_change:1; - unsigned tx_nbits:3; - unsigned rx_nbits:3; + unsigned tx_nbits:4; + unsigned rx_nbits:4; unsigned timestamped:1; #define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */ #define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */ #define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */ +#define SPI_NBITS_OCTAL 0x08 /* 8-bit transfer */ u8 bits_per_word; struct spi_delay delay; struct spi_delay cs_change_delay; -- cgit v1.2.3 From 702eb71fd6501b3566283f8c96d7ccc6ddd662e9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 17 Jun 2024 18:23:00 +0200 Subject: fsnotify: Do not generate events for O_PATH file descriptors Currently we will not generate FS_OPEN events for O_PATH file descriptors but we will generate FS_CLOSE events for them. This is asymmetry is confusing. Arguably no fsnotify events should be generated for O_PATH file descriptors as they cannot be used to access or modify file content, they are just convenient handles to file objects like paths. So fix the asymmetry by stopping to generate FS_CLOSE for O_PATH file descriptors. Cc: Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20240617162303.1596-1-jack@suse.cz Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- include/linux/fsnotify.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 4da80e92f804..278620e063ab 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -112,7 +112,13 @@ static inline int fsnotify_file(struct file *file, __u32 mask) { const struct path *path; - if (file->f_mode & FMODE_NONOTIFY) + /* + * FMODE_NONOTIFY are fds generated by fanotify itself which should not + * generate new events. We also don't want to generate events for + * FMODE_PATH fds (involves open & close events) as they are just + * handle creation / destruction events and not "real" file events. + */ + if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH)) return 0; path = &file->f_path; -- cgit v1.2.3 From 9f774c757e3fb2ac32dc4377e8f21f3364a8df81 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Fri, 14 Jun 2024 21:36:45 +0800 Subject: ASoc: tas2781: Enable RCA-based playback without DSP firmware download In only loading RCA (Reconfigurable Architecture) binary case, no DSP program will be working inside tas2563/tas2781, that is dsp-bypass mode, do not support speaker protection, or audio acoustic algorithms in this mode. Fixes: ef3bcde75d06 ("ASoC: tas2781: Add tas2781 driver") Signed-off-by: Shenghao Ding Reviewed-by: Pierre-Louis Bossart Link: https://msgid.link/r/20240614133646.910-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781-dsp.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/tas2781-dsp.h b/include/sound/tas2781-dsp.h index 7fba7ea26a4b..3cda9da14f6d 100644 --- a/include/sound/tas2781-dsp.h +++ b/include/sound/tas2781-dsp.h @@ -117,10 +117,17 @@ struct tasdevice_fw { struct device *dev; }; -enum tasdevice_dsp_fw_state { - TASDEVICE_DSP_FW_NONE = 0, +enum tasdevice_fw_state { + /* Driver in startup mode, not load any firmware. */ TASDEVICE_DSP_FW_PENDING, + /* DSP firmware in the system, but parsing error. */ TASDEVICE_DSP_FW_FAIL, + /* + * Only RCA (Reconfigurable Architecture) firmware load + * successfully. + */ + TASDEVICE_RCA_FW_OK, + /* Both RCA and DSP firmware load successfully. */ TASDEVICE_DSP_FW_ALL_OK, }; -- cgit v1.2.3 From a6816314af5749cd88944bfdceb270c627cdf348 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 3 May 2024 11:17:32 -0700 Subject: KVM: Introduce vcpu->wants_to_run Introduce vcpu->wants_to_run to indicate when a vCPU is in its core run loop, i.e. when the vCPU is running the KVM_RUN ioctl and immediate_exit was not set. Replace all references to vcpu->run->immediate_exit with !vcpu->wants_to_run to avoid TOCTOU races with userspace. For example, a malicious userspace could invoked KVM_RUN with immediate_exit=true and then after KVM reads it to set wants_to_run=false, flip it to false. This would result in the vCPU running in KVM_RUN with wants_to_run=false. This wouldn't cause any real bugs today but is a dangerous landmine. Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20240503181734.1467938-2-dmatlack@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7b9d2633a931..d72ced3e74d1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -378,6 +378,7 @@ struct kvm_vcpu { bool dy_eligible; } spin_loop; #endif + bool wants_to_run; bool preempted; bool ready; bool scheduled_out; -- cgit v1.2.3 From 4b23e0c199b20fa6fe9655b3d0e12d6c6f18c27f Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 3 May 2024 11:17:33 -0700 Subject: KVM: Ensure new code that references immediate_exit gets extra scrutiny Ensure that any new KVM code that references immediate_exit gets extra scrutiny by renaming it to immediate_exit__unsafe in kernel code. All fields in struct kvm_run are subject to TOCTOU races since they are mapped into userspace, which may be malicious or buggy. To protect KVM, introduces a new macro that appends __unsafe to select field names in struct kvm_run, hinting to developers and reviewers that accessing such fields must be done carefully. Apply the new macro to immediate_exit, since userspace can make immediate_exit inconsistent with vcpu->wants_to_run, i.e. accessing immediate_exit directly could lead to unexpected bugs in the future. Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20240503181734.1467938-3-dmatlack@google.com [sean: massage changelog] Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d03842abae57..795773f5db63 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -192,11 +192,24 @@ struct kvm_xen_exit { /* Flags that describe what fields in emulation_failure hold valid data. */ #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) +/* + * struct kvm_run can be modified by userspace at any time, so KVM must be + * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM() + * renames fields in struct kvm_run from to __unsafe when + * compiled into the kernel, ensuring that any use within KVM is obvious and + * gets extra scrutiny. + */ +#ifdef __KERNEL__ +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe +#else +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol +#endif + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 immediate_exit; + __u8 HINT_UNSAFE_IN_KVM(immediate_exit); __u8 padding1[6]; /* out */ -- cgit v1.2.3 From 395e73bd8d35fa9b8505e44f63a2a10abde09cce Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 9 Jun 2024 20:12:19 -0700 Subject: srcu: Add NUM_ACTIVE_SRCU_POLL_OLDSTATE This commit adds NUM_ACTIVE_SRCU_POLL_OLDSTATE, which gives the maximum number of distinct return values from get_state_synchronize_rcu() that can, at a given point in time, correspond to not-completed SRCU grace periods. Reported-by: Kent Overstreet Closes: https://lore.kernel.org/all/irycqy4sinjdgm2hkyix2bffunpcmuwgeufsx6nlljvqme3wiu@ify3zdnrmzph/ Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 236610e4a8fa..f664cba7a80c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -61,6 +61,10 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp); unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp); bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); +// Maximum number of unsigned long values corresponding to +// not-yet-completed SRCU grace periods. +#define NUM_ACTIVE_SRCU_POLL_OLDSTATE 2 + #ifdef CONFIG_NEED_SRCU_NMI_SAFE int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp); void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp); -- cgit v1.2.3 From e206f33e2c0774276a0497fe538472e12016a362 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 14 Jun 2024 13:26:44 -0700 Subject: srcu: Fill out polled grace-period APIs This commit adds the get_completed_synchronize_srcu() and the same_state_synchronize_srcu() functions. The first returns a cookie that is always interpreted as corresponding to an expired grace period. The second does an equality comparison of a pair of cookies. Signed-off-by: Paul E. McKenney Cc: Kent Overstreet --- include/linux/srcu.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index f664cba7a80c..6f6cb5fc1242 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -57,6 +57,20 @@ void cleanup_srcu_struct(struct srcu_struct *ssp); int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); void synchronize_srcu(struct srcu_struct *ssp); + +#define SRCU_GET_STATE_COMPLETED 0x1 + +/** + * get_completed_synchronize_srcu - Return a pre-completed polled state cookie + * + * Returns a value that poll_state_synchronize_srcu() will always treat + * as a cookie whose grace period has already completed. + */ +static inline unsigned long get_completed_synchronize_srcu(void) +{ + return SRCU_GET_STATE_COMPLETED; +} + unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp); unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp); bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); @@ -65,6 +79,23 @@ bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); // not-yet-completed SRCU grace periods. #define NUM_ACTIVE_SRCU_POLL_OLDSTATE 2 +/** + * same_state_synchronize_srcu - Are two old-state values identical? + * @oldstate1: First old-state value. + * @oldstate2: Second old-state value. + * + * The two old-state values must have been obtained from either + * get_state_synchronize_srcu(), start_poll_synchronize_srcu(), or + * get_completed_synchronize_srcu(). Returns @true if the two values are + * identical and @false otherwise. This allows structures whose lifetimes + * are tracked by old-state values to push these values to a list header, + * allowing those structures to be slightly smaller. + */ +static inline bool same_state_synchronize_srcu(unsigned long oldstate1, unsigned long oldstate2) +{ + return oldstate1 == oldstate2; +} + #ifdef CONFIG_NEED_SRCU_NMI_SAFE int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp); void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp); -- cgit v1.2.3 From 5c563ee90a22d3295bcd6217e3ecd7bf9f4d9d48 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 24 May 2024 11:58:28 -0700 Subject: cpumask: introduce assign_cpu() macro Now that assign_bit() is a thin macro wrapper around set_bit() and clear_bit(), we can use it in cpumask API and drop duplicating implementations of set_cpu_xxx() helpers with no additional overhead. Bloat-o-meter reports almost 2k less of generated code for allyesconfig, mostly in kernel/cpu.c: add/remove: 2/4 grow/shrink: 3/4 up/down: 498/-2228 (-1730) Reviewed-by: Alexander Lobakin Signed-off-by: Yury Norov --- include/linux/cpumask.h | 40 ++++++---------------------------------- 1 file changed, 6 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 23686bed441d..18410acdbc9e 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1083,44 +1083,16 @@ void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); void init_cpu_online(const struct cpumask *src); -static inline void -set_cpu_possible(unsigned int cpu, bool possible) -{ - if (possible) - cpumask_set_cpu(cpu, &__cpu_possible_mask); - else - cpumask_clear_cpu(cpu, &__cpu_possible_mask); -} +#define assign_cpu(cpu, mask, val) \ + assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val)) -static inline void -set_cpu_present(unsigned int cpu, bool present) -{ - if (present) - cpumask_set_cpu(cpu, &__cpu_present_mask); - else - cpumask_clear_cpu(cpu, &__cpu_present_mask); -} +#define set_cpu_possible(cpu, possible) assign_cpu((cpu), &__cpu_possible_mask, (possible)) +#define set_cpu_present(cpu, present) assign_cpu((cpu), &__cpu_present_mask, (present)) +#define set_cpu_active(cpu, active) assign_cpu((cpu), &__cpu_active_mask, (active)) +#define set_cpu_dying(cpu, dying) assign_cpu((cpu), &__cpu_dying_mask, (dying)) void set_cpu_online(unsigned int cpu, bool online); -static inline void -set_cpu_active(unsigned int cpu, bool active) -{ - if (active) - cpumask_set_cpu(cpu, &__cpu_active_mask); - else - cpumask_clear_cpu(cpu, &__cpu_active_mask); -} - -static inline void -set_cpu_dying(unsigned int cpu, bool dying) -{ - if (dying) - cpumask_set_cpu(cpu, &__cpu_dying_mask); - else - cpumask_clear_cpu(cpu, &__cpu_dying_mask); -} - /** * to_cpumask - convert a NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap -- cgit v1.2.3 From e0eeb938adb0367de4b0946125a06142d8de7d37 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 11 Jun 2024 15:38:12 +0300 Subject: bitops: Add a comment explaining the double underscore macros Linus Walleij pointed out that a new comer might be confused about the difference between set_bit() and __set_bit(). Add a comment explaining the difference. Link: https://lore.kernel.org/all/CACRpkdZFPG_YLici-BmYfk9HZ36f4WavCN3JNotkk8cPgCODCg@mail.gmail.com/ Signed-off-by: Dan Carpenter Reviewed-by: Linus Walleij Signed-off-by: Yury Norov --- include/linux/bitops.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 46d4bdc634c0..ba35bbf07798 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -47,12 +47,17 @@ extern unsigned long __sw_hweight64(__u64 w); __builtin_constant_p(*(const unsigned long *)(addr))) ? \ const##op(nr, addr) : op(nr, addr)) +/* + * The following macros are non-atomic versions of their non-underscored + * counterparts. + */ #define __set_bit(nr, addr) bitop(___set_bit, nr, addr) #define __clear_bit(nr, addr) bitop(___clear_bit, nr, addr) #define __change_bit(nr, addr) bitop(___change_bit, nr, addr) #define __test_and_set_bit(nr, addr) bitop(___test_and_set_bit, nr, addr) #define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) + #define test_bit(nr, addr) bitop(_test_bit, nr, addr) #define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr) -- cgit v1.2.3 From 52c2e956dcebecc8901911217a9647203ebcaf3c Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:53 -0700 Subject: drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Xe, the plan is to support multiple types of perf counter streams (OA is only one type of these streams). Rather than introduce NxM ioctls for these (N perf streams with M ioctl's per perf stream), we decide to multiplex these (N different stream types and the M ops for each of these stream types) through a single PERF ioctl. This multiplexing is the purpose of the PERF layer. In addition to PERF DRM ioctl's, another set of ioctl's on the PERF fd are defined. These are expected to be common to different PERF stream types and therefore defined at the PERF layer itself. v2: Add param_size to 'struct drm_xe_perf_param' (Umesh) v3: Rename 'enum drm_xe_perf_ops' to 'enum drm_xe_perf_ioctls' (Guy Zadicario) Add DRM_ prefix to ioctl names to indicate uapi names v4: Add 'enum drm_xe_perf_op' previously missed out (Guy Zadicario) v5: Squash the ops and PERF layer patches into a single patch (Umesh) Remove param_size from struct 'drm_xe_perf_param' (Umesh) v6: Add DRM_XE_PERF_IOCTL_STATUS v7: Add DRM_XE_PERF_IOCTL_INFO v8: Fix Copyright years, fix DRM_XE_PERF_TYPE_MAX, move '#include "xe_perf.h"' to xe_perf.c, add kernel doc (Michal) Acked-by: Rodrigo Vivi Acked-by: Guy Zadicario Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-2-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 66 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index d7b0903c22b2..c1626027dc69 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -80,6 +80,7 @@ extern "C" { * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY * - &DRM_IOCTL_XE_EXEC * - &DRM_IOCTL_XE_WAIT_USER_FENCE + * - &DRM_IOCTL_XE_PERF */ /* @@ -100,6 +101,8 @@ extern "C" { #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 #define DRM_XE_EXEC 0x09 #define DRM_XE_WAIT_USER_FENCE 0x0a +#define DRM_XE_PERF 0x0b + /* Must be kept compact -- no holes */ #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) @@ -113,6 +116,7 @@ extern "C" { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) +#define DRM_IOCTL_XE_PERF DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param) /** * DOC: Xe IOCTL Extensions @@ -1370,6 +1374,68 @@ struct drm_xe_wait_user_fence { __u64 reserved[2]; }; +/** + * enum drm_xe_perf_type - Perf stream types + */ +enum drm_xe_perf_type { + __DRM_XE_PERF_TYPE_MAX, /* non-ABI */ +}; + +/** + * enum drm_xe_perf_op - Perf stream ops + */ +enum drm_xe_perf_op { + /** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */ + DRM_XE_PERF_OP_STREAM_OPEN, + + /** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */ + DRM_XE_PERF_OP_ADD_CONFIG, + + /** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf stream config */ + DRM_XE_PERF_OP_REMOVE_CONFIG, +}; + +/** + * struct drm_xe_perf_param - Input of &DRM_XE_PERF + * + * The perf layer enables multiplexing perf counter streams of multiple + * types. The actual params for a particular stream operation are supplied + * via the @param pointer (use __copy_from_user to get these params). + */ +struct drm_xe_perf_param { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @perf_type: Perf stream type, of enum @drm_xe_perf_type */ + __u64 perf_type; + /** @perf_op: Perf op, of enum @drm_xe_perf_op */ + __u64 perf_op; + /** @param: Pointer to actual stream params */ + __u64 param; +}; + +/** + * enum drm_xe_perf_ioctls - Perf fd ioctl's + * + * Information exchanged between userspace and kernel for perf fd ioctl's + * is stream type specific + */ +enum drm_xe_perf_ioctls { + /** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */ + DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0), + + /** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */ + DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1), + + /** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */ + DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2), + + /** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */ + DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3), + + /** @DRM_XE_PERF_IOCTL_INFO: Return stream info */ + DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 67977882a2f1339f0a7d32576ad61967828b2ca5 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:55 -0700 Subject: drm/xe/oa/uapi: Add OA data formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add and initialize supported OA data formats for various platforms (including Xe2). User can request OA data in any supported format. Bspec: 52198, 60942, 61101 v2: Start 'xe_oa_format_name' enum from 0 (Umesh) Fix error rewind with OA (Umesh) v3: Use graphics versions rather than absolute platform names v4: Add missing kernel doc for struct memebers and enum and other minor changes (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-4-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index c1626027dc69..7e10874bfb33 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1436,6 +1436,25 @@ enum drm_xe_perf_ioctls { DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), }; +/** + * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec + * 52198/60942 + */ +enum drm_xe_oa_format_type { + /** @DRM_XE_OA_FMT_TYPE_OAG: OAG report format */ + DRM_XE_OA_FMT_TYPE_OAG, + /** @DRM_XE_OA_FMT_TYPE_OAR: OAR report format */ + DRM_XE_OA_FMT_TYPE_OAR, + /** @DRM_XE_OA_FMT_TYPE_OAM: OAM report format */ + DRM_XE_OA_FMT_TYPE_OAM, + /** @DRM_XE_OA_FMT_TYPE_OAC: OAC report format */ + DRM_XE_OA_FMT_TYPE_OAC, + /** @DRM_XE_OA_FMT_TYPE_OAM_MPEC: OAM SAMEDIA or OAM MPEC report format */ + DRM_XE_OA_FMT_TYPE_OAM_MPEC, + /** @DRM_XE_OA_FMT_TYPE_PEC: PEC report format */ + DRM_XE_OA_FMT_TYPE_PEC, +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From a9f905ae7b6f29a337dda2ad773c08b92dafe9a5 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:56 -0700 Subject: drm/xe/oa/uapi: Initialize OA units MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initialize OA unit data struct's for each gt during device probe. Also assign OA units for hardware engines. v2: Remove XE_OA_UNIT_OAG/XE_OA_UNIT_OAM_SAMEDIA_0 enum (Umesh) Change mtl_oa_base to 0x13000 (Umesh) v3: Switch to drmm_ functions and other cleanups (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-5-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 7e10874bfb33..323d899a276b 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1436,6 +1436,20 @@ enum drm_xe_perf_ioctls { DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), }; +/** + * enum drm_xe_oa_unit_type - OA unit types + */ +enum drm_xe_oa_unit_type { + /** + * @DRM_XE_OA_UNIT_TYPE_OAG: OAG OA unit. OAR/OAC are considered + * sub-types of OAG. For OAR/OAC, use OAG. + */ + DRM_XE_OA_UNIT_TYPE_OAG, + + /** @DRM_XE_OA_UNIT_TYPE_OAM: OAM OA unit */ + DRM_XE_OA_UNIT_TYPE_OAM, +}; + /** * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec * 52198/60942 -- cgit v1.2.3 From cdf02fe1a94a768cbcd20f5c4e1a1d805f4a06c0 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:57 -0700 Subject: drm/xe/oa/uapi: Add/remove OA config perf ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce add/remove config perf ops for OA. OA configurations consist of a set of event/counter select register address/value pairs. The add_config perf op validates and stores such configurations and also exposes them in the metrics sysfs. These configurations will be programmed to OA unit HW when an OA stream using a configuration is opened. The OA stream can also switch to other stored configurations. v2: Start config id's from 1 and other minor review comments (Umesh) v3: Add 32 bit build v4: Add kernel doc for non-static functions (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-6-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 323d899a276b..fd9a4bd9e3d4 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1378,6 +1378,7 @@ struct drm_xe_wait_user_fence { * enum drm_xe_perf_type - Perf stream types */ enum drm_xe_perf_type { + DRM_XE_PERF_TYPE_OA, __DRM_XE_PERF_TYPE_MAX, /* non-ABI */ }; @@ -1469,6 +1470,30 @@ enum drm_xe_oa_format_type { DRM_XE_OA_FMT_TYPE_PEC, }; +/** + * struct drm_xe_oa_config - OA metric configuration + * + * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A + * particular config can be specified when opening an OA stream using + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. + */ +struct drm_xe_oa_config { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @uuid: String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" */ + char uuid[36]; + + /** @n_regs: Number of regs in @regs_ptr */ + __u32 n_regs; + + /** + * @regs_ptr: Pointer to (register address, value) pairs for OA config + * registers. Expected length of buffer is: (2 * sizeof(u32) * @n_regs). + */ + __u64 regs_ptr; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From b6fd51c6211910b1db072a3fa2a17ba85cb3dd51 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:58 -0700 Subject: drm/xe/oa/uapi: Define and parse OA stream properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Properties for OA streams are specified by user space, when the stream is opened, as a chain of drm_xe_ext_set_property struct's. Parse and validate these stream properties. v2: Remove struct drm_xe_oa_open_param (Harish Chegondi) Drop DRM_XE_OA_PROPERTY_POLL_OA_PERIOD_US (Umesh) Eliminate comparison with xe_oa_max_sample_rate (Umesh) Drop 'struct drm_xe_oa_record_header' (Umesh) v3: s/DRM_XE_OA_PROPERTY_OA_EXPONENT/ \ DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT/ (Jose) v4: Fix 32 bit build v5: Add non-static function kernel doc (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-7-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 72 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index fd9a4bd9e3d4..307409f968e2 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1470,6 +1470,78 @@ enum drm_xe_oa_format_type { DRM_XE_OA_FMT_TYPE_PEC, }; +/** + * enum drm_xe_oa_property_id - OA stream property id's + * + * Stream params are specified as a chain of @drm_xe_ext_set_property + * struct's, with @property values from enum @drm_xe_oa_property_id and + * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. + * @param field in struct @drm_xe_perf_param points to the first + * @drm_xe_ext_set_property struct. + */ +enum drm_xe_oa_property_id { +#define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 + /** + * @DRM_XE_OA_PROPERTY_OA_UNIT_ID: ID of the OA unit on which to open + * the OA stream, see @oa_unit_id in 'struct + * drm_xe_query_oa_units'. Defaults to 0 if not provided. + */ + DRM_XE_OA_PROPERTY_OA_UNIT_ID = 1, + + /** + * @DRM_XE_OA_PROPERTY_SAMPLE_OA: A value of 1 requests inclusion of raw + * OA unit reports or stream samples in a global buffer attached to an + * OA unit. + */ + DRM_XE_OA_PROPERTY_SAMPLE_OA, + + /** + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA + * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG. + */ + DRM_XE_OA_PROPERTY_OA_METRIC_SET, + + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */ + DRM_XE_OA_PROPERTY_OA_FORMAT, + /* + * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, + * in terms of the following quantities: a. enum @drm_xe_oa_format_type + * b. Counter select c. Counter size and d. BC report. Also refer to the + * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. + */ +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) + + /** + * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit + * sampling with sampling frequency proportional to 2^(period_exponent + 1) + */ + DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, + + /** + * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA + * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE). + */ + DRM_XE_OA_PROPERTY_OA_DISABLED, + + /** + * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific + * @exec_queue_id. Perf queries can be executed on this exec queue. + */ + DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, + + /** + * @DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE: Optional engine instance to + * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. + */ + DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, + + /** @DRM_XE_OA_PROPERTY_MAX: non-ABI */ + DRM_XE_OA_PROPERTY_MAX +}; + /** * struct drm_xe_oa_config - OA metric configuration * -- cgit v1.2.3 From e936f885f1e96f59d9d05fb6cb5a02b9b9b88a05 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:00 -0700 Subject: drm/xe/oa/uapi: Expose OA stream fd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OA stream open perf op returns an fd with its own file_operations for the newly initialized OA stream. These file_operations allow userspace to enable or disable the stream, as well as apply a different metric configuration for the OA stream. Userspace can also poll for data availability. OA stream initialization is completed in this commit by enabling the OA stream. When sampling is enabled this starts a hrtimer which periodically checks for data availablility. v2: Use stream properties for stream reconfiguration with DRM_XE_PERF_IOCTL_CONFIG v3: Hold runtime_pm reference across oa buffer alloc/free v4: Fix 32 bit build Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-9-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 307409f968e2..1e09f786b3e6 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1478,6 +1478,10 @@ enum drm_xe_oa_format_type { * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. * @param field in struct @drm_xe_perf_param points to the first * @drm_xe_ext_set_property struct. + * + * Exactly the same mechanism is also used for stream reconfiguration using + * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of + * properties below can be specified for stream reconfiguration. */ enum drm_xe_oa_property_id { #define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 -- cgit v1.2.3 From efb315d0a013cdc8b1e49f5c07b1a2972bc624d4 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:01 -0700 Subject: drm/xe/oa/uapi: Read file_operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the OA stream read file_operation. Both blocking and non-blocking reads are supported. As part of read system call, the read copies OA perf data from the OA buffer to the user buffer, after appending packet headers for status and data packets. v2: Drop OA report headers, implement DRM_XE_PERF_IOCTL_STATUS (Umesh) v3: Introduce 'struct drm_xe_oa_stream_status' v4: Define oa_status register bitfields (Umesh) v5: Add extensions to 'struct drm_xe_oa_stream_status' v6: Minor cleanup, eliminate report32 variable v7: Use -EIO to signal to userspace to read OASTATUS using DRM_XE_PERF_IOCTL_STATUS, change previous sites returning -EIO to return -EINVAL Make drm_xe_oa_stream_status bits contiguous (Jose, Umesh) rmw oa_status bits (Umesh) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-10-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 1e09f786b3e6..03a6e479227a 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1570,6 +1570,26 @@ struct drm_xe_oa_config { __u64 regs_ptr; }; +/** + * struct drm_xe_oa_stream_status - OA stream status returned from + * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to + * query stream status in response to EIO errno from perf fd read(). + */ +struct drm_xe_oa_stream_status { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_status: OA stream status (see Bspec 46717/61226) */ + __u64 oa_status; +#define DRM_XE_OASTATUS_MMIO_TRG_Q_FULL (1 << 3) +#define DRM_XE_OASTATUS_COUNTER_OVERFLOW (1 << 2) +#define DRM_XE_OASTATUS_BUFFER_OVERFLOW (1 << 1) +#define DRM_XE_OASTATUS_REPORT_LOST (1 << 0) + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From dd6b4718c3bab611588922ae8a7736c58eafcc93 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:04 -0700 Subject: drm/xe/oa/uapi: Query OA unit properties Implement query for properties of OA units present on a device. v2: Clean up reserved/pad fields (Umesh) Follow the same scheme as other query structs v3: Skip reporting reserved engines attached to OA units v4: Expose oa_buf_size via DRM_XE_PERF_IOCTL_INFO (Umesh) v5: Don't expose capabilities as OR of properties (Umesh) v6: Add extensions to query output structs: drm_xe_oa_unit, drm_xe_query_oa_units and drm_xe_oa_stream_info v7: Change oa_units[] array to __u64 type Acked-by: Rodrigo Vivi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-13-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 03a6e479227a..93e00be44b2d 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -689,6 +689,7 @@ struct drm_xe_device_query { #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6 #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION 7 +#define DRM_XE_DEVICE_QUERY_OA_UNITS 8 /** @query: The type of data to query */ __u32 query; @@ -1451,6 +1452,75 @@ enum drm_xe_oa_unit_type { DRM_XE_OA_UNIT_TYPE_OAM, }; +/** + * struct drm_xe_oa_unit - describe OA unit + */ +struct drm_xe_oa_unit { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_unit_id: OA unit ID */ + __u32 oa_unit_id; + + /** @oa_unit_type: OA unit type of @drm_xe_oa_unit_type */ + __u32 oa_unit_type; + + /** @capabilities: OA capabilities bit-mask */ + __u64 capabilities; +#define DRM_XE_OA_CAPS_BASE (1 << 0) + + /** @oa_timestamp_freq: OA timestamp freq */ + __u64 oa_timestamp_freq; + + /** @reserved: MBZ */ + __u64 reserved[4]; + + /** @num_engines: number of engines in @eci array */ + __u64 num_engines; + + /** @eci: engines attached to this OA unit */ + struct drm_xe_engine_class_instance eci[]; +}; + +/** + * struct drm_xe_query_oa_units - describe OA units + * + * If a query is made with a struct drm_xe_device_query where .query + * is equal to DRM_XE_DEVICE_QUERY_OA_UNITS, then the reply uses struct + * drm_xe_query_oa_units in .data. + * + * OA unit properties for all OA units can be accessed using a code block + * such as the one below: + * + * .. code-block:: C + * + * struct drm_xe_query_oa_units *qoa; + * struct drm_xe_oa_unit *oau; + * u8 *poau; + * + * // malloc qoa and issue DRM_XE_DEVICE_QUERY_OA_UNITS. Then: + * poau = (u8 *)&qoa->oa_units[0]; + * for (int i = 0; i < qoa->num_oa_units; i++) { + * oau = (struct drm_xe_oa_unit *)poau; + * // Access 'struct drm_xe_oa_unit' fields here + * poau += sizeof(*oau) + oau->num_engines * sizeof(oau->eci[0]); + * } + */ +struct drm_xe_query_oa_units { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @num_oa_units: number of OA units returned in oau[] */ + __u32 num_oa_units; + /** @pad: MBZ */ + __u32 pad; + /** + * @oa_units: struct @drm_xe_oa_unit array returned for this device. + * Written below as a u64 array to avoid problems with nested flexible + * arrays with some compilers + */ + __u64 oa_units[]; +}; + /** * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec * 52198/60942 @@ -1590,6 +1660,21 @@ struct drm_xe_oa_stream_status { __u64 reserved[3]; }; +/** + * struct drm_xe_oa_stream_info - OA stream info returned from + * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl + */ +struct drm_xe_oa_stream_info { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_buf_size: OA buffer size */ + __u64 oa_buf_size; + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 382d1741b5b2feffef7942dd074206372afe1a96 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Mon, 17 Jun 2024 13:17:26 -0700 Subject: net: mana: Add support for page sizes other than 4KB on ARM64 As defined by the MANA Hardware spec, the queue size for DMA is 4KB minimal, and power of 2. And, the HWC queue size has to be exactly 4KB. To support page sizes other than 4KB on ARM64, define the minimal queue size as a macro separately from the PAGE_SIZE, which we always assumed it to be 4KB before supporting ARM64. Also, add MANA specific macros and update code related to size alignment, DMA region calculations, etc. Signed-off-by: Haiyang Zhang Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1718655446-6576-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Jakub Kicinski --- include/net/mana/gdma.h | 10 +++++++++- include/net/mana/mana.h | 3 ++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index c547756c4284..83963d9e804d 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -224,7 +224,15 @@ struct gdma_dev { struct auxiliary_device *adev; }; -#define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE +/* MANA_PAGE_SIZE is the DMA unit */ +#define MANA_PAGE_SHIFT 12 +#define MANA_PAGE_SIZE BIT(MANA_PAGE_SHIFT) +#define MANA_PAGE_ALIGN(x) ALIGN((x), MANA_PAGE_SIZE) +#define MANA_PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), MANA_PAGE_SIZE) +#define MANA_PFN(a) ((a) >> MANA_PAGE_SHIFT) + +/* Required by HW */ +#define MANA_MIN_QSIZE MANA_PAGE_SIZE #define GDMA_CQE_SIZE 64 #define GDMA_EQE_SIZE 16 diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 59823901b74f..e39b8676fe54 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -42,7 +42,8 @@ enum TRI_STATE { #define MAX_SEND_BUFFERS_PER_QUEUE 256 -#define EQ_SIZE (8 * PAGE_SIZE) +#define EQ_SIZE (8 * MANA_PAGE_SIZE) + #define LOG2_EQ_THROTTLE 3 #define MAX_PORTS_IN_MANA_DEV 256 -- cgit v1.2.3 From 1e4c64b71c9bf230b25fde12cbcceacfdc8b3332 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 13 Jun 2024 11:55:07 -0400 Subject: mm/memblock: Add "reserve_mem" to reserved named memory at boot up In order to allow for requesting a memory region that can be used for things like pstore on multiple machines where the memory layout is not the same, add a new option to the kernel command line called "reserve_mem". The format is: reserve_mem=nn:align:name Where it will find nn amount of memory at the given alignment of align. The name field is to allow another subsystem to retrieve where the memory was found. For example: reserve_mem=12M:4096:oops ramoops.mem_name=oops Where ramoops.mem_name will tell ramoops that memory was reserved for it via the reserve_mem option and it can find it by calling: if (reserve_mem_find_by_name("oops", &start, &size)) { // start holds the start address and size holds the size given This is typically used for systems that do not wipe the RAM, and this command line will try to reserve the same physical memory on soft reboots. Note, it is not guaranteed to be the same location. For example, if KASLR places the kernel at the location of where the RAM reservation was from a previous boot, the new reservation will be at a different location. Any subsystem using this feature must add a way to verify that the contents of the physical memory is from a previous boot, as there may be cases where the memory will not be located at the same location. Not all systems may work either. There could be bit flips if the reboot goes through the BIOS. Using kexec to reboot the machine is likely to have better results in such cases. Link: https://lore.kernel.org/all/ZjJVnZUX3NZiGW6q@kernel.org/ Suggested-by: Mike Rapoport Tested-by: Guilherme G. Piccoli Signed-off-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20240613155527.437020271@goodmis.org Signed-off-by: Mike Rapoport (IBM) --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9849dfda44d4..077fb589b88a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4263,4 +4263,6 @@ static inline bool pfn_is_unaccepted_memory(unsigned long pfn) void vma_pgtable_walk_begin(struct vm_area_struct *vma); void vma_pgtable_walk_end(struct vm_area_struct *vma); +int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size); + #endif /* _LINUX_MM_H */ -- cgit v1.2.3 From c53795d48ee8f385c6a9e394651e7ee914baaeba Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Mon, 17 Jun 2024 11:09:04 -0700 Subject: net: add rx_sk to trace_kfree_skb skb does not include enough information to find out receiving sockets/services and netns/containers on packet drops. In theory skb->dev tells about netns, but it can get cleared/reused, e.g. by TCP stack for OOO packet lookup. Similarly, skb->sk often identifies a local sender, and tells nothing about a receiver. Allow passing an extra receiving socket to the tracepoint to improve the visibility on receiving drops. Signed-off-by: Yan Zhai Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/skb.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 07e0715628ec..b877133cd93a 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -24,13 +24,14 @@ DEFINE_DROP_REASON(FN, FN) TRACE_EVENT(kfree_skb, TP_PROTO(struct sk_buff *skb, void *location, - enum skb_drop_reason reason), + enum skb_drop_reason reason, struct sock *rx_sk), - TP_ARGS(skb, location, reason), + TP_ARGS(skb, location, reason, rx_sk), TP_STRUCT__entry( __field(void *, skbaddr) __field(void *, location) + __field(void *, rx_sk) __field(unsigned short, protocol) __field(enum skb_drop_reason, reason) ), @@ -38,12 +39,14 @@ TRACE_EVENT(kfree_skb, TP_fast_assign( __entry->skbaddr = skb; __entry->location = location; + __entry->rx_sk = rx_sk; __entry->protocol = ntohs(skb->protocol); __entry->reason = reason; ), - TP_printk("skbaddr=%p protocol=%u location=%pS reason: %s", - __entry->skbaddr, __entry->protocol, __entry->location, + TP_printk("skbaddr=%p rx_sk=%p protocol=%u location=%pS reason: %s", + __entry->skbaddr, __entry->rx_sk, __entry->protocol, + __entry->location, __print_symbolic(__entry->reason, DEFINE_DROP_REASON(FN, FNe))) ); -- cgit v1.2.3 From ba8de796baf4bdc03530774fb284fe3c97875566 Mon Sep 17 00:00:00 2001 From: Yan Zhai Date: Mon, 17 Jun 2024 11:09:09 -0700 Subject: net: introduce sk_skb_reason_drop function Long used destructors kfree_skb and kfree_skb_reason do not pass receiving socket to packet drop tracepoints trace_kfree_skb. This makes it hard to track packet drops of a certain netns (container) or a socket (user application). The naming of these destructors are also not consistent with most sk/skb operating functions, i.e. functions named "sk_xxx" or "skb_xxx". Introduce a new functions sk_skb_reason_drop as drop-in replacement for kfree_skb_reason on local receiving path. Callers can now pass receiving sockets to the tracepoints. kfree_skb and kfree_skb_reason are still usable but they are now just inline helpers that call sk_skb_reason_drop. Note it is not feasible to do the same to consume_skb. Packets not dropped can flow through multiple receive handlers, and have multiple receiving sockets. Leave it untouched for now. Suggested-by: Eric Dumazet Signed-off-by: Yan Zhai Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 813406a9bd6c..f4cda3fbdb75 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1251,8 +1251,14 @@ static inline bool skb_data_unref(const struct sk_buff *skb, return true; } -void __fix_address -kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); +void __fix_address sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, + enum skb_drop_reason reason); + +static inline void +kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) +{ + sk_skb_reason_drop(NULL, skb, reason); +} /** * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason -- cgit v1.2.3 From fe1ff61487ace69cd4e680e36169c90667eb9624 Mon Sep 17 00:00:00 2001 From: Jack Yu Date: Wed, 19 Jun 2024 05:53:42 +0000 Subject: ASoC: rt1318: Add RT1318 audio amplifier driver This is the initial i2s-based amplifier driver for rt1318. Signed-off-by: Jack Yu Link: https://msgid.link/r/b3055442ce6d4994aa01aa1fad6ba1fe@realtek.com Signed-off-by: Mark Brown --- include/sound/rt1318.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/sound/rt1318.h (limited to 'include') diff --git a/include/sound/rt1318.h b/include/sound/rt1318.h new file mode 100644 index 000000000000..fe6bff06036c --- /dev/null +++ b/include/sound/rt1318.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/sound/rt1318.h -- Platform data for RT1318 + * + * Copyright 2024 Realtek Semiconductor Corp. + */ + +#ifndef __LINUX_SND_RT1318_H +#define __LINUX_SND_RT1318_H + +struct rt1318_platform_data { + unsigned int init_r0_l; + unsigned int init_r0_r; +}; + +#endif -- cgit v1.2.3 From dc2e77979412d289df9049d8c693761db8602867 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 14 Jun 2024 12:30:44 -0400 Subject: net: Split a __sys_bind helper for io_uring io_uring holds a reference to the file and maintains a sockaddr_storage address. Similarly to what was done to __sys_connect_file, split an internal helper for __sys_bind in preparation to supporting an io_uring bind command. Reviewed-by: Jens Axboe Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Kuniyuki Iwashima Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20240614163047.31581-1-krisman@suse.de Signed-off-by: Jens Axboe --- include/linux/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index 89d16b90370b..b3000f49e9f5 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -442,6 +442,8 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, extern int __sys_socket(int family, int type, int protocol); extern struct file *__sys_socket_file(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); +extern int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address, + int addrlen); extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, int addrlen, int file_flags); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, -- cgit v1.2.3 From bb6aaf736680f0f3c2e6281735c47c64e2042819 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 14 Jun 2024 12:30:45 -0400 Subject: net: Split a __sys_listen helper for io_uring io_uring holds a reference to the file and maintains a sockaddr_storage address. Similarly to what was done to __sys_connect_file, split an internal helper for __sys_listen in preparation to support an io_uring listen command. Reviewed-by: Jens Axboe Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Kuniyuki Iwashima Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20240614163047.31581-2-krisman@suse.de Signed-off-by: Jens Axboe --- include/linux/socket.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index b3000f49e9f5..c1f16cdab677 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -449,6 +449,7 @@ extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); +extern int __sys_listen_socket(struct socket *sock, int backlog); extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, -- cgit v1.2.3 From 7481fd93fa0a851740e26026485f56a1305454ce Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 14 Jun 2024 12:30:46 -0400 Subject: io_uring: Introduce IORING_OP_BIND IORING_OP_BIND provides the semantic of bind(2) via io_uring. While this is an essentially synchronous system call, the main point is to enable a network path to execute fully with io_uring registered and descriptorless files. Signed-off-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20240614163047.31581-3-krisman@suse.de Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 994bf7af0efe..4ef153d95c87 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -257,6 +257,7 @@ enum io_uring_op { IORING_OP_FUTEX_WAITV, IORING_OP_FIXED_FD_INSTALL, IORING_OP_FTRUNCATE, + IORING_OP_BIND, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From ff140cc8628abfb1755691d16cfa8788d8820ef7 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Fri, 14 Jun 2024 12:30:47 -0400 Subject: io_uring: Introduce IORING_OP_LISTEN IORING_OP_LISTEN provides the semantic of listen(2) via io_uring. While this is an essentially synchronous system call, the main point is to enable a network path to execute fully with io_uring registered and descriptorless files. Signed-off-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20240614163047.31581-4-krisman@suse.de Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 4ef153d95c87..2aaf7ee256ac 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -258,6 +258,7 @@ enum io_uring_op { IORING_OP_FIXED_FD_INSTALL, IORING_OP_FTRUNCATE, IORING_OP_BIND, + IORING_OP_LISTEN, /* this goes last, obviously */ IORING_OP_LAST, -- cgit v1.2.3 From 1122c0c1cc71f740fa4d5f14f239194e06a1d5e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:40 +0200 Subject: block: move cache control settings out of queue->flags Move the cache control settings into the queue_limits so that the flags can be set atomically with the device queue frozen. Add new features and flags field for the driver set flags, and internal (usually sysfs-controlled) flags in the block layer. Note that we'll eventually remove enough field from queue_limits to bring it back to the previous size. The disable flag is inverted compared to the previous meaning, which means it now survives a rescan, similar to the max_sectors and max_discard_sectors user limits. The FLUSH and FUA flags are now inherited by blk_stack_limits, which simplified the code in dm a lot, but also causes a slight behavior change in that dm-switch and dm-unstripe now advertise a write cache despite setting num_flush_bios to 0. The I/O path will handle this gracefully, but as far as I can tell the lack of num_flush_bios and thus flush support is a pre-existing data integrity bug in those targets that really needs fixing, after which a non-zero num_flush_bios should be required in dm for targets that map to underlying devices. Signed-off-by: Christoph Hellwig Acked-by: Ulf Hansson Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-14-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0c247a716885..acdfe5122faa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -282,6 +282,28 @@ static inline bool blk_op_is_passthrough(blk_opf_t op) return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; } +/* flags set by the driver in queue_limits.features */ +enum { + /* supports a volatile write cache */ + BLK_FEAT_WRITE_CACHE = (1u << 0), + + /* supports passing on the FUA bit */ + BLK_FEAT_FUA = (1u << 1), +}; + +/* + * Flags automatically inherited when stacking limits. + */ +#define BLK_FEAT_INHERIT_MASK \ + (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA) + + +/* internal flags in queue_limits.flags */ +enum { + /* do not send FLUSH or FUA command despite advertised write cache */ + BLK_FLAGS_WRITE_CACHE_DISABLED = (1u << 31), +}; + /* * BLK_BOUNCE_NONE: never bounce (default) * BLK_BOUNCE_HIGH: bounce all highmem pages @@ -292,6 +314,8 @@ enum blk_bounce { }; struct queue_limits { + unsigned int features; + unsigned int flags; enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; @@ -536,12 +560,9 @@ struct request_queue { #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ -#define QUEUE_FLAG_HW_WC 13 /* Write back caching supported */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ -#define QUEUE_FLAG_WC 17 /* Write back caching */ -#define QUEUE_FLAG_FUA 18 /* device supports FUA writes */ #define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ @@ -951,7 +972,6 @@ void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, sector_t offset, const char *pfx); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); -extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); struct blk_independent_access_ranges * disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges); @@ -1304,14 +1324,20 @@ static inline bool bdev_stable_writes(struct block_device *bdev) return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); } +static inline bool blk_queue_write_cache(struct request_queue *q) +{ + return (q->limits.features & BLK_FEAT_WRITE_CACHE) && + !(q->limits.flags & BLK_FLAGS_WRITE_CACHE_DISABLED); +} + static inline bool bdev_write_cache(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_WC, &bdev_get_queue(bdev)->queue_flags); + return blk_queue_write_cache(bdev_get_queue(bdev)); } static inline bool bdev_fua(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_FUA, &bdev_get_queue(bdev)->queue_flags); + return bdev_get_queue(bdev)->limits.features & BLK_FEAT_FUA; } static inline bool bdev_nowait(struct block_device *bdev) -- cgit v1.2.3 From bd4a633b6f7c3c6b6ebc1a07317643270e751a94 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:41 +0200 Subject: block: move the nonrot flag to queue_limits Move the nonrot flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Use the chance to switch to defaulting to non-rotational and require the driver to opt into rotational, which matches the polarity of the sysfs interface. For the z2ram, ps3vram, 2x memstick, ubiblock and dcssblk the new rotational flag is not set as they clearly are not rotational despite this being a behavior change. There are some other drivers that unconditionally set the rotational flag to keep the existing behavior as they arguably can be used on rotational devices even if that is probably not their main use today (e.g. virtio_blk and drbd). The flag is automatically inherited in blk_stack_limits matching the existing behavior in dm and md. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-15-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index acdfe5122faa..988e3248cffe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -289,14 +289,16 @@ enum { /* supports passing on the FUA bit */ BLK_FEAT_FUA = (1u << 1), + + /* rotational device (hard drive or floppy) */ + BLK_FEAT_ROTATIONAL = (1u << 2), }; /* * Flags automatically inherited when stacking limits. */ #define BLK_FEAT_INHERIT_MASK \ - (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA) - + (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL) /* internal flags in queue_limits.flags */ enum { @@ -553,8 +555,6 @@ struct request_queue { #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ -#define QUEUE_FLAG_NONROT 6 /* non-rotational device (SSD) */ -#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ @@ -589,7 +589,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) -#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) +#define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ -- cgit v1.2.3 From 39a9f1c334f9f27b3b3e6d0005c10ed667268346 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:42 +0200 Subject: block: move the add_random flag to queue_limits Move the add_random flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Note that this also removes code from dm to clear the flag based on the underlying devices, which can't be reached as dm devices will always start out without the flag set. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-16-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 988e3248cffe..cf1bbf566b2b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -292,6 +292,9 @@ enum { /* rotational device (hard drive or floppy) */ BLK_FEAT_ROTATIONAL = (1u << 2), + + /* contributes to the random number pool */ + BLK_FEAT_ADD_RANDOM = (1u << 3), }; /* @@ -557,7 +560,6 @@ struct request_queue { #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ #define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ -#define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ @@ -591,7 +593,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) -#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) -- cgit v1.2.3 From cdb2497918cc2929691408bac87b58433b45b6d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:43 +0200 Subject: block: move the io_stat flag setting to queue_limits Move the io_stat flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Simplify md and dm to set the flag unconditionally instead of avoiding setting a simple flag for cases where it already is set by other means, which is a bit pointless. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-17-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cf1bbf566b2b..5fafb2f95fd1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -295,6 +295,9 @@ enum { /* contributes to the random number pool */ BLK_FEAT_ADD_RANDOM = (1u << 3), + + /* do disk/partitions IO accounting */ + BLK_FEAT_IO_STAT = (1u << 4), }; /* @@ -558,7 +561,6 @@ struct request_queue { #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ -#define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ @@ -577,8 +579,7 @@ struct request_queue { #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ #define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/ -#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_IO_STAT) | \ - (1UL << QUEUE_FLAG_SAME_COMP) | \ +#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_SAME_COMP) | \ (1UL << QUEUE_FLAG_NOWAIT)) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); @@ -592,7 +593,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) -#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) +#define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) #define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) -- cgit v1.2.3 From 1a02f3a73f8c670eddeb44bf52a75ae7f67cfc11 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:44 +0200 Subject: block: move the stable_writes flag to queue_limits Move the stable_writes flag into the queue_limits feature field so that it can be set atomically with the queue frozen. The flag is now inherited by blk_stack_limits, which greatly simplifies the code in dm, and fixed md which previously did not pass on the flag set on lower devices. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-18-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5fafb2f95fd1..8936eb6ba609 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -298,13 +298,17 @@ enum { /* do disk/partitions IO accounting */ BLK_FEAT_IO_STAT = (1u << 4), + + /* don't modify data until writeback is done */ + BLK_FEAT_STABLE_WRITES = (1u << 5), }; /* * Flags automatically inherited when stacking limits. */ #define BLK_FEAT_INHERIT_MASK \ - (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL) + (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ + BLK_FEAT_STABLE_WRITES) /* internal flags in queue_limits.flags */ enum { @@ -565,7 +569,6 @@ struct request_queue { #define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ -#define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ #define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ @@ -1323,7 +1326,7 @@ static inline bool bdev_stable_writes(struct block_device *bdev) if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE) return true; - return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); + return q->limits.features & BLK_FEAT_STABLE_WRITES; } static inline bool blk_queue_write_cache(struct request_queue *q) -- cgit v1.2.3 From aadd5c59c910427c0464c217d5ed588ff14e2502 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:45 +0200 Subject: block: move the synchronous flag to queue_limits Move the synchronous flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-19-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8936eb6ba609..cee7b44a1425 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -301,6 +301,9 @@ enum { /* don't modify data until writeback is done */ BLK_FEAT_STABLE_WRITES = (1u << 5), + + /* always completes in submit context */ + BLK_FEAT_SYNCHRONOUS = (1u << 6), }; /* @@ -566,7 +569,6 @@ struct request_queue { #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ -#define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ @@ -1315,8 +1317,7 @@ static inline bool bdev_nonrot(struct block_device *bdev) static inline bool bdev_synchronous(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_SYNCHRONOUS, - &bdev_get_queue(bdev)->queue_flags); + return bdev->bd_disk->queue->limits.features & BLK_FEAT_SYNCHRONOUS; } static inline bool bdev_stable_writes(struct block_device *bdev) -- cgit v1.2.3 From f76af42f8bf13d2620084f305f01691de9238fc7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:46 +0200 Subject: block: move the nowait flag to queue_limits Move the nowait flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Stacking drivers are simplified in that they now can simply set the flag, and blk_stack_limits will clear it when the features is not supported by any of the underlying devices. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-20-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cee7b44a1425..f3d4519d609d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -304,6 +304,9 @@ enum { /* always completes in submit context */ BLK_FEAT_SYNCHRONOUS = (1u << 6), + + /* supports REQ_NOWAIT */ + BLK_FEAT_NOWAIT = (1u << 7), }; /* @@ -580,12 +583,10 @@ struct request_queue { #define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ -#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ #define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/ -#define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_SAME_COMP) | \ - (1UL << QUEUE_FLAG_NOWAIT)) +#define QUEUE_FLAG_MQ_DEFAULT (1UL << QUEUE_FLAG_SAME_COMP) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); @@ -1348,7 +1349,7 @@ static inline bool bdev_fua(struct block_device *bdev) static inline bool bdev_nowait(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_NOWAIT, &bdev_get_queue(bdev)->queue_flags); + return bdev->bd_disk->queue->limits.features & BLK_FEAT_NOWAIT; } static inline bool bdev_is_zoned(struct block_device *bdev) -- cgit v1.2.3 From f467fee48da4500786e145489787b37adae317c3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:47 +0200 Subject: block: move the dax flag to queue_limits Move the dax flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-21-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3d4519d609d..7022e06a3dd9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -307,6 +307,9 @@ enum { /* supports REQ_NOWAIT */ BLK_FEAT_NOWAIT = (1u << 7), + + /* supports DAX */ + BLK_FEAT_DAX = (1u << 8), }; /* @@ -575,7 +578,6 @@ struct request_queue { #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ -#define QUEUE_FLAG_DAX 19 /* device supports DAX */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ @@ -602,7 +604,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) -#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) +#define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) #define blk_queue_pci_p2pdma(q) \ test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) #ifdef CONFIG_BLK_RQ_ALLOC_TIME -- cgit v1.2.3 From 8023e144f9d6e35f8786937e2f0c2fea0aba6dbc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:48 +0200 Subject: block: move the poll flag to queue_limits Move the poll flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Stacking drivers are simplified in that they now can simply set the flag, and blk_stack_limits will clear it when the features is not supported by any of the underlying devices. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-22-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7022e06a3dd9..cd27b66cbacc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -310,6 +310,9 @@ enum { /* supports DAX */ BLK_FEAT_DAX = (1u << 8), + + /* supports I/O polling */ + BLK_FEAT_POLL = (1u << 9), }; /* @@ -577,7 +580,6 @@ struct request_queue { #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ -#define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ -- cgit v1.2.3 From b1fc937a55f5735b98d9dceae5bb6ba262501f56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:49 +0200 Subject: block: move the zoned flag into the features field Move the zoned flags into the features field to reclaim a little bit of space. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-23-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cd27b66cbacc..bdc30c1fb1b5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -313,6 +313,9 @@ enum { /* supports I/O polling */ BLK_FEAT_POLL = (1u << 9), + + /* is a zoned device */ + BLK_FEAT_ZONED = (1u << 10), }; /* @@ -320,7 +323,7 @@ enum { */ #define BLK_FEAT_INHERIT_MASK \ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ - BLK_FEAT_STABLE_WRITES) + BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED) /* internal flags in queue_limits.flags */ enum { @@ -372,7 +375,6 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; - bool zoned; unsigned int max_open_zones; unsigned int max_active_zones; @@ -654,7 +656,8 @@ static inline enum rpm_status queue_rpm_status(struct request_queue *q) static inline bool blk_queue_is_zoned(struct request_queue *q) { - return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && q->limits.zoned; + return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + (q->limits.features & BLK_FEAT_ZONED); } #ifdef CONFIG_BLK_DEV_ZONED -- cgit v1.2.3 From a52758a39768f441e468a41da6c15a59d6d6011a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:50 +0200 Subject: block: move the zone_resetall flag to queue_limits Move the zone_resetall flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-24-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bdc30c1fb1b5..1077cb8d8fd8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -316,6 +316,9 @@ enum { /* is a zoned device */ BLK_FEAT_ZONED = (1u << 10), + + /* supports Zone Reset All */ + BLK_FEAT_ZONE_RESETALL = (1u << 11), }; /* @@ -586,7 +589,6 @@ struct request_queue { #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ #define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ -#define QUEUE_FLAG_ZONE_RESETALL 26 /* supports Zone Reset All */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ @@ -607,7 +609,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) #define blk_queue_zone_resetall(q) \ - test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) + ((q)->limits.features & BLK_FEAT_ZONE_RESETALL) #define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) #define blk_queue_pci_p2pdma(q) \ test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) -- cgit v1.2.3 From 9c1e42e3c876c66796eda23e79836a4d92613a61 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:51 +0200 Subject: block: move the pci_p2pdma flag to queue_limits Move the pci_p2pdma flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-25-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1077cb8d8fd8..ab0f7dfba556 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -319,6 +319,9 @@ enum { /* supports Zone Reset All */ BLK_FEAT_ZONE_RESETALL = (1u << 11), + + /* supports PCI(e) p2p requests */ + BLK_FEAT_PCI_P2PDMA = (1u << 12), }; /* @@ -588,7 +591,6 @@ struct request_queue { #define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ #define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ #define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ -#define QUEUE_FLAG_PCI_P2PDMA 25 /* device supports PCI p2p requests */ #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ @@ -611,8 +613,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_zone_resetall(q) \ ((q)->limits.features & BLK_FEAT_ZONE_RESETALL) #define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) -#define blk_queue_pci_p2pdma(q) \ - test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags) +#define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA) #ifdef CONFIG_BLK_RQ_ALLOC_TIME #define blk_queue_rq_alloc_time(q) \ test_bit(QUEUE_FLAG_RQ_ALLOC_TIME, &(q)->queue_flags) -- cgit v1.2.3 From 8c8f5c85b20d0a7dc0ab9b2a17318130d69ceb5a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:52 +0200 Subject: block: move the skip_tagset_quiesce flag to queue_limits Move the skip_tagset_quiesce flag into the queue_limits feature field so that it can be set atomically with the queue frozen. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-26-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ab0f7dfba556..2c433ebf6f20 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -322,6 +322,9 @@ enum { /* supports PCI(e) p2p requests */ BLK_FEAT_PCI_P2PDMA = (1u << 12), + + /* skip this queue in blk_mq_(un)quiesce_tagset */ + BLK_FEAT_SKIP_TAGSET_QUIESCE = (1u << 13), }; /* @@ -594,7 +597,6 @@ struct request_queue { #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ -#define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/ #define QUEUE_FLAG_MQ_DEFAULT (1UL << QUEUE_FLAG_SAME_COMP) @@ -629,7 +631,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags) #define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags) #define blk_queue_skip_tagset_quiesce(q) \ - test_bit(QUEUE_FLAG_SKIP_TAGSET_QUIESCE, &(q)->queue_flags) + ((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE) extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); -- cgit v1.2.3 From 339d3948c07b4aa2940aeb874294a7d6782cec16 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Jun 2024 08:04:53 +0200 Subject: block: move the bounce flag into the features field Move the bounce flag into the features field to reclaim a little bit of space. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240617060532.127975-27-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c433ebf6f20..e96ba7b97288 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -325,6 +325,9 @@ enum { /* skip this queue in blk_mq_(un)quiesce_tagset */ BLK_FEAT_SKIP_TAGSET_QUIESCE = (1u << 13), + + /* bounce all highmem pages */ + BLK_FEAT_BOUNCE_HIGH = (1u << 14), }; /* @@ -332,7 +335,7 @@ enum { */ #define BLK_FEAT_INHERIT_MASK \ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ - BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED) + BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH) /* internal flags in queue_limits.flags */ enum { @@ -352,7 +355,6 @@ enum blk_bounce { struct queue_limits { unsigned int features; unsigned int flags; - enum blk_bounce bounce; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; -- cgit v1.2.3 From b9578c49456340ca4d3c7ddbaca054ffc2b51bc1 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Thu, 6 Jun 2024 14:02:13 +1200 Subject: dma-buf/heaps: Correct the types of fd_flags and heap_flags dma_heap_allocation_data defines the UAPI as follows: struct dma_heap_allocation_data { __u64 len; __u32 fd; __u32 fd_flags; __u64 heap_flags; }; But dma heaps are casting both fd_flags and heap_flags into unsigned long. This patch makes dma heaps - cma heap and system heap have consistent types with UAPI. Signed-off-by: Barry Song Acked-by: John Stultz Reviewed-by: Carlos Llamas Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20240606020213.49854-1-21cnbao@gmail.com --- include/linux/dma-heap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h index 0c05561cad6e..064bad725061 100644 --- a/include/linux/dma-heap.h +++ b/include/linux/dma-heap.h @@ -23,8 +23,8 @@ struct dma_heap; struct dma_heap_ops { struct dma_buf *(*allocate)(struct dma_heap *heap, unsigned long len, - unsigned long fd_flags, - unsigned long heap_flags); + u32 fd_flags, + u64 heap_flags); }; /** -- cgit v1.2.3 From a2225e0250c5fa397dcebf6ce65a9f05a114e0cf Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Thu, 13 Jun 2024 17:42:47 +0800 Subject: netfilter: move the sysctl nf_hooks_lwtunnel into the netfilter core Currently, the sysctl net.netfilter.nf_hooks_lwtunnel depends on the nf_conntrack module, but the nf_conntrack module is not always loaded. Therefore, accessing net.netfilter.nf_hooks_lwtunnel may have an error. Move sysctl nf_hooks_lwtunnel into the netfilter core. Fixes: 7a3f5b0de364 ("netfilter: add netfilter hooks to SRv6 data plane") Suggested-by: Pablo Neira Ayuso Signed-off-by: Jianguo Wu Signed-off-by: Pablo Neira Ayuso --- include/net/netns/netfilter.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 02bbdc577f8e..a6a0bf4a247e 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -15,6 +15,9 @@ struct netns_nf { const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; +#ifdef CONFIG_LWTUNNEL + struct ctl_table_header *nf_lwtnl_dir_header; +#endif #endif struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS]; struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS]; -- cgit v1.2.3 From e78298556ee5d881f6679effb2a6743969ea6e2d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 4 Jun 2024 12:30:02 -0700 Subject: runtime constants: add default dummy infrastructure This adds the initial dummy support for 'runtime constants' for when an architecture doesn't actually support an implementation of fixing up said runtime constants. This ends up being the fallback to just using the variables as regular __ro_after_init variables, and changes the dcache d_hash() function to use this model. Signed-off-by: Linus Torvalds --- include/asm-generic/Kbuild | 1 + include/asm-generic/runtime-const.h | 15 +++++++++++++++ include/asm-generic/vmlinux.lds.h | 8 ++++++++ 3 files changed, 24 insertions(+) create mode 100644 include/asm-generic/runtime-const.h (limited to 'include') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index b20fa25a7e8d..052e5c98c105 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -46,6 +46,7 @@ mandatory-y += pci.h mandatory-y += percpu.h mandatory-y += pgalloc.h mandatory-y += preempt.h +mandatory-y += runtime-const.h mandatory-y += rwonce.h mandatory-y += sections.h mandatory-y += serial.h diff --git a/include/asm-generic/runtime-const.h b/include/asm-generic/runtime-const.h new file mode 100644 index 000000000000..670499459514 --- /dev/null +++ b/include/asm-generic/runtime-const.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_RUNTIME_CONST_H +#define _ASM_RUNTIME_CONST_H + +/* + * This is the fallback for when the architecture doesn't + * support the runtime const operations. + * + * We just use the actual symbols as-is. + */ +#define runtime_const_ptr(sym) (sym) +#define runtime_const_shift_right_32(val, sym) ((u32)(val)>>(sym)) +#define runtime_const_init(type,sym) do { } while (0) + +#endif diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5703526d6ebf..389a78415b9b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -944,6 +944,14 @@ #define CON_INITCALL \ BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end) +#define RUNTIME_NAME(t,x) runtime_##t##_##x + +#define RUNTIME_CONST(t,x) \ + . = ALIGN(8); \ + RUNTIME_NAME(t,x) : AT(ADDR(RUNTIME_NAME(t,x)) - LOAD_OFFSET) { \ + *(RUNTIME_NAME(t,x)); \ + } + /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ . = ALIGN(8); \ -- cgit v1.2.3 From 2003e483a81cc235e29f77da3f6b256cb4b348e7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Jun 2024 13:31:05 -0700 Subject: fortify: Do not special-case 0-sized destinations All fake flexible arrays should have been removed now, so remove the special casing that was avoiding checking them. If a destination claims to be 0 sized, believe it. This is especially important for cases where __counted_by is in use and may have a 0 element count. Link: https://lore.kernel.org/r/20240619203105.work.747-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/fortify-string.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h index 7e0f340bf363..0d99bf11d260 100644 --- a/include/linux/fortify-string.h +++ b/include/linux/fortify-string.h @@ -601,11 +601,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, /* * Warn when writing beyond destination field size. * - * We must ignore p_size_field == 0 for existing 0-element - * fake flexible arrays, until they are all converted to - * proper flexible arrays. - * - * The implementation of __builtin_*object_size() behaves + * Note the implementation of __builtin_*object_size() behaves * like sizeof() when not directly referencing a flexible * array member, which means there will be many bounds checks * that will appear at run-time, without a way for them to be @@ -613,7 +609,7 @@ __FORTIFY_INLINE bool fortify_memcpy_chk(__kernel_size_t size, * is specifically the flexible array member). * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101832 */ - if (p_size_field != 0 && p_size_field != SIZE_MAX && + if (p_size_field != SIZE_MAX && p_size != p_size_field && p_size_field < size) return true; -- cgit v1.2.3 From ba63a7e08523be3496cc1b7e5f77d306144a3a40 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 20 Apr 2024 21:47:46 +0200 Subject: can: isotp: remove ISO 15675-2 specification version where possible With the new ISO 15765-2:2024 release the former documentation and comments have to be reworked. This patch removes the ISO specification version/date where possible. Signed-off-by: Oliver Hartkopp Acked-by: Vincent Mailhol Acked-by: Francesco Valla Link: https://lore.kernel.org/all/20240420194746.4885-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- include/uapi/linux/can/isotp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index 6cde62371b6f..bd990917f7c4 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -2,7 +2,7 @@ /* * linux/can/isotp.h * - * Definitions for isotp CAN sockets (ISO 15765-2:2016) + * Definitions for ISO 15765-2 CAN transport protocol sockets * * Copyright (c) 2020 Volkswagen Group Electronic Research * All rights reserved. -- cgit v1.2.3 From 269e974e664207cc45f83b579565ba73de1b75dc Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 14 Jun 2024 11:41:02 +0200 Subject: driver core: make [device_]driver_attach take a const * Change device_driver_attach() and driver_attach() to take a const * to struct device driver as neither of them modify the structure at all. Also, for some odd reason, drivers/dma/idxd/compat.c had a duplicate external reference to device_driver_attach(), so remove that to fix up the build, it should never have had that there in the first place. Cc: Rafael J. Wysocki Cc: Fenghua Yu Cc: Dave Jiang Cc: Vinod Koul Cc: Andy Shevchenko Cc: Petr Tesarik Cc: Alexander Lobakin Cc: dmaengine@vger.kernel.org Link: https://lore.kernel.org/r/2024061401-rasping-manger-c385@gregkh Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 3a630942f358..34eb20f5966f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1177,12 +1177,12 @@ static inline void *dev_get_platdata(const struct device *dev) * Manual binding of a device to driver. See drivers/base/bus.c * for information on use. */ -int __must_check device_driver_attach(struct device_driver *drv, +int __must_check device_driver_attach(const struct device_driver *drv, struct device *dev); int __must_check device_bind_driver(struct device *dev); void device_release_driver(struct device *dev); int __must_check device_attach(struct device *dev); -int __must_check driver_attach(struct device_driver *drv); +int __must_check driver_attach(const struct device_driver *drv); void device_initial_probe(struct device *dev); int __must_check device_reprobe(struct device *dev); -- cgit v1.2.3 From f6860b6069b92559f5cdb65f48e2d82051eaebca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 Jun 2024 17:45:33 +0200 Subject: block: remove the unused blk_bounce enum The enum has been replaced with the BLK_FEAT_BOUNCE_HIGH flag. Reported-by: Keith Busch Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240619154623.450048-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e96ba7b97288..f7d275e3fb2c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -343,15 +343,6 @@ enum { BLK_FLAGS_WRITE_CACHE_DISABLED = (1u << 31), }; -/* - * BLK_BOUNCE_NONE: never bounce (default) - * BLK_BOUNCE_HIGH: bounce all highmem pages - */ -enum blk_bounce { - BLK_BOUNCE_NONE, - BLK_BOUNCE_HIGH, -}; - struct queue_limits { unsigned int features; unsigned int flags; -- cgit v1.2.3 From bae1c74316b86c67c95658c3a0cd312cec9aad77 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 Jun 2024 17:45:35 +0200 Subject: block: renumber and rename the cache disabled flag Start with the first bit, and drop the plural-S from the name. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240619154623.450048-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f7d275e3fb2c..713a98b6dbba 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -339,8 +339,8 @@ enum { /* internal flags in queue_limits.flags */ enum { - /* do not send FLUSH or FUA command despite advertised write cache */ - BLK_FLAGS_WRITE_CACHE_DISABLED = (1u << 31), + /* do not send FLUSH/FUA commands despite advertising a write cache */ + BLK_FLAG_WRITE_CACHE_DISABLED = (1u << 0), }; struct queue_limits { @@ -1339,7 +1339,7 @@ static inline bool bdev_stable_writes(struct block_device *bdev) static inline bool blk_queue_write_cache(struct request_queue *q) { return (q->limits.features & BLK_FEAT_WRITE_CACHE) && - !(q->limits.flags & BLK_FLAGS_WRITE_CACHE_DISABLED); + !(q->limits.flags & BLK_FLAG_WRITE_CACHE_DISABLED); } static inline bool bdev_write_cache(struct block_device *bdev) -- cgit v1.2.3 From 5543217be468268dfedf504f4969771b9a377353 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 Jun 2024 17:45:36 +0200 Subject: block: move the misaligned flag into the features field Move the misaligned flags into the features field to reclaim a little bit of space. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240619154623.450048-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 713a98b6dbba..7ad2b1240fc0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -341,6 +341,9 @@ enum { enum { /* do not send FLUSH/FUA commands despite advertising a write cache */ BLK_FLAG_WRITE_CACHE_DISABLED = (1u << 0), + + /* I/O topology is misaligned */ + BLK_FEAT_MISALIGNED = (1u << 1), }; struct queue_limits { @@ -374,7 +377,6 @@ struct queue_limits { unsigned short max_integrity_segments; unsigned short max_discard_segments; - unsigned char misaligned; unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; unsigned int max_open_zones; -- cgit v1.2.3 From 4cac3d3a712b5c76d462b29b73b9e58c0b6d9946 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 Jun 2024 17:45:37 +0200 Subject: block: remove the discard_alignment flag queue_limits.discard_alignment is never read except in the places where it is stacked into another limit. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240619154623.450048-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7ad2b1240fc0..86410ce41bf6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -377,7 +377,6 @@ struct queue_limits { unsigned short max_integrity_segments; unsigned short max_discard_segments; - unsigned char discard_misaligned; unsigned char raid_partial_stripes_expensive; unsigned int max_open_zones; unsigned int max_active_zones; -- cgit v1.2.3 From 7d4dec525f5fd555037486af4d02dd3682655ba1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 19 Jun 2024 17:45:38 +0200 Subject: block: move the raid_partial_stripes_expensive flag into the features field Move the raid_partial_stripes_expensive flags into the features field to reclaim a little bit of space. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240619154623.450048-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 86410ce41bf6..1fa2b148c206 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -328,6 +328,9 @@ enum { /* bounce all highmem pages */ BLK_FEAT_BOUNCE_HIGH = (1u << 14), + + /* undocumented magic for bcache */ + BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE = (1u << 15), }; /* @@ -335,7 +338,8 @@ enum { */ #define BLK_FEAT_INHERIT_MASK \ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ - BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH) + BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH | \ + BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE) /* internal flags in queue_limits.flags */ enum { @@ -377,7 +381,6 @@ struct queue_limits { unsigned short max_integrity_segments; unsigned short max_discard_segments; - unsigned char raid_partial_stripes_expensive; unsigned int max_open_zones; unsigned int max_active_zones; -- cgit v1.2.3 From 0ddd2ae586d28e521d37393364d989ce118802e0 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Tue, 4 Jun 2024 16:49:34 +0800 Subject: drm/ttm: increase ttm pre-fault value to PMD size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ttm page fault handler ttm_bo_vm_fault_reserved() maps TTM_BO_VM_NUM_PREFAULT more pages beforehand due to the principle of locality. However, on some platform the page faults are more costly, this patch intends to increase the number of ttm pre-fault to relieve the number of page faults. When multiple levels of page table is supported, the new default value would be the PMD size, similar to huge page. Signed-off-by: Zhu Lingshan Reported-and-tested-by: Li Jingxiang Link: https://patchwork.freedesktop.org/patch/msgid/20240604084934.225738-1-lingshan.zhu@amd.com Reviewed-by: Christian König Signed-off-by: Christian König --- include/drm/ttm/ttm_bo.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index 6ccf96c91f3a..ef0f52f56ebc 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -39,7 +39,11 @@ #include "ttm_device.h" /* Default number of pre-faulted pages in the TTM fault handler */ +#if CONFIG_PGTABLE_LEVELS > 2 +#define TTM_BO_VM_NUM_PREFAULT (1 << (PMD_SHIFT - PAGE_SHIFT)) +#else #define TTM_BO_VM_NUM_PREFAULT 16 +#endif struct iosys_map; -- cgit v1.2.3 From f70167a7a6e7e8a6911f3a216dc044cbfe7c1983 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 20 Jun 2024 12:53:51 +0000 Subject: block: Generalize chunk_sectors support as boundary support The purpose of the chunk_sectors limit is to ensure that a mergeble request fits within the boundary of the chunck_sector value. Such a feature will be useful for other request_queue boundary limits, so generalize the chunk_sectors merge code. This idea was proposed by Hannes Reinecke. Reviewed-by: Martin K. Petersen Signed-off-by: John Garry Reviewed-by: Hannes Reinecke Acked-by: Darrick J. Wong Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20240620125359.2684798-3-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0e8253c1507a..fb7d4c21bba8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -907,14 +907,15 @@ static inline bool bio_straddles_zones(struct bio *bio) } /* - * Return how much of the chunk is left to be used for I/O at a given offset. + * Return how much within the boundary is left to be used for I/O at a given + * offset. */ -static inline unsigned int blk_chunk_sectors_left(sector_t offset, - unsigned int chunk_sectors) +static inline unsigned int blk_boundary_sectors_left(sector_t offset, + unsigned int boundary_sectors) { - if (unlikely(!is_power_of_2(chunk_sectors))) - return chunk_sectors - sector_div(offset, chunk_sectors); - return chunk_sectors - (offset & (chunk_sectors - 1)); + if (unlikely(!is_power_of_2(boundary_sectors))) + return boundary_sectors - sector_div(offset, boundary_sectors); + return boundary_sectors - (offset & (boundary_sectors - 1)); } /** -- cgit v1.2.3 From c34fc6f26ab86d03a2d47446f42b6cd492dfdc56 Mon Sep 17 00:00:00 2001 From: Prasad Singamsetty Date: Thu, 20 Jun 2024 12:53:52 +0000 Subject: fs: Initial atomic write support An atomic write is a write issued with torn-write protection, meaning that for a power failure or any other hardware failure, all or none of the data from the write will be stored, but never a mix of old and new data. Userspace may add flag RWF_ATOMIC to pwritev2() to indicate that the write is to be issued with torn-write prevention, according to special alignment and length rules. For any syscall interface utilizing struct iocb, add IOCB_ATOMIC for iocb->ki_flags field to indicate the same. A call to statx will give the relevant atomic write info for a file: - atomic_write_unit_min - atomic_write_unit_max - atomic_write_segments_max Both min and max values must be a power-of-2. Applications can avail of atomic write feature by ensuring that the total length of a write is a power-of-2 in size and also sized between atomic_write_unit_min and atomic_write_unit_max, inclusive. Applications must ensure that the write is at a naturally-aligned offset in the file wrt the total write length. The value in atomic_write_segments_max indicates the upper limit for IOV_ITER iovcnt. Add file mode flag FMODE_CAN_ATOMIC_WRITE, so files which do not have the flag set will have RWF_ATOMIC rejected and not just ignored. Add a type argument to kiocb_set_rw_flags() to allows reads which have RWF_ATOMIC set to be rejected. Helper function generic_atomic_write_valid() can be used by FSes to verify compliant writes. There we check for iov_iter type is for ubuf, which implies iovcnt==1 for pwritev2(), which is an initial restriction for atomic_write_segments_max. Initially the only user will be bdev file operations write handler. We will rely on the block BIO submission path to ensure write sizes are compliant for the bdev, so we don't need to check atomic writes sizes yet. Signed-off-by: Prasad Singamsetty jpg: merge into single patch and much rewrite Acked-by: Darrick J. Wong Reviewed-by: Martin K. Petersen Signed-off-by: John Garry Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20240620125359.2684798-4-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/fs.h | 17 +++++++++++++++-- include/uapi/linux/fs.h | 5 ++++- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0283cf366c2a..e049414bef7d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -125,8 +125,10 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_EXEC ((__force fmode_t)(1 << 5)) /* File writes are restricted (block device specific) */ #define FMODE_WRITE_RESTRICTED ((__force fmode_t)(1 << 6)) +/* File supports atomic writes */ +#define FMODE_CAN_ATOMIC_WRITE ((__force fmode_t)(1 << 7)) -/* FMODE_* bits 7 to 8 */ +/* FMODE_* bit 8 */ /* 32bit hashes as llseek() offset (for directories) */ #define FMODE_32BITHASH ((__force fmode_t)(1 << 9)) @@ -317,6 +319,7 @@ struct readahead_control; #define IOCB_SYNC (__force int) RWF_SYNC #define IOCB_NOWAIT (__force int) RWF_NOWAIT #define IOCB_APPEND (__force int) RWF_APPEND +#define IOCB_ATOMIC (__force int) RWF_ATOMIC /* non-RWF related bits - start at 16 */ #define IOCB_EVENTFD (1 << 16) @@ -351,6 +354,7 @@ struct readahead_control; { IOCB_SYNC, "SYNC" }, \ { IOCB_NOWAIT, "NOWAIT" }, \ { IOCB_APPEND, "APPEND" }, \ + { IOCB_ATOMIC, "ATOMIC"}, \ { IOCB_EVENTFD, "EVENTFD"}, \ { IOCB_DIRECT, "DIRECT" }, \ { IOCB_WRITE, "WRITE" }, \ @@ -3403,7 +3407,8 @@ static inline int iocb_flags(struct file *file) return res; } -static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) +static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags, + int rw_type) { int kiocb_flags = 0; @@ -3422,6 +3427,12 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) return -EOPNOTSUPP; kiocb_flags |= IOCB_NOIO; } + if (flags & RWF_ATOMIC) { + if (rw_type != WRITE) + return -EOPNOTSUPP; + if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE)) + return -EOPNOTSUPP; + } kiocb_flags |= (__force int) (flags & RWF_SUPPORTED); if (flags & RWF_SYNC) kiocb_flags |= IOCB_DSYNC; @@ -3613,4 +3624,6 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice); +bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos); + #endif /* _LINUX_FS_H */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 45e4e64fd664..191a7e88a8ab 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -329,9 +329,12 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO negation of O_APPEND */ #define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) +/* Atomic Write */ +#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC) /* Pagemap ioctl */ #define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) -- cgit v1.2.3 From 0f9ca80fa4f9670ba09721e4e36b8baf086a500c Mon Sep 17 00:00:00 2001 From: Prasad Singamsetty Date: Thu, 20 Jun 2024 12:53:53 +0000 Subject: fs: Add initial atomic write support info to statx Extend statx system call to return additional info for atomic write support support for a file. Helper function generic_fill_statx_atomic_writes() can be used by FSes to fill in the relevant statx fields. For now atomic_write_segments_max will always be 1, otherwise some rules would need to be imposed on iovec length and alignment, which we don't want now. Signed-off-by: Prasad Singamsetty jpg: relocate bdev support to another patch Reviewed-by: Darrick J. Wong Reviewed-by: Martin K. Petersen Signed-off-by: John Garry Acked-by: Darrick J. Wong Link: https://lore.kernel.org/r/20240620125359.2684798-5-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 +++ include/linux/stat.h | 3 +++ include/uapi/linux/stat.h | 12 ++++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index e049414bef7d..db26b4a70c62 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3235,6 +3235,9 @@ extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); +void generic_fill_statx_atomic_writes(struct kstat *stat, + unsigned int unit_min, + unsigned int unit_max); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); diff --git a/include/linux/stat.h b/include/linux/stat.h index bf92441dbad2..3d900c86981c 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -54,6 +54,9 @@ struct kstat { u32 dio_offset_align; u64 change_cookie; u64 subvol; + u32 atomic_write_unit_min; + u32 atomic_write_unit_max; + u32 atomic_write_segments_max; }; /* These definitions are internal to the kernel for now. Mainly used by nfsd. */ diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 67626d535316..887a25286441 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ -- cgit v1.2.3 From 9da3d1e912f3953196e66991d75208cde3e845e1 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 20 Jun 2024 12:53:54 +0000 Subject: block: Add core atomic write support Add atomic write support, as follows: - add helper functions to get request_queue atomic write limits - report request_queue atomic write support limits to sysfs and update Doc - support to safely merge atomic writes - deal with splitting atomic writes - misc helper functions - add a per-request atomic write flag New request_queue limits are added, as follows: - atomic_write_hw_max is set by the block driver and is the maximum length of an atomic write which the device may support. It is not necessarily a power-of-2. - atomic_write_max_sectors is derived from atomic_write_hw_max_sectors and max_hw_sectors. It is always a power-of-2. Atomic writes may be merged, and atomic_write_max_sectors would be the limit on a merged atomic write request size. This value is not capped at max_sectors, as the value in max_sectors can be controlled from userspace, and it would only cause trouble if userspace could limit atomic_write_unit_max_bytes and the other atomic write limits. - atomic_write_hw_unit_{min,max} are set by the block driver and are the min/max length of an atomic write unit which the device may support. They both must be a power-of-2. Typically atomic_write_hw_unit_max will hold the same value as atomic_write_hw_max. - atomic_write_unit_{min,max} are derived from atomic_write_hw_unit_{min,max}, max_hw_sectors, and block core limits. Both min and max values must be a power-of-2. - atomic_write_hw_boundary is set by the block driver. If non-zero, it indicates an LBA space boundary at which an atomic write straddles no longer is atomically executed by the disk. The value must be a power-of-2. Note that it would be acceptable to enforce a rule that atomic_write_hw_boundary_sectors is a multiple of atomic_write_hw_unit_max, but the resultant code would be more complicated. All atomic writes limits are by default set 0 to indicate no atomic write support. Even though it is assumed by Linux that a logical block can always be atomically written, we ignore this as it is not of particular interest. Stacked devices are just not supported either for now. An atomic write must always be submitted to the block driver as part of a single request. As such, only a single BIO must be submitted to the block layer for an atomic write. When a single atomic write BIO is submitted, it cannot be split. As such, atomic_write_unit_{max, min}_bytes are limited by the maximum guaranteed BIO size which will not be required to be split. This max size is calculated by request_queue max segments and the number of bvecs a BIO can fit, BIO_MAX_VECS. Currently we rely on userspace issuing a write with iovcnt=1 for pwritev2() - as such, we can rely on each segment containing PAGE_SIZE of data, apart from the first+last, which each can fit logical block size of data. The first+last will be LBS length/aligned as we rely on direct IO alignment rules also. New sysfs files are added to report the following atomic write limits: - atomic_write_unit_max_bytes - same as atomic_write_unit_max_sectors in bytes - atomic_write_unit_min_bytes - same as atomic_write_unit_min_sectors in bytes - atomic_write_boundary_bytes - same as atomic_write_hw_boundary_sectors in bytes - atomic_write_max_bytes - same as atomic_write_max_sectors in bytes Atomic writes may only be merged with other atomic writes and only under the following conditions: - total resultant request length <= atomic_write_max_bytes - the merged write does not straddle a boundary Helper function bdev_can_atomic_write() is added to indicate whether atomic writes may be issued to a bdev. If a bdev is a partition, the partition start must be aligned with both atomic_write_unit_min_sectors and atomic_write_hw_boundary_sectors. FSes will rely on the block layer to validate that an atomic write BIO submitted will be of valid size, so add blk_validate_atomic_write_op_size() for this purpose. Userspace expects an atomic write which is of invalid size to be rejected with -EINVAL, so add BLK_STS_INVAL for this. Also use BLK_STS_INVAL for when a BIO needs to be split, as this should mean an invalid size BIO. Flag REQ_ATOMIC is used for indicating an atomic write. Co-developed-by: Himanshu Madhani Signed-off-by: Himanshu Madhani Reviewed-by: Martin K. Petersen Signed-off-by: John Garry Reviewed-by: Keith Busch Link: https://lore.kernel.org/r/20240620125359.2684798-6-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 8 ++++++- include/linux/blkdev.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 781c4500491b..632edd71f8c6 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -162,6 +162,11 @@ typedef u16 blk_short_t; */ #define BLK_STS_DURATION_LIMIT ((__force blk_status_t)17) +/* + * Invalid size or alignment. + */ +#define BLK_STS_INVAL ((__force blk_status_t)19) + /** * blk_path_error - returns true if error may be path related * @error: status the request was completed with @@ -370,7 +375,7 @@ enum req_flag_bits { __REQ_SWAP, /* swap I/O */ __REQ_DRV, /* for driver use */ __REQ_FS_PRIVATE, /* for file system (submitter) use */ - + __REQ_ATOMIC, /* for atomic write operations */ /* * Command specific flags, keep last: */ @@ -402,6 +407,7 @@ enum req_flag_bits { #define REQ_SWAP (__force blk_opf_t)(1ULL << __REQ_SWAP) #define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV) #define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE) +#define REQ_ATOMIC (__force blk_opf_t)(1ULL << __REQ_ATOMIC) #define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fb7d4c21bba8..4816f3b1d528 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -377,6 +377,16 @@ struct queue_limits { unsigned int discard_alignment; unsigned int zone_write_granularity; + /* atomic write limits */ + unsigned int atomic_write_hw_max; + unsigned int atomic_write_max_sectors; + unsigned int atomic_write_hw_boundary; + unsigned int atomic_write_boundary_sectors; + unsigned int atomic_write_hw_unit_min; + unsigned int atomic_write_unit_min; + unsigned int atomic_write_hw_unit_max; + unsigned int atomic_write_unit_max; + unsigned short max_segments; unsigned short max_integrity_segments; unsigned short max_discard_segments; @@ -1403,6 +1413,30 @@ static inline int queue_dma_alignment(const struct request_queue *q) return q ? q->limits.dma_alignment : 511; } +static inline unsigned int +queue_atomic_write_unit_max_bytes(const struct request_queue *q) +{ + return q->limits.atomic_write_unit_max; +} + +static inline unsigned int +queue_atomic_write_unit_min_bytes(const struct request_queue *q) +{ + return q->limits.atomic_write_unit_min; +} + +static inline unsigned int +queue_atomic_write_boundary_bytes(const struct request_queue *q) +{ + return q->limits.atomic_write_boundary_sectors << SECTOR_SHIFT; +} + +static inline unsigned int +queue_atomic_write_max_bytes(const struct request_queue *q) +{ + return q->limits.atomic_write_max_sectors << SECTOR_SHIFT; +} + static inline unsigned int bdev_dma_alignment(struct block_device *bdev) { return queue_dma_alignment(bdev_get_queue(bdev)); @@ -1644,6 +1678,27 @@ struct io_comp_batch { void (*complete)(struct io_comp_batch *); }; +static inline bool bdev_can_atomic_write(struct block_device *bdev) +{ + struct request_queue *bd_queue = bdev->bd_queue; + struct queue_limits *limits = &bd_queue->limits; + + if (!limits->atomic_write_unit_min) + return false; + + if (bdev_is_partition(bdev)) { + sector_t bd_start_sect = bdev->bd_start_sect; + unsigned int alignment = + max(limits->atomic_write_unit_min, + limits->atomic_write_hw_boundary); + + if (!IS_ALIGNED(bd_start_sect, alignment >> SECTOR_SHIFT)) + return false; + } + + return true; +} + #define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { } #endif /* _LINUX_BLKDEV_H */ -- cgit v1.2.3 From 9abcfbd235f59fb5b6379e5bc0231dad831ebace Mon Sep 17 00:00:00 2001 From: Prasad Singamsetty Date: Thu, 20 Jun 2024 12:53:55 +0000 Subject: block: Add atomic write support for statx Extend statx system call to return additional info for atomic write support support if the specified file is a block device. Reviewed-by: Martin K. Petersen Signed-off-by: Prasad Singamsetty Signed-off-by: John Garry Reviewed-by: Keith Busch Acked-by: Darrick J. Wong Reviewed-by: Darrick J. Wong Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/20240620125359.2684798-7-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4816f3b1d528..2800231046a9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1638,7 +1638,8 @@ int sync_blockdev(struct block_device *bdev); int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend); int sync_blockdev_nowait(struct block_device *bdev); void sync_bdevs(bool wait); -void bdev_statx_dioalign(struct inode *inode, struct kstat *stat); +void bdev_statx(struct inode *backing_inode, struct kstat *stat, + u32 request_mask); void printk_all_partitions(void); int __init early_lookup_bdev(const char *pathname, dev_t *dev); #else @@ -1656,7 +1657,8 @@ static inline int sync_blockdev_nowait(struct block_device *bdev) static inline void sync_bdevs(bool wait) { } -static inline void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) +static inline void bdev_statx(struct inode *backing_inode, struct kstat *stat, + u32 request_mask) { } static inline void printk_all_partitions(void) -- cgit v1.2.3 From bf4ae8f2e6407a779c0368eb0f3e047a8333be17 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 20 Jun 2024 12:53:57 +0000 Subject: scsi: sd: Atomic write support Support is divided into two main areas: - reading VPD pages and setting sdev request_queue limits - support WRITE ATOMIC (16) command and tracing The relevant block limits VPD page need to be read to allow the block layer request_queue atomic write limits to be set. These VPD page limits are described in sbc4r22 section 6.6.4 - Block limits VPD page. There are five limits of interest: - MAXIMUM ATOMIC TRANSFER LENGTH - ATOMIC ALIGNMENT - ATOMIC TRANSFER LENGTH GRANULARITY - MAXIMUM ATOMIC TRANSFER LENGTH WITH BOUNDARY - MAXIMUM ATOMIC BOUNDARY SIZE MAXIMUM ATOMIC TRANSFER LENGTH is the maximum length for a WRITE ATOMIC (16) command. It will not be greater than the device MAXIMUM TRANSFER LENGTH. ATOMIC ALIGNMENT and ATOMIC TRANSFER LENGTH GRANULARITY are the minimum alignment and length values for an atomic write in terms of logical blocks. Unlike NVMe, SCSI does not specify an LBA space boundary, but does specify a per-IO boundary granularity. The maximum boundary size is specified in MAXIMUM ATOMIC BOUNDARY SIZE. When used, this boundary value is set in the WRITE ATOMIC (16) ATOMIC BOUNDARY field - layout for the WRITE_ATOMIC_16 command can be found in sbc4r22 section 5.48. This boundary value is the granularity size at which the device may atomically write the data. A value of zero in WRITE ATOMIC (16) ATOMIC BOUNDARY field means that all data must be atomically written together. MAXIMUM ATOMIC TRANSFER LENGTH WITH BOUNDARY is the maximum atomic write length if a non-zero boundary value is set. For atomic write support, the WRITE ATOMIC (16) boundary is not of much interest, as the block layer expects each request submitted to be executed atomically. However, the SCSI spec does leave itself open to a quirky scenario where MAXIMUM ATOMIC TRANSFER LENGTH is zero, yet MAXIMUM ATOMIC TRANSFER LENGTH WITH BOUNDARY and MAXIMUM ATOMIC BOUNDARY SIZE are both non-zero. This case will be supported. To set the block layer request_queue atomic write capabilities, sanitize the VPD page limits and set limits as follows: - atomic_write_unit_min is derived from granularity and alignment values. If no granularity value is not set, use physical block size - atomic_write_unit_max is derived from MAXIMUM ATOMIC TRANSFER LENGTH. In the scenario where MAXIMUM ATOMIC TRANSFER LENGTH is zero and boundary limits are non-zero, use MAXIMUM ATOMIC BOUNDARY SIZE for atomic_write_unit_max. New flag scsi_disk.use_atomic_write_boundary is set for this scenario. - atomic_write_boundary_bytes is set to zero always SCSI also supports a WRITE ATOMIC (32) command, which is for type 2 protection enabled. This is not going to be supported now, so check for T10_PI_TYPE2_PROTECTION when setting any request_queue limits. To handle an atomic write request, add support for WRITE ATOMIC (16) command in handler sd_setup_atomic_cmnd(). Flag use_atomic_write_boundary is checked here for encoding ATOMIC BOUNDARY field. Trace info is also added for WRITE_ATOMIC_16 command. Reviewed-by: Martin K. Petersen Signed-off-by: John Garry Acked-by: Darrick J. Wong Reviewed-by: Darrick J. Wong Link: https://lore.kernel.org/r/20240620125359.2684798-9-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/scsi/scsi_proto.h | 1 + include/trace/events/scsi.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h index 843106e1109f..70e1262b2e20 100644 --- a/include/scsi/scsi_proto.h +++ b/include/scsi/scsi_proto.h @@ -120,6 +120,7 @@ #define WRITE_SAME_16 0x93 #define ZBC_OUT 0x94 #define ZBC_IN 0x95 +#define WRITE_ATOMIC_16 0x9c #define SERVICE_ACTION_BIDIRECTIONAL 0x9d #define SERVICE_ACTION_IN_16 0x9e #define SERVICE_ACTION_OUT_16 0x9f diff --git a/include/trace/events/scsi.h b/include/trace/events/scsi.h index 8e2d9b1b0e77..05f1945ed204 100644 --- a/include/trace/events/scsi.h +++ b/include/trace/events/scsi.h @@ -102,6 +102,7 @@ scsi_opcode_name(WRITE_32), \ scsi_opcode_name(WRITE_SAME_32), \ scsi_opcode_name(ATA_16), \ + scsi_opcode_name(WRITE_ATOMIC_16), \ scsi_opcode_name(ATA_12)) #define scsi_hostbyte_name(result) { result, #result } -- cgit v1.2.3 From 9919c5c98cb25dbf7e76aadb9beab55a2a25f830 Mon Sep 17 00:00:00 2001 From: Rafael Passos Date: Fri, 14 Jun 2024 23:24:08 -0300 Subject: bpf: remove unused parameter in bpf_jit_binary_pack_finalize Fixes a compiler warning. the bpf_jit_binary_pack_finalize function was taking an extra bpf_prog parameter that went unused. This removves it and updates the callers accordingly. Signed-off-by: Rafael Passos Link: https://lore.kernel.org/r/20240615022641.210320-2-rafael@rcpassos.me Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index b02aea291b7e..dd41a93f06b2 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1129,8 +1129,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image, struct bpf_binary_header **rw_hdr, u8 **rw_image, bpf_jit_fill_hole_t bpf_fill_ill_insns); -int bpf_jit_binary_pack_finalize(struct bpf_prog *prog, - struct bpf_binary_header *ro_header, +int bpf_jit_binary_pack_finalize(struct bpf_binary_header *ro_header, struct bpf_binary_header *rw_header); void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header, struct bpf_binary_header *rw_header); -- cgit v1.2.3 From ab224b9ef7c4eaa752752455ea79bd7022209d5d Mon Sep 17 00:00:00 2001 From: Rafael Passos Date: Fri, 14 Jun 2024 23:24:09 -0300 Subject: bpf: remove unused parameter in __bpf_free_used_btfs Fixes a compiler warning. The __bpf_free_used_btfs function was taking an extra unused struct bpf_prog_aux *aux param Signed-off-by: Rafael Passos Link: https://lore.kernel.org/r/20240615022641.210320-3-rafael@rcpassos.me Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f636b4998bf7..960780ef04e1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2933,8 +2933,7 @@ bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr) return ret; } -void __bpf_free_used_btfs(struct bpf_prog_aux *aux, - struct btf_mod_pair *used_btfs, u32 len); +void __bpf_free_used_btfs(struct btf_mod_pair *used_btfs, u32 len); static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) -- cgit v1.2.3 From 158ed777e330e9bf6bd592daaf1e860d965ec8b5 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 30 Apr 2024 11:43:16 +0100 Subject: firmware: qcom: scm: Add gpu_init_regs call This will used by drm/msm to initialize GPU registers that Qualcomm's firmware doesn't make writeable to the kernel. Reviewed-by: Dmitry Baryshkov Signed-off-by: Connor Abbott Reviewed-by: Konrad Dybcio Acked-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240430-a750-raytracing-v3-2-7f57c5ac082d@gmail.com Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_scm.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index aaa19f93ac43..a221a643dc12 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -115,6 +115,29 @@ int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, int qcom_scm_lmh_profile_change(u32 profile_id); bool qcom_scm_lmh_dcvsh_available(void); +/* + * Request TZ to program set of access controlled registers necessary + * irrespective of any features + */ +#define QCOM_SCM_GPU_ALWAYS_EN_REQ BIT(0) +/* + * Request TZ to program BCL id to access controlled register when BCL is + * enabled + */ +#define QCOM_SCM_GPU_BCL_EN_REQ BIT(1) +/* + * Request TZ to program set of access controlled register for CLX feature + * when enabled + */ +#define QCOM_SCM_GPU_CLX_EN_REQ BIT(2) +/* + * Request TZ to program tsense ids to access controlled registers for reading + * gpu temperature sensors + */ +#define QCOM_SCM_GPU_TSENSE_EN_REQ BIT(3) + +int qcom_scm_gpu_init_regs(u32 gpu_req); + #ifdef CONFIG_QCOM_QSEECOM int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id); -- cgit v1.2.3 From 9267997fa7aa0b597e8b32cb3fdfe91be1d35a83 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 5 Jun 2024 22:10:14 +0200 Subject: soc: qcom: Move some socinfo defines to the header In preparation for parsing the chip "feature code" (FC) and "product code" (PC) (essentially the parameters that let us conclusively characterize the sillicon we're running on, including various speed bins), move the socinfo version defines to the public header. Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240605-topic-smem_speedbin-v2-1-8989d7e3d176@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/socinfo.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/soc/qcom/socinfo.h b/include/linux/soc/qcom/socinfo.h index e78777bb0f4a..10e0a4c287f4 100644 --- a/include/linux/soc/qcom/socinfo.h +++ b/include/linux/soc/qcom/socinfo.h @@ -12,6 +12,14 @@ #define SMEM_SOCINFO_BUILD_ID_LENGTH 32 #define SMEM_SOCINFO_CHIP_ID_LENGTH 32 +/* + * SoC version type with major number in the upper 16 bits and minor + * number in the lower 16 bits. + */ +#define SOCINFO_MAJOR(ver) (((ver) >> 16) & 0xffff) +#define SOCINFO_MINOR(ver) ((ver) & 0xffff) +#define SOCINFO_VERSION(maj, min) ((((maj) & 0xffff) << 16)|((min) & 0xffff)) + /* Socinfo SMEM item structure */ struct socinfo { __le32 fmt; -- cgit v1.2.3 From 81bbb2b891174da9301fc0d4fe9622bd4cb6a995 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 5 Jun 2024 22:10:15 +0200 Subject: soc: qcom: smem: Add a feature code getter Recent (SM8550+ ish) Qualcomm SoCs have a new mechanism for precisely identifying the specific SKU and the precise speed bin (in the general meaning of this word, anyway): a pair of values called Product Code and Feature Code. Based on this information, we can deduce the available frequencies for things such as Adreno. In the case of Adreno specifically, Pcode is useless for non-prototype SoCs. Introduce a getter for the feature code and export it. Reviewed-by: Dmitry Baryshkov Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240605-topic-smem_speedbin-v2-2-8989d7e3d176@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/soc/qcom/smem.h | 1 + include/linux/soc/qcom/socinfo.h | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include') diff --git a/include/linux/soc/qcom/smem.h b/include/linux/soc/qcom/smem.h index a36a3b9d4929..0943bf419e11 100644 --- a/include/linux/soc/qcom/smem.h +++ b/include/linux/soc/qcom/smem.h @@ -13,5 +13,6 @@ int qcom_smem_get_free_space(unsigned host); phys_addr_t qcom_smem_virt_to_phys(void *p); int qcom_smem_get_soc_id(u32 *id); +int qcom_smem_get_feature_code(u32 *code); #endif diff --git a/include/linux/soc/qcom/socinfo.h b/include/linux/soc/qcom/socinfo.h index 10e0a4c287f4..608950443eee 100644 --- a/include/linux/soc/qcom/socinfo.h +++ b/include/linux/soc/qcom/socinfo.h @@ -3,6 +3,8 @@ #ifndef __QCOM_SOCINFO_H__ #define __QCOM_SOCINFO_H__ +#include + /* * SMEM item id, used to acquire handles to respective * SMEM region. @@ -82,4 +84,28 @@ struct socinfo { __le32 boot_core; }; +/* Internal feature codes */ +enum qcom_socinfo_feature_code { + /* External feature codes */ + SOCINFO_FC_UNKNOWN = 0x0, + SOCINFO_FC_AA, + SOCINFO_FC_AB, + SOCINFO_FC_AC, + SOCINFO_FC_AD, + SOCINFO_FC_AE, + SOCINFO_FC_AF, + SOCINFO_FC_AG, + SOCINFO_FC_AH, +}; + +/* Internal feature codes */ +/* Valid values: 0 <= n <= 0xf */ +#define SOCINFO_FC_Yn(n) (0xf1 + (n)) +#define SOCINFO_FC_INT_MAX SOCINFO_FC_Yn(0xf) + +/* Product codes */ +#define SOCINFO_PC_UNKNOWN 0 +#define SOCINFO_PCn(n) ((n) + 1) +#define SOCINFO_PC_RESERVE (BIT(31) - 1) + #endif -- cgit v1.2.3 From 5878853fc9389e7d0988d4b465a415cf96fd14fa Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 20 Jun 2024 14:27:20 +0200 Subject: dmaengine: Add API function dmaengine_prep_peripheral_dma_vec() This function can be used to initiate a scatter-gather DMA transfer, where the address and size of each segment is located in one entry of the dma_vec array. The major difference with dmaengine_prep_slave_sg() is that it supports specifying the lengths of each DMA transfer; as trying to override the length of the transfer with dmaengine_prep_slave_sg() is a very tedious process. The introduction of a new API function is also justified by the fact that scatterlists are on their way out. Note that dmaengine_prep_interleaved_dma() is not helpful either in that case, as it assumes that the address of each segment will be higher than the one of the previous segment, which we just cannot guarantee in case of a scatter-gather transfer. Signed-off-by: Paul Cercueil Co-developed-by: Nuno Sa Signed-off-by: Nuno Sa Link: https://lore.kernel.org/r/20240620122726.41232-2-paul@crapouillou.net Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 752dbde4cec1..9fc03068cabc 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -160,6 +160,16 @@ struct dma_interleaved_template { struct data_chunk sgl[]; }; +/** + * struct dma_vec - DMA vector + * @addr: Bus address of the start of the vector + * @len: Length in bytes of the DMA vector + */ +struct dma_vec { + dma_addr_t addr; + size_t len; +}; + /** * enum dma_ctrl_flags - DMA flags to augment operation preparation, * control completion, and communicate status. @@ -910,6 +920,10 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( struct dma_chan *chan, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_peripheral_dma_vec)( + struct dma_chan *chan, const struct dma_vec *vecs, + size_t nents, enum dma_transfer_direction direction, + unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_slave_sg)( struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction direction, @@ -973,6 +987,25 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_single( dir, flags, NULL); } +/** + * dmaengine_prep_peripheral_dma_vec() - Prepare a DMA scatter-gather descriptor + * @chan: The channel to be used for this descriptor + * @vecs: The array of DMA vectors that should be transferred + * @nents: The number of DMA vectors in the array + * @dir: Specifies the direction of the data transfer + * @flags: DMA engine flags + */ +static inline struct dma_async_tx_descriptor *dmaengine_prep_peripheral_dma_vec( + struct dma_chan *chan, const struct dma_vec *vecs, size_t nents, + enum dma_transfer_direction dir, unsigned long flags) +{ + if (!chan || !chan->device || !chan->device->device_prep_peripheral_dma_vec) + return NULL; + + return chan->device->device_prep_peripheral_dma_vec(chan, vecs, nents, + dir, flags); +} + static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_sg( struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction dir, unsigned long flags) -- cgit v1.2.3 From 5f2e950755c76c6fc20fc4ebd8355671fbbd7ac0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 13 Jun 2024 16:48:36 +0200 Subject: leds: core: Introduce led_get_color_name() function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is similar to the existing led_colors[] array but is safer to use and usable by everyone. Getting string representations of color ids is useful for drivers which are handling color IDs anyways, for example for the multicolor API. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20240613-cros_ec-led-v3-1-500b50f41e0f@weissschuh.net Signed-off-by: Lee Jones --- include/linux/leds.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index 6300313c46b7..dedea965afbf 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -427,6 +427,16 @@ void led_sysfs_enable(struct led_classdev *led_cdev); int led_compose_name(struct device *dev, struct led_init_data *init_data, char *led_classdev_name); +/** + * led_get_color_name - get string representation of color ID + * @color_id: The LED_COLOR_ID_* constant + * + * Get the string name of a LED_COLOR_ID_* constant. + * + * Returns: A string constant or NULL on an invalid ID. + */ +const char *led_get_color_name(u8 color_id); + /** * led_sysfs_is_disabled - check if LED sysfs interface is disabled * @led_cdev: the LED to query -- cgit v1.2.3 From 1cbf347288702af0fe8667c0ce760afbe982a2f1 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 14 Jun 2024 11:14:18 +0300 Subject: i2c: Add nop fwnode operations Add nop variants of i2c_find_device_by_fwnode(), i2c_find_adapter_by_fwnode() and i2c_get_adapter_by_fwnode() for use without CONFIG_I2C. Signed-off-by: Sakari Ailus Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 9709537370ee..424acb98c7c2 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -960,8 +960,6 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr); #define builtin_i2c_driver(__i2c_driver) \ builtin_driver(__i2c_driver, i2c_add_driver) -#endif /* I2C */ - /* must call put_device() when done with returned i2c_client device */ struct i2c_client *i2c_find_device_by_fwnode(struct fwnode_handle *fwnode); @@ -971,6 +969,28 @@ struct i2c_adapter *i2c_find_adapter_by_fwnode(struct fwnode_handle *fwnode); /* must call i2c_put_adapter() when done with returned i2c_adapter device */ struct i2c_adapter *i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode); +#else /* I2C */ + +static inline struct i2c_client * +i2c_find_device_by_fwnode(struct fwnode_handle *fwnode) +{ + return NULL; +} + +static inline struct i2c_adapter * +i2c_find_adapter_by_fwnode(struct fwnode_handle *fwnode) +{ + return NULL; +} + +static inline struct i2c_adapter * +i2c_get_adapter_by_fwnode(struct fwnode_handle *fwnode) +{ + return NULL; +} + +#endif /* !I2C */ + #if IS_ENABLED(CONFIG_OF) /* must call put_device() when done with returned i2c_client device */ static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node) -- cgit v1.2.3 From 6ae0092ca7adfce79336c6525cb0da561ecd4f04 Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 14 Jun 2024 14:16:05 -0700 Subject: thermal: intel: intel_tcc: Add model checks for temperature registers The register MSR_TEMPERATURE_TARGET is not architectural. Its fields may be defined differently for each processor model. TCC_OFFSET is an example of such case. Despite being specified as architectural, the registers IA32_[PACKAGE]_ THERM_STATUS have become model-specific: in recent processors, the digital temperature readout uses bits [23:16] whereas the Intel Software Developer's manual specifies bits [22:16]. Create an array of processor models and their bitmasks for TCC_OFFSET and the digital temperature readout fields. Do not include recent processors. Instead, use the bitmasks of these recent processors as default. Use these model-specific bitmasks when reading TCC_OFFSET or the temperature sensors. Initialize a model-specific data structure during subsys_initcall() to have it ready when thermal drivers are loaded. Expose the new interface intel_tcc_get_offset_mask(). The intel_tcc_cooling driver will use it. Reviewed-by: Zhang Rui Signed-off-by: Ricardo Neri Link: https://patch.msgid.link/20240614211606.5896-2-ricardo.neri-calderon@linux.intel.com Signed-off-by: Rafael J. Wysocki --- include/linux/intel_tcc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/intel_tcc.h b/include/linux/intel_tcc.h index 8ff8eabb4a98..fa788817acfc 100644 --- a/include/linux/intel_tcc.h +++ b/include/linux/intel_tcc.h @@ -14,5 +14,6 @@ int intel_tcc_get_tjmax(int cpu); int intel_tcc_get_offset(int cpu); int intel_tcc_set_offset(int cpu, int offset); int intel_tcc_get_temp(int cpu, int *temp, bool pkg); +u32 intel_tcc_get_offset_mask(void); #endif /* __INTEL_TCC_H__ */ -- cgit v1.2.3 From b6cfe2287df6e26d685af8a8a96ed1bf87bdde28 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 21 Jun 2024 12:15:05 +0900 Subject: block: Define bdev_nr_zones() as an inline function There is no need for bdev_nr_zones() to be an exported function calculating the number of zones of a block device. Instead, given that all callers use this helper with a fully initialized block device that has a gendisk, we can redefine this function as an inline helper in blkdev.h. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240621031506.759397-3-dlemoal@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2800231046a9..1078a7d51295 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -673,7 +673,6 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) } #ifdef CONFIG_BLK_DEV_ZONED -unsigned int bdev_nr_zones(struct block_device *bdev); static inline unsigned int disk_nr_zones(struct gendisk *disk) { @@ -687,6 +686,11 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) return sector >> ilog2(disk->queue->limits.chunk_sectors); } +static inline unsigned int bdev_nr_zones(struct block_device *bdev) +{ + return disk_nr_zones(bdev->bd_disk); +} + static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return bdev->bd_disk->queue->limits.max_open_zones; -- cgit v1.2.3 From caaf7101c01a91a882d3da2f566579dda692367d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 21 Jun 2024 12:15:06 +0900 Subject: block: Cleanup block device zone helpers There is no need to conditionally define on CONFIG_BLK_DEV_ZONED the inline helper functions bdev_nr_zones(), bdev_max_open_zones(), bdev_max_active_zones() and disk_zone_no() as these function will return the correct valu in all cases (zoned device or not, including when CONFIG_BLK_DEV_ZONED is not set). Furthermore, disk_nr_zones() definition can be simplified as disk->nr_zones is always 0 for regular block devices. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20240621031506.759397-4-dlemoal@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 44 ++++++++++++-------------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1078a7d51295..e89003360c17 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -673,11 +673,21 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) } #ifdef CONFIG_BLK_DEV_ZONED - static inline unsigned int disk_nr_zones(struct gendisk *disk) { - return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0; + return disk->nr_zones; +} +bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); +#else /* CONFIG_BLK_DEV_ZONED */ +static inline unsigned int disk_nr_zones(struct gendisk *disk) +{ + return 0; +} +static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) +{ + return false; } +#endif /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { @@ -701,36 +711,6 @@ static inline unsigned int bdev_max_active_zones(struct block_device *bdev) return bdev->bd_disk->queue->limits.max_active_zones; } -bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs); -#else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int bdev_nr_zones(struct block_device *bdev) -{ - return 0; -} - -static inline unsigned int disk_nr_zones(struct gendisk *disk) -{ - return 0; -} -static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) -{ - return 0; -} -static inline unsigned int bdev_max_open_zones(struct block_device *bdev) -{ - return 0; -} - -static inline unsigned int bdev_max_active_zones(struct block_device *bdev) -{ - return 0; -} -static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) -{ - return false; -} -#endif /* CONFIG_BLK_DEV_ZONED */ - static inline unsigned int blk_queue_depth(struct request_queue *q) { if (q->queue_depth) -- cgit v1.2.3 From f80a55fa90fa76d01e3fffaa5d0413e522ab9a00 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 17 Jun 2024 09:27:27 +0200 Subject: nvme: fixup comment for nvme RDMA Provider Type PRTYPE is the provider type, not the QP service type. Fixes: eb793e2c9286 ("nvme.h: add NVMe over Fabrics definitions") Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 425573202295..69ac2abf8acf 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -87,8 +87,8 @@ enum { NVMF_RDMA_QPTYPE_DATAGRAM = 2, /* Reliable Datagram */ }; -/* RDMA QP Service Type codes for Discovery Log Page entry TSAS - * RDMA_QPTYPE field +/* RDMA Provider Type codes for Discovery Log Page entry TSAS + * RDMA_PRTYPE field */ enum { NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 1, /* No Provider Specified */ -- cgit v1.2.3 From 0f1f5803920d2a6b88bee950914fd37421e17170 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 17 Jun 2024 09:27:28 +0200 Subject: nvmet: make 'tsas' attribute idempotent for RDMA The RDMA transport defines values for TSAS, but it cannot be changed as we only support the 'connected' mode. So to avoid errors during reconfiguration we should allow to write the current value. Fixes: 3f123494db72 ("nvmet: make TCP sectype settable via configfs") Signed-off-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 69ac2abf8acf..c693ac344ec0 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -85,6 +85,7 @@ enum { enum { NVMF_RDMA_QPTYPE_CONNECTED = 1, /* Reliable Connected */ NVMF_RDMA_QPTYPE_DATAGRAM = 2, /* Reliable Datagram */ + NVMF_RDMA_QPTYPE_INVALID = 0xff, }; /* RDMA Provider Type codes for Discovery Log Page entry TSAS @@ -110,6 +111,7 @@ enum { NVMF_TCP_SECTYPE_NONE = 0, /* No Security */ NVMF_TCP_SECTYPE_TLS12 = 1, /* TLSv1.2, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ NVMF_TCP_SECTYPE_TLS13 = 2, /* TLSv1.3, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */ + NVMF_TCP_SECTYPE_INVALID = 0xff, }; #define NVME_AQ_DEPTH 32 -- cgit v1.2.3 From 9403408e122628a6dc7154a95716f00dae3747c7 Mon Sep 17 00:00:00 2001 From: Christian Loehle Date: Mon, 17 Jun 2024 17:16:15 +0100 Subject: tick: Remove unnused tick_nohz_get_idle_calls() The function returns the idle calls counter for the current cpu and therefore usually isn't what the caller wants. It is unnused since commit 466a2b42d676 ("cpufreq: schedutil: Use idle_calls counter of the remote CPU") Signed-off-by: Christian Loehle Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240617161615.49309-1-christian.loehle@arm.com --- include/linux/tick.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/tick.h b/include/linux/tick.h index 4924a33700b7..72744638c5b0 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -139,7 +139,6 @@ extern void tick_nohz_irq_exit(void); extern bool tick_nohz_idle_got_tick(void); extern ktime_t tick_nohz_get_next_hrtimer(void); extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next); -extern unsigned long tick_nohz_get_idle_calls(void); extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); -- cgit v1.2.3 From 4df13a6871d9e97aeeef72244e9a954c5cf11f54 Mon Sep 17 00:00:00 2001 From: Ben Segal Date: Wed, 19 Jun 2024 13:58:46 +0200 Subject: vfio/pci: Support 8-byte PCI loads and stores Many PCI adapters can benefit or even require full 64bit read and write access to their registers. In order to enable work on user-space drivers for these devices add two new variations vfio_pci_core_io{read|write}64 of the existing access methods when the architecture supports 64-bit ioreads and iowrites. Signed-off-by: Ben Segal Co-developed-by: Gerd Bayer Signed-off-by: Gerd Bayer Link: https://lore.kernel.org/r/20240619115847.1344875-3-gbayer@linux.ibm.com Signed-off-by: Alex Williamson --- include/linux/vfio_pci_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index f87067438ed4..7b45f70f84c3 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -155,5 +155,8 @@ int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev, \ VFIO_IOREAD_DECLATION(8) VFIO_IOREAD_DECLATION(16) VFIO_IOREAD_DECLATION(32) +#ifdef ioread64 +VFIO_IOREAD_DECLATION(64) +#endif #endif /* VFIO_PCI_CORE_H */ -- cgit v1.2.3 From abe8103da3c59415b790fa260a6676c7bf252a65 Mon Sep 17 00:00:00 2001 From: Gerd Bayer Date: Wed, 19 Jun 2024 13:58:47 +0200 Subject: vfio/pci: Fix typo in macro to declare accessors Correct spelling of DECLA[RA]TION Suggested-by: Ramesh Thomas Signed-off-by: Gerd Bayer Link: https://lore.kernel.org/r/20240619115847.1344875-4-gbayer@linux.ibm.com Signed-off-by: Alex Williamson --- include/linux/vfio_pci_core.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index 7b45f70f84c3..fbb472dd99b3 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -137,26 +137,26 @@ bool vfio_pci_core_range_intersect_range(loff_t buf_start, size_t buf_cnt, loff_t *buf_offset, size_t *intersect_count, size_t *register_offset); -#define VFIO_IOWRITE_DECLATION(size) \ +#define VFIO_IOWRITE_DECLARATION(size) \ int vfio_pci_core_iowrite##size(struct vfio_pci_core_device *vdev, \ bool test_mem, u##size val, void __iomem *io); -VFIO_IOWRITE_DECLATION(8) -VFIO_IOWRITE_DECLATION(16) -VFIO_IOWRITE_DECLATION(32) +VFIO_IOWRITE_DECLARATION(8) +VFIO_IOWRITE_DECLARATION(16) +VFIO_IOWRITE_DECLARATION(32) #ifdef iowrite64 -VFIO_IOWRITE_DECLATION(64) +VFIO_IOWRITE_DECLARATION(64) #endif -#define VFIO_IOREAD_DECLATION(size) \ +#define VFIO_IOREAD_DECLARATION(size) \ int vfio_pci_core_ioread##size(struct vfio_pci_core_device *vdev, \ bool test_mem, u##size *val, void __iomem *io); -VFIO_IOREAD_DECLATION(8) -VFIO_IOREAD_DECLATION(16) -VFIO_IOREAD_DECLATION(32) +VFIO_IOREAD_DECLARATION(8) +VFIO_IOREAD_DECLARATION(16) +VFIO_IOREAD_DECLARATION(32) #ifdef ioread64 -VFIO_IOREAD_DECLATION(64) +VFIO_IOREAD_DECLARATION(64) #endif #endif /* VFIO_PCI_CORE_H */ -- cgit v1.2.3 From 90c3e2bc9ecbf3210d661fadee4c947d34a88ceb Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 30 Apr 2024 11:43:16 +0100 Subject: firmware: qcom_scm: Add gpu_init_regs call This will used by drm/msm to initialize GPU registers that Qualcomm's firmware doesn't make writeable to the kernel. Reviewed-by: Dmitry Baryshkov Signed-off-by: Connor Abbott Reviewed-by: Konrad Dybcio Acked-by: Bjorn Andersson Patchwork: https://patchwork.freedesktop.org/patch/592039/ Signed-off-by: Rob Clark --- include/linux/firmware/qcom/qcom_scm.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index aaa19f93ac43..a221a643dc12 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -115,6 +115,29 @@ int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val, int qcom_scm_lmh_profile_change(u32 profile_id); bool qcom_scm_lmh_dcvsh_available(void); +/* + * Request TZ to program set of access controlled registers necessary + * irrespective of any features + */ +#define QCOM_SCM_GPU_ALWAYS_EN_REQ BIT(0) +/* + * Request TZ to program BCL id to access controlled register when BCL is + * enabled + */ +#define QCOM_SCM_GPU_BCL_EN_REQ BIT(1) +/* + * Request TZ to program set of access controlled register for CLX feature + * when enabled + */ +#define QCOM_SCM_GPU_CLX_EN_REQ BIT(2) +/* + * Request TZ to program tsense ids to access controlled registers for reading + * gpu temperature sensors + */ +#define QCOM_SCM_GPU_TSENSE_EN_REQ BIT(3) + +int qcom_scm_gpu_init_regs(u32 gpu_req); + #ifdef CONFIG_QCOM_QSEECOM int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id); -- cgit v1.2.3 From 41fd54ef74b02233a419b4929d26662e5f105f46 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Tue, 30 Apr 2024 11:43:19 +0100 Subject: drm/msm: Add MSM_PARAM_RAYTRACING uapi Expose the value of the software fuse to userspace. Reviewed-by: Konrad Dybcio Reviewed-by: Dmitry Baryshkov Signed-off-by: Connor Abbott Patchwork: https://patchwork.freedesktop.org/patch/592044/ Signed-off-by: Rob Clark --- include/uapi/drm/msm_drm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index d8a6b3472760..3fca72f73861 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -87,6 +87,7 @@ struct drm_msm_timespec { #define MSM_PARAM_VA_START 0x0e /* RO: start of valid GPU iova range */ #define MSM_PARAM_VA_SIZE 0x0f /* RO: size of valid GPU iova range (bytes) */ #define MSM_PARAM_HIGHEST_BANK_BIT 0x10 /* RO */ +#define MSM_PARAM_RAYTRACING 0x11 /* RO */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # -- cgit v1.2.3 From d4e48e3dd45017abdd69a19285d197de897ef44f Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Thu, 20 Jun 2024 10:17:29 +0100 Subject: module, bpf: Store BTF base pointer in struct module ...as this will allow split BTF modules with a base BTF representation (rather than the full vmlinux BTF at time of BTF encoding) to resolve their references to kernel types in a way that is more resilient to small changes in kernel types. This will allow modules that are not built every time the kernel is to provide more resilient BTF, rather than have it invalidated every time BTF ids for core kernel types change. Fields are ordered to avoid holes in struct module. Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Reviewed-by: Luis Chamberlain Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240620091733.1967885-3-alan.maguire@oracle.com --- include/linux/module.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index ffa1c603163c..b79d926cae8a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -509,7 +509,9 @@ struct module { #endif #ifdef CONFIG_DEBUG_INFO_BTF_MODULES unsigned int btf_data_size; + unsigned int btf_base_data_size; void *btf_data; + void *btf_base_data; #endif #ifdef CONFIG_JUMP_LABEL struct jump_entry *jump_entries; -- cgit v1.2.3 From 8646db238997df36c6ad71a9d7e0b52ceee221b2 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Thu, 20 Jun 2024 10:17:31 +0100 Subject: libbpf,bpf: Share BTF relocate-related code with kernel Share relocation implementation with the kernel. As part of this, we also need the type/string iteration functions so also share btf_iter.c file. Relocation code in kernel and userspace is identical save for the impementation of the reparenting of split BTF to the relocated base BTF and retrieval of the BTF header from "struct btf"; these small functions need separate user-space and kernel implementations for the separate "struct btf"s they operate upon. One other wrinkle on the kernel side is we have to map .BTF.ids in modules as they were generated with the type ids used at BTF encoding time. btf_relocate() optionally returns an array mapping from old BTF ids to relocated ids, so we use that to fix up these references where needed for kfuncs. Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240620091733.1967885-5-alan.maguire@oracle.com --- include/linux/btf.h | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index 56d91daacdba..d199fa17abb4 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -140,6 +140,7 @@ extern const struct file_operations btf_fops; const char *btf_get_name(const struct btf *btf); void btf_get(struct btf *btf); void btf_put(struct btf *btf); +const struct btf_header *btf_header(const struct btf *btf); int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz); struct btf *btf_get_by_fd(int fd); int btf_get_info_by_fd(const struct btf *btf, @@ -212,8 +213,10 @@ int btf_get_fd_by_id(u32 id); u32 btf_obj_id(const struct btf *btf); bool btf_is_kernel(const struct btf *btf); bool btf_is_module(const struct btf *btf); +bool btf_is_vmlinux(const struct btf *btf); struct module *btf_try_get_module(const struct btf *btf); u32 btf_nr_types(const struct btf *btf); +struct btf *btf_base_btf(const struct btf *btf); bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, const struct btf_member *m, u32 expected_offset, u32 expected_size); @@ -339,6 +342,11 @@ static inline u8 btf_int_offset(const struct btf_type *t) return BTF_INT_OFFSET(*(u32 *)(t + 1)); } +static inline __u8 btf_int_bits(const struct btf_type *t) +{ + return BTF_INT_BITS(*(__u32 *)(t + 1)); +} + static inline bool btf_type_is_scalar(const struct btf_type *t) { return btf_type_is_int(t) || btf_type_is_enum(t); @@ -478,6 +486,11 @@ static inline struct btf_param *btf_params(const struct btf_type *t) return (struct btf_param *)(t + 1); } +static inline struct btf_decl_tag *btf_decl_tag(const struct btf_type *t) +{ + return (struct btf_decl_tag *)(t + 1); +} + static inline int btf_id_cmp_func(const void *a, const void *b) { const int *pa = a, *pb = b; @@ -515,9 +528,38 @@ static inline const struct bpf_struct_ops_desc *bpf_struct_ops_find(struct btf * } #endif +enum btf_field_iter_kind { + BTF_FIELD_ITER_IDS, + BTF_FIELD_ITER_STRS, +}; + +struct btf_field_desc { + /* once-per-type offsets */ + int t_off_cnt, t_offs[2]; + /* member struct size, or zero, if no members */ + int m_sz; + /* repeated per-member offsets */ + int m_off_cnt, m_offs[1]; +}; + +struct btf_field_iter { + struct btf_field_desc desc; + void *p; + int m_idx; + int off_idx; + int vlen; +}; + #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); +void btf_set_base_btf(struct btf *btf, const struct btf *base_btf); +int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **map_ids); +int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, + enum btf_field_iter_kind iter_kind); +__u32 *btf_field_iter_next(struct btf_field_iter *it); + const char *btf_name_by_offset(const struct btf *btf, u32 offset); +const char *btf_str_by_offset(const struct btf *btf, u32 offset); struct btf *btf_parse_vmlinux(void); struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id, @@ -544,6 +586,28 @@ static inline const struct btf_type *btf_type_by_id(const struct btf *btf, { return NULL; } + +static inline void btf_set_base_btf(struct btf *btf, const struct btf *base_btf) +{ +} + +static inline int btf_relocate(void *log, struct btf *btf, const struct btf *base_btf, + __u32 **map_ids) +{ + return -EOPNOTSUPP; +} + +static inline int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, + enum btf_field_iter_kind iter_kind) +{ + return -EOPNOTSUPP; +} + +static inline __u32 *btf_field_iter_next(struct btf_field_iter *it) +{ + return NULL; +} + static inline const char *btf_name_by_offset(const struct btf *btf, u32 offset) { -- cgit v1.2.3 From d4a0055fdc22381fa256e345095e88d134e354c5 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 21 Jun 2024 15:51:31 -0500 Subject: spi: add devm_spi_optimize_message() helper This adds a new helper function devm_spi_optimize_message() that automatically registers spi_unoptimize_message() to be called when the device is removed. Signed-off-by: David Lechner Link: https://patch.msgid.link/20240621-devm_spi_optimize_message-v1-2-3f9dcba6e95e@baylibre.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e8e1e798924f..e6e5978f7564 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -1268,6 +1268,8 @@ static inline void spi_message_free(struct spi_message *m) extern int spi_optimize_message(struct spi_device *spi, struct spi_message *msg); extern void spi_unoptimize_message(struct spi_message *msg); +extern int devm_spi_optimize_message(struct device *dev, struct spi_device *spi, + struct spi_message *msg); extern int spi_setup(struct spi_device *spi); extern int spi_async(struct spi_device *spi, struct spi_message *message); -- cgit v1.2.3 From 532857c2a76ba3553302cf2a2cbec7679fb5b4fe Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Wed, 5 Jun 2024 12:20:46 -0500 Subject: dt-bindings: clock: sun50i-h616-ccu: Add GPADC clocks Add the required clock bindings for the GPADC. Signed-off-by: Chris Morgan Acked-by: Jernej Skrabec Reviewed-by: Andre Przywara Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240605172049.231108-2-macroalpha82@gmail.com Signed-off-by: Chen-Yu Tsai --- include/dt-bindings/clock/sun50i-h616-ccu.h | 1 + include/dt-bindings/reset/sun50i-h616-ccu.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/sun50i-h616-ccu.h b/include/dt-bindings/clock/sun50i-h616-ccu.h index 6f8f01e67628..ebb146ab7f8c 100644 --- a/include/dt-bindings/clock/sun50i-h616-ccu.h +++ b/include/dt-bindings/clock/sun50i-h616-ccu.h @@ -112,5 +112,6 @@ #define CLK_HDCP 126 #define CLK_BUS_HDCP 127 #define CLK_PLL_SYSTEM_32K 128 +#define CLK_BUS_GPADC 129 #endif /* _DT_BINDINGS_CLK_SUN50I_H616_H_ */ diff --git a/include/dt-bindings/reset/sun50i-h616-ccu.h b/include/dt-bindings/reset/sun50i-h616-ccu.h index 1bd8bb0a11be..ed177c04afdd 100644 --- a/include/dt-bindings/reset/sun50i-h616-ccu.h +++ b/include/dt-bindings/reset/sun50i-h616-ccu.h @@ -66,5 +66,6 @@ #define RST_BUS_TVE0 57 #define RST_BUS_HDCP 58 #define RST_BUS_KEYADC 59 +#define RST_BUS_GPADC 60 #endif /* _DT_BINDINGS_RESET_SUN50I_H616_H_ */ -- cgit v1.2.3 From 06efa5f30c28eaf237247ca8c4cb46eb62cb6bd9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 22 Jun 2024 21:38:58 -0400 Subject: closures: closure_get_not_zero(), closure_return_sync() Provide new primitives for solving a lifetime issue with bcachefs btree_trans objects. closure_sync_return(): like closure_sync(), wait synchronously for any outstanding gets. like closure_return, the closure is considered "finished" and the ref left at 0. closure_get_not_zero(): get a ref on a closure if it's alive, i.e. the ref is not zero. Signed-off-by: Kent Overstreet --- include/linux/closure.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/closure.h b/include/linux/closure.h index 99155df162d0..59b8c06b11ff 100644 --- a/include/linux/closure.h +++ b/include/linux/closure.h @@ -284,6 +284,21 @@ static inline void closure_get(struct closure *cl) #endif } +/** + * closure_get_not_zero + */ +static inline bool closure_get_not_zero(struct closure *cl) +{ + unsigned old = atomic_read(&cl->remaining); + do { + if (!(old & CLOSURE_REMAINING_MASK)) + return false; + + } while (!atomic_try_cmpxchg_acquire(&cl->remaining, &old, old + 1)); + + return true; +} + /** * closure_init - Initialize a closure, setting the refcount to 1 * @cl: closure to initialize @@ -310,6 +325,12 @@ static inline void closure_init_stack(struct closure *cl) atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); } +static inline void closure_init_stack_release(struct closure *cl) +{ + memset(cl, 0, sizeof(struct closure)); + atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); +} + /** * closure_wake_up - wake up all closures on a wait list, * with memory barrier @@ -355,6 +376,8 @@ do { \ */ #define closure_return(_cl) continue_at((_cl), NULL, NULL) +void closure_return_sync(struct closure *cl); + /** * continue_at_nobarrier - jump to another function without barrier * -- cgit v1.2.3 From 467cfe945656df044c8cf9121e5cdbe5b977b497 Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Mon, 19 Feb 2024 13:43:55 +0200 Subject: accel/habanalabs/gaudi2: align embedded specs headers Align embedded headers to latest release. Reviewed-by: Tomer Tayar Signed-off-by: Ofir Bitton --- include/linux/habanalabs/cpucp_if.h | 10 ++++++++-- include/linux/habanalabs/hl_boot_if.h | 29 ++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index f316c8d0f3fc..1ac1d68193e3 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -42,6 +42,12 @@ enum eq_event_id { EQ_EVENT_PWR_BRK_ENTRY, EQ_EVENT_PWR_BRK_EXIT, EQ_EVENT_HEARTBEAT, + EQ_EVENT_CPLD_RESET_REASON, + EQ_EVENT_CPLD_SHUTDOWN, + EQ_EVENT_POWER_EVT_START, + EQ_EVENT_POWER_EVT_END, + EQ_EVENT_THERMAL_EVT_START, + EQ_EVENT_THERMAL_EVT_END, }; /* @@ -1165,7 +1171,7 @@ struct cpucp_security_info { struct cpucp_info { struct cpucp_sensor sensors[CPUCP_MAX_SENSORS]; __u8 kernel_version[VERSION_MAX_LEN]; - __le32 reserved; + __le32 reserved1; __le32 card_type; __le32 card_location; __le32 cpld_version; @@ -1187,7 +1193,7 @@ struct cpucp_info { __u8 substrate_version; __u8 eq_health_check_supported; struct cpucp_security_info sec_info; - __le32 fw_hbm_region_size; + __le32 reserved2; __u8 pll_map[PLL_MAP_LEN]; __le64 mme_binning_mask; __u8 fw_os_version[VERSION_MAX_LEN]; diff --git a/include/linux/habanalabs/hl_boot_if.h b/include/linux/habanalabs/hl_boot_if.h index 93366d5621fd..d2a9fc96424b 100644 --- a/include/linux/habanalabs/hl_boot_if.h +++ b/include/linux/habanalabs/hl_boot_if.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 * - * Copyright 2018-2020 HabanaLabs, Ltd. + * Copyright 2018-2023 HabanaLabs, Ltd. * All Rights Reserved. * */ @@ -49,7 +49,6 @@ enum cpu_boot_err { #define CPU_BOOT_ERR_FATAL_MASK \ ((1 << CPU_BOOT_ERR_DRAM_INIT_FAIL) | \ (1 << CPU_BOOT_ERR_PLL_FAIL) | \ - (1 << CPU_BOOT_ERR_DEVICE_UNUSABLE_FAIL) | \ (1 << CPU_BOOT_ERR_BINNING_FAIL) | \ (1 << CPU_BOOT_ERR_DRAM_SKIPPED) | \ (1 << CPU_BOOT_ERR_ENG_ARC_MEM_SCRUB_FAIL) | \ @@ -194,6 +193,8 @@ enum cpu_boot_dev_sts { CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN = 24, CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN = 25, CPU_BOOT_DEV_STS_MAP_HWMON_EN = 26, + CPU_BOOT_DEV_STS_NIC_MEM_CLEAR_EN = 27, + CPU_BOOT_DEV_STS_MMU_PGTBL_DRAM_EN = 28, CPU_BOOT_DEV_STS_ENABLED = 31, CPU_BOOT_DEV_STS_SCND_EN = 63, CPU_BOOT_DEV_STS_LAST = 64 /* we have 2 registers of 32 bits */ @@ -331,6 +332,17 @@ enum cpu_boot_dev_sts { * HWMON enum mapping to cpucp enums. * Initialized in: linux * + * CPU_BOOT_DEV_STS0_NIC_MEM_CLEAR_EN + * If set, means f/w supports nic hbm memory clear and + * tmr,txs hbm memory init. + * Initialized in: zephyr-mgmt + * + * CPU_BOOT_DEV_STS_MMU_PGTBL_DRAM_EN + * MMU page tables are located in DRAM. + * F/W initializes security settings for MMU + * page tables to reside in DRAM. + * Initialized in: zephyr-mgmt + * * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. * This is a main indication that the * running FW populates the device status @@ -367,6 +379,8 @@ enum cpu_boot_dev_sts { #define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN (1 << CPU_BOOT_DEV_STS_FW_NIC_STAT_EXT_EN) #define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN (1 << CPU_BOOT_DEV_STS_IS_IDLE_CHECK_EN) #define CPU_BOOT_DEV_STS0_MAP_HWMON_EN (1 << CPU_BOOT_DEV_STS_MAP_HWMON_EN) +#define CPU_BOOT_DEV_STS0_NIC_MEM_CLEAR_EN (1 << CPU_BOOT_DEV_STS_NIC_MEM_CLEAR_EN) +#define CPU_BOOT_DEV_STS0_MMU_PGTBL_DRAM_EN (1 << CPU_BOOT_DEV_STS_MMU_PGTBL_DRAM_EN) #define CPU_BOOT_DEV_STS0_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) #define CPU_BOOT_DEV_STS1_ENABLED (1 << CPU_BOOT_DEV_STS_ENABLED) @@ -450,11 +464,11 @@ struct cpu_dyn_regs { __le32 gic_dma_core_irq_ctrl; __le32 gic_host_halt_irq; __le32 gic_host_ints_irq; - __le32 gic_host_soft_rst_irq; + __le32 reserved0; __le32 gic_rot_qm_irq_ctrl; - __le32 cpu_rst_status; + __le32 reserved1; __le32 eng_arc_irq_ctrl; - __le32 reserved1[20]; /* reserve for future use */ + __le32 reserved2[20]; /* reserve for future use */ }; /* TODO: remove the desc magic after the code is updated to use message */ @@ -551,8 +565,9 @@ enum lkd_fw_ascii_msg_lvls { LKD_FW_ASCII_MSG_DBG = 3, }; -#define LKD_FW_ASCII_MSG_MAX_LEN 128 -#define LKD_FW_ASCII_MSG_MAX 4 /* consider ABI when changing */ +#define LKD_FW_ASCII_MSG_MAX_LEN 128 +#define LKD_FW_ASCII_MSG_MAX 4 /* consider ABI when changing */ +#define LKD_FW_ASCII_MSG_MIN_DESC_VERSION 3 struct lkd_fw_ascii_msg { __u8 valid; -- cgit v1.2.3 From c2a27584ff3b7f17da3ed2abfa3956f9d376e3da Mon Sep 17 00:00:00 2001 From: Dani Liberman Date: Mon, 11 Mar 2024 18:31:10 +0200 Subject: accel/habanalabs: separate nonce from max_size in cpucp_packet struct In struct cpucp_packet both nonce and data_max_size members are in an union overlapping each other. This is a problem as they both are used in attestation and info_signed packets. The solution here is to move the nonce member to a different union under the same structure. Signed-off-by: Dani Liberman Reviewed-by: Ofir Bitton Signed-off-by: Ofir Bitton --- include/linux/habanalabs/cpucp_if.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 1ac1d68193e3..0913415243e8 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -859,9 +859,6 @@ struct cpucp_packet { * result cannot be used to hold general purpose data. */ __le32 status_mask; - - /* random, used once number, for security packets */ - __le32 nonce; }; union { @@ -870,6 +867,9 @@ struct cpucp_packet { /* For Generic packet sub index */ __le32 pkt_subidx; + + /* random, used once number, for security packets */ + __le32 nonce; }; }; -- cgit v1.2.3 From cebb64f9335b073cc2877fed16c613c56bed82bc Mon Sep 17 00:00:00 2001 From: Vitaly Margolin Date: Mon, 27 May 2024 17:25:37 +0300 Subject: accel/habanalabs: add cpld ts cpld_timestamp cpucp Add cpld_timestamp field to cpucp_info structure and return cpld timestamp as part of cpld version Signed-off-by: Vitaly Margolin Reviewed-by: Ofir Bitton Signed-off-by: Ofir Bitton --- include/linux/habanalabs/cpucp_if.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 0913415243e8..1ed17887f1a8 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -1146,6 +1146,7 @@ struct cpucp_security_info { * (0 = fully functional, 1 = lower-half is not functional, * 2 = upper-half is not functional) * @sec_info: security information + * @cpld_timestamp: CPLD programmed F/W timestamp. * @pll_map: Bit map of supported PLLs for current ASIC version. * @mme_binning_mask: MME binning mask, * bits [0:6] <==> dcore0 mme fma @@ -1193,7 +1194,7 @@ struct cpucp_info { __u8 substrate_version; __u8 eq_health_check_supported; struct cpucp_security_info sec_info; - __le32 reserved2; + __le32 cpld_timestamp; __u8 pll_map[PLL_MAP_LEN]; __le64 mme_binning_mask; __u8 fw_os_version[VERSION_MAX_LEN]; -- cgit v1.2.3 From c4548eee537ed7671170eb370c6c8db12dcfd3e8 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Tue, 16 Apr 2024 19:01:42 +0300 Subject: accel/habanalabs: dump the EQ entries headers on EQ heartbeat failure Add a dump of the EQ entries headers upon a EQ heartbeat failure. Signed-off-by: Tomer Tayar Reviewed-by: Ofir Bitton Signed-off-by: Ofir Bitton --- include/linux/habanalabs/cpucp_if.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/habanalabs/cpucp_if.h b/include/linux/habanalabs/cpucp_if.h index 1ed17887f1a8..7ed3fdd55dda 100644 --- a/include/linux/habanalabs/cpucp_if.h +++ b/include/linux/habanalabs/cpucp_if.h @@ -397,6 +397,9 @@ struct hl_eq_entry { #define EQ_CTL_READY_SHIFT 31 #define EQ_CTL_READY_MASK 0x80000000 +#define EQ_CTL_EVENT_MODE_SHIFT 28 +#define EQ_CTL_EVENT_MODE_MASK 0x70000000 + #define EQ_CTL_EVENT_TYPE_SHIFT 16 #define EQ_CTL_EVENT_TYPE_MASK 0x0FFF0000 -- cgit v1.2.3 From b7f5e0636f1d450c5df00fea194a8efd39f87a1b Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Thu, 6 Jun 2024 16:33:59 +0200 Subject: dt-bindings: clock: rk3128: Drop CLK_NR_CLKS CLK_NR_CLKS should not be part of the binding. Let's drop it, since the kernel code no longer uses it either. Signed-off-by: Alex Bee Acked-by: Krzysztof Kozlowski Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240606143401.32454-4-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rk3128-cru.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/rk3128-cru.h b/include/dt-bindings/clock/rk3128-cru.h index 1be455ba4985..2616a8791c14 100644 --- a/include/dt-bindings/clock/rk3128-cru.h +++ b/include/dt-bindings/clock/rk3128-cru.h @@ -145,8 +145,6 @@ #define HCLK_CRYPTO 476 #define HCLK_PERI 478 -#define CLK_NR_CLKS (HCLK_PERI + 1) - /* soft-reset indices */ #define SRST_CORE0_PO 0 #define SRST_CORE1_PO 1 -- cgit v1.2.3 From f9da49c3c4c43278d8d8fafb3df7f5514478eb28 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Thu, 6 Jun 2024 16:34:00 +0200 Subject: dt-bindings: clock: rk3128: Add HCLK_SFC Add a clock id for SFC's AHB clock. Signed-off-by: Alex Bee Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240606143401.32454-5-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rk3128-cru.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/rk3128-cru.h b/include/dt-bindings/clock/rk3128-cru.h index 2616a8791c14..b609fcf96508 100644 --- a/include/dt-bindings/clock/rk3128-cru.h +++ b/include/dt-bindings/clock/rk3128-cru.h @@ -144,6 +144,7 @@ #define HCLK_TSP 475 #define HCLK_CRYPTO 476 #define HCLK_PERI 478 +#define HCLK_SFC 479 /* soft-reset indices */ #define SRST_CORE0_PO 0 -- cgit v1.2.3 From 84f5a7b67b61bfeb0a939ddc5eca8586cae101de Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 27 May 2024 14:54:52 +0200 Subject: firmware: qcom: add a dedicated TrustZone buffer allocator We have several SCM calls that require passing buffers to the TrustZone on top of the SMC core which allocates memory for calls that require more than 4 arguments. Currently every user does their own thing which leads to code duplication. Many users call dma_alloc_coherent() for every call which is terribly unperformant (speed- and size-wise). Provide a set of library functions for creating and managing pools of memory which is suitable for sharing with the TrustZone, that is: page-aligned, contiguous and non-cachable as well as provides a way of mapping of kernel virtual addresses to physical space. Make the allocator ready for extending with additional modes of operation which will allow us to support the SHM bridge safety mechanism once all users convert. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andrew Halaney Tested-by: Andrew Halaney # sc8280xp-lenovo-thinkpad-x13s Tested-by: Deepti Jaggi #sa8775p-ride Reviewed-by: Elliot Berman Link: https://lore.kernel.org/r/20240527-shm-bridge-v10-2-ce7afaa58d3a@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_tzmem.h | 56 ++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 include/linux/firmware/qcom/qcom_tzmem.h (limited to 'include') diff --git a/include/linux/firmware/qcom/qcom_tzmem.h b/include/linux/firmware/qcom/qcom_tzmem.h new file mode 100644 index 000000000000..b83b63a0c049 --- /dev/null +++ b/include/linux/firmware/qcom/qcom_tzmem.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023-2024 Linaro Ltd. + */ + +#ifndef __QCOM_TZMEM_H +#define __QCOM_TZMEM_H + +#include +#include +#include + +struct device; +struct qcom_tzmem_pool; + +/** + * enum qcom_tzmem_policy - Policy for pool growth. + */ +enum qcom_tzmem_policy { + /**< Static pool, never grow above initial size. */ + QCOM_TZMEM_POLICY_STATIC = 1, + /**< When out of memory, add increment * current size of memory. */ + QCOM_TZMEM_POLICY_MULTIPLIER, + /**< When out of memory add as much as is needed until max_size. */ + QCOM_TZMEM_POLICY_ON_DEMAND, +}; + +/** + * struct qcom_tzmem_pool_config - TZ memory pool configuration. + * @initial_size: Number of bytes to allocate for the pool during its creation. + * @policy: Pool size growth policy. + * @increment: Used with policies that allow pool growth. + * @max_size: Size above which the pool will never grow. + */ +struct qcom_tzmem_pool_config { + size_t initial_size; + enum qcom_tzmem_policy policy; + size_t increment; + size_t max_size; +}; + +struct qcom_tzmem_pool * +qcom_tzmem_pool_new(const struct qcom_tzmem_pool_config *config); +void qcom_tzmem_pool_free(struct qcom_tzmem_pool *pool); +struct qcom_tzmem_pool * +devm_qcom_tzmem_pool_new(struct device *dev, + const struct qcom_tzmem_pool_config *config); + +void *qcom_tzmem_alloc(struct qcom_tzmem_pool *pool, size_t size, gfp_t gfp); +void qcom_tzmem_free(void *ptr); + +DEFINE_FREE(qcom_tzmem, void *, if (_T) qcom_tzmem_free(_T)) + +phys_addr_t qcom_tzmem_to_phys(void *ptr); + +#endif /* __QCOM_TZMEM */ -- cgit v1.2.3 From 6612103ec35af6058bb85ab24dae28e119b3c055 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 27 May 2024 14:54:59 +0200 Subject: firmware: qcom: qseecom: convert to using the TZ allocator Drop the DMA mapping operations from qcom_scm_qseecom_app_send() and convert all users of it in the qseecom module to using the TZ allocator for creating SCM call buffers. As this is largely a module separate from the SCM driver, let's use a separate memory pool. Set the initial size to 4K and - if we run out - add twice the current amount to the pool. Signed-off-by: Bartosz Golaszewski Reviewed-by: Elliot Berman Reviewed-by: Amirreza Zarrabi Link: https://lore.kernel.org/r/20240527-shm-bridge-v10-9-ce7afaa58d3a@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_qseecom.h | 8 ++++---- include/linux/firmware/qcom/qcom_scm.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/qcom/qcom_qseecom.h b/include/linux/firmware/qcom/qcom_qseecom.h index 366243ee9609..1dc5b3b50aa9 100644 --- a/include/linux/firmware/qcom/qcom_qseecom.h +++ b/include/linux/firmware/qcom/qcom_qseecom.h @@ -73,9 +73,9 @@ static inline void qseecom_dma_free(struct qseecom_client *client, size_t size, /** * qcom_qseecom_app_send() - Send to and receive data from a given QSEE app. * @client: The QSEECOM client associated with the target app. - * @req: DMA address of the request buffer sent to the app. + * @req: Request buffer sent to the app (must be TZ memory). * @req_size: Size of the request buffer. - * @rsp: DMA address of the response buffer, written to by the app. + * @rsp: Response buffer, written to by the app (must be TZ memory). * @rsp_size: Size of the response buffer. * * Sends a request to the QSEE app associated with the given client and read @@ -90,8 +90,8 @@ static inline void qseecom_dma_free(struct qseecom_client *client, size_t size, * Return: Zero on success, nonzero on failure. */ static inline int qcom_qseecom_app_send(struct qseecom_client *client, - dma_addr_t req, size_t req_size, - dma_addr_t rsp, size_t rsp_size) + void *req, size_t req_size, + void *rsp, size_t rsp_size) { return qcom_scm_qseecom_app_send(client->app_id, req, req_size, rsp, rsp_size); } diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index a221a643dc12..77be72d40200 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -141,8 +141,8 @@ int qcom_scm_gpu_init_regs(u32 gpu_req); #ifdef CONFIG_QCOM_QSEECOM int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id); -int qcom_scm_qseecom_app_send(u32 app_id, dma_addr_t req, size_t req_size, - dma_addr_t rsp, size_t rsp_size); +int qcom_scm_qseecom_app_send(u32 app_id, void *req, size_t req_size, + void *rsp, size_t rsp_size); #else /* CONFIG_QCOM_QSEECOM */ @@ -152,8 +152,8 @@ static inline int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id) } static inline int qcom_scm_qseecom_app_send(u32 app_id, - dma_addr_t req, size_t req_size, - dma_addr_t rsp, size_t rsp_size) + void *req, size_t req_size, + void *rsp, size_t rsp_size) { return -EINVAL; } -- cgit v1.2.3 From 178e19c0df1b1b27668fc6ca43b25a03eda01dad Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 27 May 2024 14:55:00 +0200 Subject: firmware: qcom: scm: add support for SHM bridge operations SHM Bridge is a safety mechanism allowing to limit the amount of memory shared between the kernel and the TrustZone to regions explicitly marked as such. Add low-level primitives for enabling SHM bridge support as well as creating and destroying SHM bridges to qcom-scm. Signed-off-by: Bartosz Golaszewski Acked-by: Andrew Halaney Tested-by: Andrew Halaney # sc8280xp-lenovo-thinkpad-x13s Tested-by: Deepti Jaggi #sa8775p-ride Reviewed-by: Elliot Berman Link: https://lore.kernel.org/r/20240527-shm-bridge-v10-10-ce7afaa58d3a@linaro.org Signed-off-by: Bjorn Andersson --- include/linux/firmware/qcom/qcom_scm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/qcom/qcom_scm.h b/include/linux/firmware/qcom/qcom_scm.h index 77be72d40200..9f14976399ab 100644 --- a/include/linux/firmware/qcom/qcom_scm.h +++ b/include/linux/firmware/qcom/qcom_scm.h @@ -138,6 +138,12 @@ bool qcom_scm_lmh_dcvsh_available(void); int qcom_scm_gpu_init_regs(u32 gpu_req); +int qcom_scm_shm_bridge_enable(void); +int qcom_scm_shm_bridge_create(struct device *dev, u64 pfn_and_ns_perm_flags, + u64 ipfn_and_s_perm_flags, u64 size_and_flags, + u64 ns_vmids, u64 *handle); +int qcom_scm_shm_bridge_delete(struct device *dev, u64 handle); + #ifdef CONFIG_QCOM_QSEECOM int qcom_scm_qseecom_app_get_id(const char *app_name, u32 *app_id); -- cgit v1.2.3 From ae24ba769b5c108b00cdf49ed39b189eb012fbb8 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 23 Jun 2024 17:39:00 +0900 Subject: firewire: core: undefine macros after use in tracepoints events Some macros are defined in tracepoints events. They should be back to undefined state after use. Link: https://lore.kernel.org/r/20240623083900.777897-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index d237a30d197b..25289a063deb 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -428,6 +428,14 @@ TRACE_EVENT(self_id_sequence, ) ); +#undef PHY_PACKET_SELF_ID_GET_PHY_ID +#undef PHY_PACKET_SELF_ID_GET_LINK_ACTIVE +#undef PHY_PACKET_SELF_ID_GET_GAP_COUNT +#undef PHY_PACKET_SELF_ID_GET_SCODE +#undef PHY_PACKET_SELF_ID_GET_CONTENDER +#undef PHY_PACKET_SELF_ID_GET_POWER_CLASS +#undef PHY_PACKET_SELF_ID_GET_INITIATED_RESET + #undef QUADLET_SIZE #endif // _FIREWIRE_TRACE_EVENT_H -- cgit v1.2.3 From b5bad839c01e3a2a41056ac0bc07e7b1ea0ba412 Mon Sep 17 00:00:00 2001 From: Devarsh Thakkar Date: Wed, 19 Jun 2024 01:06:48 +0530 Subject: media: v4l2-jpeg: Export reference quantization and huffman tables Export reference quantization and huffman tables as provided in ITU-T.81 so that they can be re-used by other JPEG drivers. These are example tables provided in ITU-T.81 as reference tables and the JPEG encoders are free to use either these or their own proprietary tables. Also add necessary prefixes to be used for huffman tables in global header file. Signed-off-by: Devarsh Thakkar Reviewed-by: Hans Verkuil Signed-off-by: Sebastian Fricke Signed-off-by: Hans Verkuil --- include/media/v4l2-jpeg.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-jpeg.h b/include/media/v4l2-jpeg.h index 2dba843ce3bd..b65658a02e3c 100644 --- a/include/media/v4l2-jpeg.h +++ b/include/media/v4l2-jpeg.h @@ -14,6 +14,30 @@ #define V4L2_JPEG_MAX_COMPONENTS 4 #define V4L2_JPEG_MAX_TABLES 4 +/* + * Prefixes used to generate huffman table class and destination identifiers as + * described below: + * + * V4L2_JPEG_LUM_HT | V4L2_JPEG_DC_HT : Prefix for Luma DC coefficients + * huffman table + * V4L2_JPEG_LUM_HT | V4L2_JPEG_AC_HT : Prefix for Luma AC coefficients + * huffman table + * V4L2_JPEG_CHR_HT | V4L2_JPEG_DC_HT : Prefix for Chroma DC coefficients + * huffman table + * V4L2_JPEG_CHR_HT | V4L2_JPEG_AC_HT : Prefix for Chroma AC coefficients + * huffman table + */ +#define V4L2_JPEG_LUM_HT 0x00 +#define V4L2_JPEG_CHR_HT 0x01 +#define V4L2_JPEG_DC_HT 0x00 +#define V4L2_JPEG_AC_HT 0x10 + +/* Length of reference huffman tables as provided in Table K.3 of ITU-T.81 */ +#define V4L2_JPEG_REF_HT_AC_LEN 178 +#define V4L2_JPEG_REF_HT_DC_LEN 28 + +/* Array size for 8x8 block of samples or DCT coefficient */ +#define V4L2_JPEG_PIXELS_IN_BLOCK 64 /** * struct v4l2_jpeg_reference - reference into the JPEG buffer @@ -154,4 +178,12 @@ int v4l2_jpeg_parse_quantization_tables(void *buf, size_t len, u8 precision, int v4l2_jpeg_parse_huffman_tables(void *buf, size_t len, struct v4l2_jpeg_reference *huffman_tables); +extern const u8 v4l2_jpeg_zigzag_scan_index[V4L2_JPEG_PIXELS_IN_BLOCK]; +extern const u8 v4l2_jpeg_ref_table_luma_qt[V4L2_JPEG_PIXELS_IN_BLOCK]; +extern const u8 v4l2_jpeg_ref_table_chroma_qt[V4L2_JPEG_PIXELS_IN_BLOCK]; +extern const u8 v4l2_jpeg_ref_table_luma_dc_ht[V4L2_JPEG_REF_HT_DC_LEN]; +extern const u8 v4l2_jpeg_ref_table_luma_ac_ht[V4L2_JPEG_REF_HT_AC_LEN]; +extern const u8 v4l2_jpeg_ref_table_chroma_dc_ht[V4L2_JPEG_REF_HT_DC_LEN]; +extern const u8 v4l2_jpeg_ref_table_chroma_ac_ht[V4L2_JPEG_REF_HT_AC_LEN]; + #endif -- cgit v1.2.3 From 00dd4d86ed908e70d912a96ad91d1248ff055b62 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Fri, 21 Jun 2024 21:23:07 +0800 Subject: ASoc: tas2781: Add name_prefix as the prefix name of firmwares and kcontrol to support corresponding TAS2563/TAS2781s Add name_prefix as the prefix name of firmwares and kcontrol to support corresponding TAS2563/TAS2781s. name_prefix is not mandatory. Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20240621132309.564-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index 99ca3e401fd1..cd8ce522b78e 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -108,6 +108,7 @@ struct tasdevice_priv { unsigned char coef_binaryname[64]; unsigned char rca_binaryname[64]; unsigned char dev_name[32]; + const char *name_prefix; unsigned char ndev; unsigned int magic_num; unsigned int chip_id; -- cgit v1.2.3 From c5d1e53040efd3c630df3e2dba3df76372174aa6 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Sun, 16 Jun 2024 18:00:20 +0200 Subject: dt-bindings: clock: r8a7779: Remove duplicate newline Drop duplicate newline. No functional change. Signed-off-by: Marek Vasut Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240616160038.45937-1-marek.vasut+renesas@mailbox.org Signed-off-by: Geert Uytterhoeven --- include/dt-bindings/clock/r8a7779-clock.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/clock/r8a7779-clock.h b/include/dt-bindings/clock/r8a7779-clock.h index 342a60b11934..e39acdc6499c 100644 --- a/include/dt-bindings/clock/r8a7779-clock.h +++ b/include/dt-bindings/clock/r8a7779-clock.h @@ -57,5 +57,4 @@ #define R8A7779_CLK_MMC1 30 #define R8A7779_CLK_MMC0 31 - #endif /* __DT_BINDINGS_CLOCK_R8A7779_H__ */ -- cgit v1.2.3 From d6c5fc9add9eb7a0d5bd179ab07cbf43e32b28d8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 19 Jun 2024 13:22:46 +0200 Subject: dt-bindings: clock: rcar-gen2: Remove obsolete header files The clock definitions in were superseded by those in a long time ago. The last DTS user of these files was removed in commit 362b334b17943d84 ("ARM: dts: r8a7791: Convert to new CPG/MSSR bindings") in v4.15. Driver support for the old bindings was removed in commit 58256143cff7c2e0 ("clk: renesas: Remove R-Car Gen2 legacy DT clock support") in v5.5, so there is no point to keep on carrying these. Signed-off-by: Geert Uytterhoeven Acked-by: Conor Dooley Link: https://lore.kernel.org/d4abb688d666be35e99577a25b16958cbb4c3c98.1718796005.git.geert+renesas@glider.be --- include/dt-bindings/clock/r8a7790-clock.h | 158 ----------------------------- include/dt-bindings/clock/r8a7791-clock.h | 161 ------------------------------ include/dt-bindings/clock/r8a7792-clock.h | 98 ------------------ include/dt-bindings/clock/r8a7793-clock.h | 159 ----------------------------- include/dt-bindings/clock/r8a7794-clock.h | 137 ------------------------- 5 files changed, 713 deletions(-) delete mode 100644 include/dt-bindings/clock/r8a7790-clock.h delete mode 100644 include/dt-bindings/clock/r8a7791-clock.h delete mode 100644 include/dt-bindings/clock/r8a7792-clock.h delete mode 100644 include/dt-bindings/clock/r8a7793-clock.h delete mode 100644 include/dt-bindings/clock/r8a7794-clock.h (limited to 'include') diff --git a/include/dt-bindings/clock/r8a7790-clock.h b/include/dt-bindings/clock/r8a7790-clock.h deleted file mode 100644 index c92ff1e60223..000000000000 --- a/include/dt-bindings/clock/r8a7790-clock.h +++ /dev/null @@ -1,158 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2013 Ideas On Board SPRL - */ - -#ifndef __DT_BINDINGS_CLOCK_R8A7790_H__ -#define __DT_BINDINGS_CLOCK_R8A7790_H__ - -/* CPG */ -#define R8A7790_CLK_MAIN 0 -#define R8A7790_CLK_PLL0 1 -#define R8A7790_CLK_PLL1 2 -#define R8A7790_CLK_PLL3 3 -#define R8A7790_CLK_LB 4 -#define R8A7790_CLK_QSPI 5 -#define R8A7790_CLK_SDH 6 -#define R8A7790_CLK_SD0 7 -#define R8A7790_CLK_SD1 8 -#define R8A7790_CLK_Z 9 -#define R8A7790_CLK_RCAN 10 -#define R8A7790_CLK_ADSP 11 - -/* MSTP0 */ -#define R8A7790_CLK_MSIOF0 0 - -/* MSTP1 */ -#define R8A7790_CLK_VCP1 0 -#define R8A7790_CLK_VCP0 1 -#define R8A7790_CLK_VPC1 2 -#define R8A7790_CLK_VPC0 3 -#define R8A7790_CLK_JPU 6 -#define R8A7790_CLK_SSP1 9 -#define R8A7790_CLK_TMU1 11 -#define R8A7790_CLK_3DG 12 -#define R8A7790_CLK_2DDMAC 15 -#define R8A7790_CLK_FDP1_2 17 -#define R8A7790_CLK_FDP1_1 18 -#define R8A7790_CLK_FDP1_0 19 -#define R8A7790_CLK_TMU3 21 -#define R8A7790_CLK_TMU2 22 -#define R8A7790_CLK_CMT0 24 -#define R8A7790_CLK_TMU0 25 -#define R8A7790_CLK_VSP1_DU1 27 -#define R8A7790_CLK_VSP1_DU0 28 -#define R8A7790_CLK_VSP1_R 30 -#define R8A7790_CLK_VSP1_S 31 - -/* MSTP2 */ -#define R8A7790_CLK_SCIFA2 2 -#define R8A7790_CLK_SCIFA1 3 -#define R8A7790_CLK_SCIFA0 4 -#define R8A7790_CLK_MSIOF2 5 -#define R8A7790_CLK_SCIFB0 6 -#define R8A7790_CLK_SCIFB1 7 -#define R8A7790_CLK_MSIOF1 8 -#define R8A7790_CLK_MSIOF3 15 -#define R8A7790_CLK_SCIFB2 16 -#define R8A7790_CLK_SYS_DMAC1 18 -#define R8A7790_CLK_SYS_DMAC0 19 - -/* MSTP3 */ -#define R8A7790_CLK_IIC2 0 -#define R8A7790_CLK_TPU0 4 -#define R8A7790_CLK_MMCIF1 5 -#define R8A7790_CLK_SCIF2 10 -#define R8A7790_CLK_SDHI3 11 -#define R8A7790_CLK_SDHI2 12 -#define R8A7790_CLK_SDHI1 13 -#define R8A7790_CLK_SDHI0 14 -#define R8A7790_CLK_MMCIF0 15 -#define R8A7790_CLK_IIC0 18 -#define R8A7790_CLK_PCIEC 19 -#define R8A7790_CLK_IIC1 23 -#define R8A7790_CLK_SSUSB 28 -#define R8A7790_CLK_CMT1 29 -#define R8A7790_CLK_USBDMAC0 30 -#define R8A7790_CLK_USBDMAC1 31 - -/* MSTP4 */ -#define R8A7790_CLK_IRQC 7 -#define R8A7790_CLK_INTC_SYS 8 - -/* MSTP5 */ -#define R8A7790_CLK_AUDIO_DMAC1 1 -#define R8A7790_CLK_AUDIO_DMAC0 2 -#define R8A7790_CLK_ADSP_MOD 6 -#define R8A7790_CLK_THERMAL 22 -#define R8A7790_CLK_PWM 23 - -/* MSTP7 */ -#define R8A7790_CLK_EHCI 3 -#define R8A7790_CLK_HSUSB 4 -#define R8A7790_CLK_HSCIF1 16 -#define R8A7790_CLK_HSCIF0 17 -#define R8A7790_CLK_SCIF1 20 -#define R8A7790_CLK_SCIF0 21 -#define R8A7790_CLK_DU2 22 -#define R8A7790_CLK_DU1 23 -#define R8A7790_CLK_DU0 24 -#define R8A7790_CLK_LVDS1 25 -#define R8A7790_CLK_LVDS0 26 - -/* MSTP8 */ -#define R8A7790_CLK_MLB 2 -#define R8A7790_CLK_VIN3 8 -#define R8A7790_CLK_VIN2 9 -#define R8A7790_CLK_VIN1 10 -#define R8A7790_CLK_VIN0 11 -#define R8A7790_CLK_ETHERAVB 12 -#define R8A7790_CLK_ETHER 13 -#define R8A7790_CLK_SATA1 14 -#define R8A7790_CLK_SATA0 15 - -/* MSTP9 */ -#define R8A7790_CLK_GPIO5 7 -#define R8A7790_CLK_GPIO4 8 -#define R8A7790_CLK_GPIO3 9 -#define R8A7790_CLK_GPIO2 10 -#define R8A7790_CLK_GPIO1 11 -#define R8A7790_CLK_GPIO0 12 -#define R8A7790_CLK_RCAN1 15 -#define R8A7790_CLK_RCAN0 16 -#define R8A7790_CLK_QSPI_MOD 17 -#define R8A7790_CLK_IICDVFS 26 -#define R8A7790_CLK_I2C3 28 -#define R8A7790_CLK_I2C2 29 -#define R8A7790_CLK_I2C1 30 -#define R8A7790_CLK_I2C0 31 - -/* MSTP10 */ -#define R8A7790_CLK_SSI_ALL 5 -#define R8A7790_CLK_SSI9 6 -#define R8A7790_CLK_SSI8 7 -#define R8A7790_CLK_SSI7 8 -#define R8A7790_CLK_SSI6 9 -#define R8A7790_CLK_SSI5 10 -#define R8A7790_CLK_SSI4 11 -#define R8A7790_CLK_SSI3 12 -#define R8A7790_CLK_SSI2 13 -#define R8A7790_CLK_SSI1 14 -#define R8A7790_CLK_SSI0 15 -#define R8A7790_CLK_SCU_ALL 17 -#define R8A7790_CLK_SCU_DVC1 18 -#define R8A7790_CLK_SCU_DVC0 19 -#define R8A7790_CLK_SCU_CTU1_MIX1 20 -#define R8A7790_CLK_SCU_CTU0_MIX0 21 -#define R8A7790_CLK_SCU_SRC9 22 -#define R8A7790_CLK_SCU_SRC8 23 -#define R8A7790_CLK_SCU_SRC7 24 -#define R8A7790_CLK_SCU_SRC6 25 -#define R8A7790_CLK_SCU_SRC5 26 -#define R8A7790_CLK_SCU_SRC4 27 -#define R8A7790_CLK_SCU_SRC3 28 -#define R8A7790_CLK_SCU_SRC2 29 -#define R8A7790_CLK_SCU_SRC1 30 -#define R8A7790_CLK_SCU_SRC0 31 - -#endif /* __DT_BINDINGS_CLOCK_R8A7790_H__ */ diff --git a/include/dt-bindings/clock/r8a7791-clock.h b/include/dt-bindings/clock/r8a7791-clock.h deleted file mode 100644 index bb4f18b1b3d5..000000000000 --- a/include/dt-bindings/clock/r8a7791-clock.h +++ /dev/null @@ -1,161 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright 2013 Ideas On Board SPRL - */ - -#ifndef __DT_BINDINGS_CLOCK_R8A7791_H__ -#define __DT_BINDINGS_CLOCK_R8A7791_H__ - -/* CPG */ -#define R8A7791_CLK_MAIN 0 -#define R8A7791_CLK_PLL0 1 -#define R8A7791_CLK_PLL1 2 -#define R8A7791_CLK_PLL3 3 -#define R8A7791_CLK_LB 4 -#define R8A7791_CLK_QSPI 5 -#define R8A7791_CLK_SDH 6 -#define R8A7791_CLK_SD0 7 -#define R8A7791_CLK_Z 8 -#define R8A7791_CLK_RCAN 9 -#define R8A7791_CLK_ADSP 10 - -/* MSTP0 */ -#define R8A7791_CLK_MSIOF0 0 - -/* MSTP1 */ -#define R8A7791_CLK_VCP0 1 -#define R8A7791_CLK_VPC0 3 -#define R8A7791_CLK_JPU 6 -#define R8A7791_CLK_SSP1 9 -#define R8A7791_CLK_TMU1 11 -#define R8A7791_CLK_3DG 12 -#define R8A7791_CLK_2DDMAC 15 -#define R8A7791_CLK_FDP1_1 18 -#define R8A7791_CLK_FDP1_0 19 -#define R8A7791_CLK_TMU3 21 -#define R8A7791_CLK_TMU2 22 -#define R8A7791_CLK_CMT0 24 -#define R8A7791_CLK_TMU0 25 -#define R8A7791_CLK_VSP1_DU1 27 -#define R8A7791_CLK_VSP1_DU0 28 -#define R8A7791_CLK_VSP1_S 31 - -/* MSTP2 */ -#define R8A7791_CLK_SCIFA2 2 -#define R8A7791_CLK_SCIFA1 3 -#define R8A7791_CLK_SCIFA0 4 -#define R8A7791_CLK_MSIOF2 5 -#define R8A7791_CLK_SCIFB0 6 -#define R8A7791_CLK_SCIFB1 7 -#define R8A7791_CLK_MSIOF1 8 -#define R8A7791_CLK_SCIFB2 16 -#define R8A7791_CLK_SYS_DMAC1 18 -#define R8A7791_CLK_SYS_DMAC0 19 - -/* MSTP3 */ -#define R8A7791_CLK_TPU0 4 -#define R8A7791_CLK_SDHI2 11 -#define R8A7791_CLK_SDHI1 12 -#define R8A7791_CLK_SDHI0 14 -#define R8A7791_CLK_MMCIF0 15 -#define R8A7791_CLK_IIC0 18 -#define R8A7791_CLK_PCIEC 19 -#define R8A7791_CLK_IIC1 23 -#define R8A7791_CLK_SSUSB 28 -#define R8A7791_CLK_CMT1 29 -#define R8A7791_CLK_USBDMAC0 30 -#define R8A7791_CLK_USBDMAC1 31 - -/* MSTP4 */ -#define R8A7791_CLK_IRQC 7 -#define R8A7791_CLK_INTC_SYS 8 - -/* MSTP5 */ -#define R8A7791_CLK_AUDIO_DMAC1 1 -#define R8A7791_CLK_AUDIO_DMAC0 2 -#define R8A7791_CLK_ADSP_MOD 6 -#define R8A7791_CLK_THERMAL 22 -#define R8A7791_CLK_PWM 23 - -/* MSTP7 */ -#define R8A7791_CLK_EHCI 3 -#define R8A7791_CLK_HSUSB 4 -#define R8A7791_CLK_HSCIF2 13 -#define R8A7791_CLK_SCIF5 14 -#define R8A7791_CLK_SCIF4 15 -#define R8A7791_CLK_HSCIF1 16 -#define R8A7791_CLK_HSCIF0 17 -#define R8A7791_CLK_SCIF3 18 -#define R8A7791_CLK_SCIF2 19 -#define R8A7791_CLK_SCIF1 20 -#define R8A7791_CLK_SCIF0 21 -#define R8A7791_CLK_DU1 23 -#define R8A7791_CLK_DU0 24 -#define R8A7791_CLK_LVDS0 26 - -/* MSTP8 */ -#define R8A7791_CLK_IPMMU_SGX 0 -#define R8A7791_CLK_MLB 2 -#define R8A7791_CLK_VIN2 9 -#define R8A7791_CLK_VIN1 10 -#define R8A7791_CLK_VIN0 11 -#define R8A7791_CLK_ETHERAVB 12 -#define R8A7791_CLK_ETHER 13 -#define R8A7791_CLK_SATA1 14 -#define R8A7791_CLK_SATA0 15 - -/* MSTP9 */ -#define R8A7791_CLK_GYROADC 1 -#define R8A7791_CLK_GPIO7 4 -#define R8A7791_CLK_GPIO6 5 -#define R8A7791_CLK_GPIO5 7 -#define R8A7791_CLK_GPIO4 8 -#define R8A7791_CLK_GPIO3 9 -#define R8A7791_CLK_GPIO2 10 -#define R8A7791_CLK_GPIO1 11 -#define R8A7791_CLK_GPIO0 12 -#define R8A7791_CLK_RCAN1 15 -#define R8A7791_CLK_RCAN0 16 -#define R8A7791_CLK_QSPI_MOD 17 -#define R8A7791_CLK_I2C5 25 -#define R8A7791_CLK_IICDVFS 26 -#define R8A7791_CLK_I2C4 27 -#define R8A7791_CLK_I2C3 28 -#define R8A7791_CLK_I2C2 29 -#define R8A7791_CLK_I2C1 30 -#define R8A7791_CLK_I2C0 31 - -/* MSTP10 */ -#define R8A7791_CLK_SSI_ALL 5 -#define R8A7791_CLK_SSI9 6 -#define R8A7791_CLK_SSI8 7 -#define R8A7791_CLK_SSI7 8 -#define R8A7791_CLK_SSI6 9 -#define R8A7791_CLK_SSI5 10 -#define R8A7791_CLK_SSI4 11 -#define R8A7791_CLK_SSI3 12 -#define R8A7791_CLK_SSI2 13 -#define R8A7791_CLK_SSI1 14 -#define R8A7791_CLK_SSI0 15 -#define R8A7791_CLK_SCU_ALL 17 -#define R8A7791_CLK_SCU_DVC1 18 -#define R8A7791_CLK_SCU_DVC0 19 -#define R8A7791_CLK_SCU_CTU1_MIX1 20 -#define R8A7791_CLK_SCU_CTU0_MIX0 21 -#define R8A7791_CLK_SCU_SRC9 22 -#define R8A7791_CLK_SCU_SRC8 23 -#define R8A7791_CLK_SCU_SRC7 24 -#define R8A7791_CLK_SCU_SRC6 25 -#define R8A7791_CLK_SCU_SRC5 26 -#define R8A7791_CLK_SCU_SRC4 27 -#define R8A7791_CLK_SCU_SRC3 28 -#define R8A7791_CLK_SCU_SRC2 29 -#define R8A7791_CLK_SCU_SRC1 30 -#define R8A7791_CLK_SCU_SRC0 31 - -/* MSTP11 */ -#define R8A7791_CLK_SCIFA3 6 -#define R8A7791_CLK_SCIFA4 7 -#define R8A7791_CLK_SCIFA5 8 - -#endif /* __DT_BINDINGS_CLOCK_R8A7791_H__ */ diff --git a/include/dt-bindings/clock/r8a7792-clock.h b/include/dt-bindings/clock/r8a7792-clock.h deleted file mode 100644 index 2948d9ce3a14..000000000000 --- a/include/dt-bindings/clock/r8a7792-clock.h +++ /dev/null @@ -1,98 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2016 Cogent Embedded, Inc. - */ - -#ifndef __DT_BINDINGS_CLOCK_R8A7792_H__ -#define __DT_BINDINGS_CLOCK_R8A7792_H__ - -/* CPG */ -#define R8A7792_CLK_MAIN 0 -#define R8A7792_CLK_PLL0 1 -#define R8A7792_CLK_PLL1 2 -#define R8A7792_CLK_PLL3 3 -#define R8A7792_CLK_LB 4 -#define R8A7792_CLK_QSPI 5 - -/* MSTP0 */ -#define R8A7792_CLK_MSIOF0 0 - -/* MSTP1 */ -#define R8A7792_CLK_JPU 6 -#define R8A7792_CLK_TMU1 11 -#define R8A7792_CLK_TMU3 21 -#define R8A7792_CLK_TMU2 22 -#define R8A7792_CLK_CMT0 24 -#define R8A7792_CLK_TMU0 25 -#define R8A7792_CLK_VSP1DU1 27 -#define R8A7792_CLK_VSP1DU0 28 -#define R8A7792_CLK_VSP1_SY 31 - -/* MSTP2 */ -#define R8A7792_CLK_MSIOF1 8 -#define R8A7792_CLK_SYS_DMAC1 18 -#define R8A7792_CLK_SYS_DMAC0 19 - -/* MSTP3 */ -#define R8A7792_CLK_TPU0 4 -#define R8A7792_CLK_SDHI0 14 -#define R8A7792_CLK_CMT1 29 - -/* MSTP4 */ -#define R8A7792_CLK_IRQC 7 -#define R8A7792_CLK_INTC_SYS 8 - -/* MSTP5 */ -#define R8A7792_CLK_AUDIO_DMAC0 2 -#define R8A7792_CLK_THERMAL 22 -#define R8A7792_CLK_PWM 23 - -/* MSTP7 */ -#define R8A7792_CLK_HSCIF1 16 -#define R8A7792_CLK_HSCIF0 17 -#define R8A7792_CLK_SCIF3 18 -#define R8A7792_CLK_SCIF2 19 -#define R8A7792_CLK_SCIF1 20 -#define R8A7792_CLK_SCIF0 21 -#define R8A7792_CLK_DU1 23 -#define R8A7792_CLK_DU0 24 - -/* MSTP8 */ -#define R8A7792_CLK_VIN5 4 -#define R8A7792_CLK_VIN4 5 -#define R8A7792_CLK_VIN3 8 -#define R8A7792_CLK_VIN2 9 -#define R8A7792_CLK_VIN1 10 -#define R8A7792_CLK_VIN0 11 -#define R8A7792_CLK_ETHERAVB 12 - -/* MSTP9 */ -#define R8A7792_CLK_GPIO7 4 -#define R8A7792_CLK_GPIO6 5 -#define R8A7792_CLK_GPIO5 7 -#define R8A7792_CLK_GPIO4 8 -#define R8A7792_CLK_GPIO3 9 -#define R8A7792_CLK_GPIO2 10 -#define R8A7792_CLK_GPIO1 11 -#define R8A7792_CLK_GPIO0 12 -#define R8A7792_CLK_GPIO11 13 -#define R8A7792_CLK_GPIO10 14 -#define R8A7792_CLK_CAN1 15 -#define R8A7792_CLK_CAN0 16 -#define R8A7792_CLK_QSPI_MOD 17 -#define R8A7792_CLK_GPIO9 19 -#define R8A7792_CLK_GPIO8 21 -#define R8A7792_CLK_I2C5 25 -#define R8A7792_CLK_IICDVFS 26 -#define R8A7792_CLK_I2C4 27 -#define R8A7792_CLK_I2C3 28 -#define R8A7792_CLK_I2C2 29 -#define R8A7792_CLK_I2C1 30 -#define R8A7792_CLK_I2C0 31 - -/* MSTP10 */ -#define R8A7792_CLK_SSI_ALL 5 -#define R8A7792_CLK_SSI4 11 -#define R8A7792_CLK_SSI3 12 - -#endif /* __DT_BINDINGS_CLOCK_R8A7792_H__ */ diff --git a/include/dt-bindings/clock/r8a7793-clock.h b/include/dt-bindings/clock/r8a7793-clock.h deleted file mode 100644 index 49c66d8ed178..000000000000 --- a/include/dt-bindings/clock/r8a7793-clock.h +++ /dev/null @@ -1,159 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * - * r8a7793 clock definition - * - * Copyright (C) 2014 Renesas Electronics Corporation - */ - -#ifndef __DT_BINDINGS_CLOCK_R8A7793_H__ -#define __DT_BINDINGS_CLOCK_R8A7793_H__ - -/* CPG */ -#define R8A7793_CLK_MAIN 0 -#define R8A7793_CLK_PLL0 1 -#define R8A7793_CLK_PLL1 2 -#define R8A7793_CLK_PLL3 3 -#define R8A7793_CLK_LB 4 -#define R8A7793_CLK_QSPI 5 -#define R8A7793_CLK_SDH 6 -#define R8A7793_CLK_SD0 7 -#define R8A7793_CLK_Z 8 -#define R8A7793_CLK_RCAN 9 -#define R8A7793_CLK_ADSP 10 - -/* MSTP0 */ -#define R8A7793_CLK_MSIOF0 0 - -/* MSTP1 */ -#define R8A7793_CLK_VCP0 1 -#define R8A7793_CLK_VPC0 3 -#define R8A7793_CLK_SSP1 9 -#define R8A7793_CLK_TMU1 11 -#define R8A7793_CLK_3DG 12 -#define R8A7793_CLK_2DDMAC 15 -#define R8A7793_CLK_FDP1_1 18 -#define R8A7793_CLK_FDP1_0 19 -#define R8A7793_CLK_TMU3 21 -#define R8A7793_CLK_TMU2 22 -#define R8A7793_CLK_CMT0 24 -#define R8A7793_CLK_TMU0 25 -#define R8A7793_CLK_VSP1_DU1 27 -#define R8A7793_CLK_VSP1_DU0 28 -#define R8A7793_CLK_VSP1_S 31 - -/* MSTP2 */ -#define R8A7793_CLK_SCIFA2 2 -#define R8A7793_CLK_SCIFA1 3 -#define R8A7793_CLK_SCIFA0 4 -#define R8A7793_CLK_MSIOF2 5 -#define R8A7793_CLK_SCIFB0 6 -#define R8A7793_CLK_SCIFB1 7 -#define R8A7793_CLK_MSIOF1 8 -#define R8A7793_CLK_SCIFB2 16 -#define R8A7793_CLK_SYS_DMAC1 18 -#define R8A7793_CLK_SYS_DMAC0 19 - -/* MSTP3 */ -#define R8A7793_CLK_TPU0 4 -#define R8A7793_CLK_SDHI2 11 -#define R8A7793_CLK_SDHI1 12 -#define R8A7793_CLK_SDHI0 14 -#define R8A7793_CLK_MMCIF0 15 -#define R8A7793_CLK_IIC0 18 -#define R8A7793_CLK_PCIEC 19 -#define R8A7793_CLK_IIC1 23 -#define R8A7793_CLK_SSUSB 28 -#define R8A7793_CLK_CMT1 29 -#define R8A7793_CLK_USBDMAC0 30 -#define R8A7793_CLK_USBDMAC1 31 - -/* MSTP4 */ -#define R8A7793_CLK_IRQC 7 -#define R8A7793_CLK_INTC_SYS 8 - -/* MSTP5 */ -#define R8A7793_CLK_AUDIO_DMAC1 1 -#define R8A7793_CLK_AUDIO_DMAC0 2 -#define R8A7793_CLK_ADSP_MOD 6 -#define R8A7793_CLK_THERMAL 22 -#define R8A7793_CLK_PWM 23 - -/* MSTP7 */ -#define R8A7793_CLK_EHCI 3 -#define R8A7793_CLK_HSUSB 4 -#define R8A7793_CLK_HSCIF2 13 -#define R8A7793_CLK_SCIF5 14 -#define R8A7793_CLK_SCIF4 15 -#define R8A7793_CLK_HSCIF1 16 -#define R8A7793_CLK_HSCIF0 17 -#define R8A7793_CLK_SCIF3 18 -#define R8A7793_CLK_SCIF2 19 -#define R8A7793_CLK_SCIF1 20 -#define R8A7793_CLK_SCIF0 21 -#define R8A7793_CLK_DU1 23 -#define R8A7793_CLK_DU0 24 -#define R8A7793_CLK_LVDS0 26 - -/* MSTP8 */ -#define R8A7793_CLK_IPMMU_SGX 0 -#define R8A7793_CLK_VIN2 9 -#define R8A7793_CLK_VIN1 10 -#define R8A7793_CLK_VIN0 11 -#define R8A7793_CLK_ETHER 13 -#define R8A7793_CLK_SATA1 14 -#define R8A7793_CLK_SATA0 15 - -/* MSTP9 */ -#define R8A7793_CLK_GPIO7 4 -#define R8A7793_CLK_GPIO6 5 -#define R8A7793_CLK_GPIO5 7 -#define R8A7793_CLK_GPIO4 8 -#define R8A7793_CLK_GPIO3 9 -#define R8A7793_CLK_GPIO2 10 -#define R8A7793_CLK_GPIO1 11 -#define R8A7793_CLK_GPIO0 12 -#define R8A7793_CLK_RCAN1 15 -#define R8A7793_CLK_RCAN0 16 -#define R8A7793_CLK_QSPI_MOD 17 -#define R8A7793_CLK_I2C5 25 -#define R8A7793_CLK_IICDVFS 26 -#define R8A7793_CLK_I2C4 27 -#define R8A7793_CLK_I2C3 28 -#define R8A7793_CLK_I2C2 29 -#define R8A7793_CLK_I2C1 30 -#define R8A7793_CLK_I2C0 31 - -/* MSTP10 */ -#define R8A7793_CLK_SSI_ALL 5 -#define R8A7793_CLK_SSI9 6 -#define R8A7793_CLK_SSI8 7 -#define R8A7793_CLK_SSI7 8 -#define R8A7793_CLK_SSI6 9 -#define R8A7793_CLK_SSI5 10 -#define R8A7793_CLK_SSI4 11 -#define R8A7793_CLK_SSI3 12 -#define R8A7793_CLK_SSI2 13 -#define R8A7793_CLK_SSI1 14 -#define R8A7793_CLK_SSI0 15 -#define R8A7793_CLK_SCU_ALL 17 -#define R8A7793_CLK_SCU_DVC1 18 -#define R8A7793_CLK_SCU_DVC0 19 -#define R8A7793_CLK_SCU_CTU1_MIX1 20 -#define R8A7793_CLK_SCU_CTU0_MIX0 21 -#define R8A7793_CLK_SCU_SRC9 22 -#define R8A7793_CLK_SCU_SRC8 23 -#define R8A7793_CLK_SCU_SRC7 24 -#define R8A7793_CLK_SCU_SRC6 25 -#define R8A7793_CLK_SCU_SRC5 26 -#define R8A7793_CLK_SCU_SRC4 27 -#define R8A7793_CLK_SCU_SRC3 28 -#define R8A7793_CLK_SCU_SRC2 29 -#define R8A7793_CLK_SCU_SRC1 30 -#define R8A7793_CLK_SCU_SRC0 31 - -/* MSTP11 */ -#define R8A7793_CLK_SCIFA3 6 -#define R8A7793_CLK_SCIFA4 7 -#define R8A7793_CLK_SCIFA5 8 - -#endif /* __DT_BINDINGS_CLOCK_R8A7793_H__ */ diff --git a/include/dt-bindings/clock/r8a7794-clock.h b/include/dt-bindings/clock/r8a7794-clock.h deleted file mode 100644 index 649f005782d0..000000000000 --- a/include/dt-bindings/clock/r8a7794-clock.h +++ /dev/null @@ -1,137 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ - * - * Copyright (C) 2014 Renesas Electronics Corporation - * Copyright 2013 Ideas On Board SPRL - */ - -#ifndef __DT_BINDINGS_CLOCK_R8A7794_H__ -#define __DT_BINDINGS_CLOCK_R8A7794_H__ - -/* CPG */ -#define R8A7794_CLK_MAIN 0 -#define R8A7794_CLK_PLL0 1 -#define R8A7794_CLK_PLL1 2 -#define R8A7794_CLK_PLL3 3 -#define R8A7794_CLK_LB 4 -#define R8A7794_CLK_QSPI 5 -#define R8A7794_CLK_SDH 6 -#define R8A7794_CLK_SD0 7 -#define R8A7794_CLK_RCAN 8 - -/* MSTP0 */ -#define R8A7794_CLK_MSIOF0 0 - -/* MSTP1 */ -#define R8A7794_CLK_VCP0 1 -#define R8A7794_CLK_VPC0 3 -#define R8A7794_CLK_TMU1 11 -#define R8A7794_CLK_3DG 12 -#define R8A7794_CLK_2DDMAC 15 -#define R8A7794_CLK_FDP1_0 19 -#define R8A7794_CLK_TMU3 21 -#define R8A7794_CLK_TMU2 22 -#define R8A7794_CLK_CMT0 24 -#define R8A7794_CLK_TMU0 25 -#define R8A7794_CLK_VSP1_DU0 28 -#define R8A7794_CLK_VSP1_S 31 - -/* MSTP2 */ -#define R8A7794_CLK_SCIFA2 2 -#define R8A7794_CLK_SCIFA1 3 -#define R8A7794_CLK_SCIFA0 4 -#define R8A7794_CLK_MSIOF2 5 -#define R8A7794_CLK_SCIFB0 6 -#define R8A7794_CLK_SCIFB1 7 -#define R8A7794_CLK_MSIOF1 8 -#define R8A7794_CLK_SCIFB2 16 -#define R8A7794_CLK_SYS_DMAC1 18 -#define R8A7794_CLK_SYS_DMAC0 19 - -/* MSTP3 */ -#define R8A7794_CLK_SDHI2 11 -#define R8A7794_CLK_SDHI1 12 -#define R8A7794_CLK_SDHI0 14 -#define R8A7794_CLK_MMCIF0 15 -#define R8A7794_CLK_IIC0 18 -#define R8A7794_CLK_IIC1 23 -#define R8A7794_CLK_CMT1 29 -#define R8A7794_CLK_USBDMAC0 30 -#define R8A7794_CLK_USBDMAC1 31 - -/* MSTP4 */ -#define R8A7794_CLK_IRQC 7 -#define R8A7794_CLK_INTC_SYS 8 - -/* MSTP5 */ -#define R8A7794_CLK_AUDIO_DMAC0 2 -#define R8A7794_CLK_PWM 23 - -/* MSTP7 */ -#define R8A7794_CLK_EHCI 3 -#define R8A7794_CLK_HSUSB 4 -#define R8A7794_CLK_HSCIF2 13 -#define R8A7794_CLK_SCIF5 14 -#define R8A7794_CLK_SCIF4 15 -#define R8A7794_CLK_HSCIF1 16 -#define R8A7794_CLK_HSCIF0 17 -#define R8A7794_CLK_SCIF3 18 -#define R8A7794_CLK_SCIF2 19 -#define R8A7794_CLK_SCIF1 20 -#define R8A7794_CLK_SCIF0 21 -#define R8A7794_CLK_DU1 23 -#define R8A7794_CLK_DU0 24 - -/* MSTP8 */ -#define R8A7794_CLK_VIN1 10 -#define R8A7794_CLK_VIN0 11 -#define R8A7794_CLK_ETHERAVB 12 -#define R8A7794_CLK_ETHER 13 - -/* MSTP9 */ -#define R8A7794_CLK_GPIO6 5 -#define R8A7794_CLK_GPIO5 7 -#define R8A7794_CLK_GPIO4 8 -#define R8A7794_CLK_GPIO3 9 -#define R8A7794_CLK_GPIO2 10 -#define R8A7794_CLK_GPIO1 11 -#define R8A7794_CLK_GPIO0 12 -#define R8A7794_CLK_RCAN1 15 -#define R8A7794_CLK_RCAN0 16 -#define R8A7794_CLK_QSPI_MOD 17 -#define R8A7794_CLK_I2C5 25 -#define R8A7794_CLK_I2C4 27 -#define R8A7794_CLK_I2C3 28 -#define R8A7794_CLK_I2C2 29 -#define R8A7794_CLK_I2C1 30 -#define R8A7794_CLK_I2C0 31 - -/* MSTP10 */ -#define R8A7794_CLK_SSI_ALL 5 -#define R8A7794_CLK_SSI9 6 -#define R8A7794_CLK_SSI8 7 -#define R8A7794_CLK_SSI7 8 -#define R8A7794_CLK_SSI6 9 -#define R8A7794_CLK_SSI5 10 -#define R8A7794_CLK_SSI4 11 -#define R8A7794_CLK_SSI3 12 -#define R8A7794_CLK_SSI2 13 -#define R8A7794_CLK_SSI1 14 -#define R8A7794_CLK_SSI0 15 -#define R8A7794_CLK_SCU_ALL 17 -#define R8A7794_CLK_SCU_DVC1 18 -#define R8A7794_CLK_SCU_DVC0 19 -#define R8A7794_CLK_SCU_CTU1_MIX1 20 -#define R8A7794_CLK_SCU_CTU0_MIX0 21 -#define R8A7794_CLK_SCU_SRC6 25 -#define R8A7794_CLK_SCU_SRC5 26 -#define R8A7794_CLK_SCU_SRC4 27 -#define R8A7794_CLK_SCU_SRC3 28 -#define R8A7794_CLK_SCU_SRC2 29 -#define R8A7794_CLK_SCU_SRC1 30 - -/* MSTP11 */ -#define R8A7794_CLK_SCIFA3 6 -#define R8A7794_CLK_SCIFA4 7 -#define R8A7794_CLK_SCIFA5 8 - -#endif /* __DT_BINDINGS_CLOCK_R8A7794_H__ */ -- cgit v1.2.3 From 2f50304e9efb69604040feadc13f9590be8cd391 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Tue, 4 Jun 2024 18:05:13 +0100 Subject: serial: sh-sci: Add support for RZ/V2H(P) SoC Add serial support for RZ/V2H(P) SoC with earlycon. The SCIF interface in the Renesas RZ/V2H(P) is similar to that available in the RZ/G2L (R9A07G044) SoC, with the following differences: - RZ/V2H(P) SoC has three additional interrupts: one for Tx end/Rx ready and two for Rx and Tx buffer full, all of which are edge-triggered. - RZ/V2H(P) supports asynchronous mode, whereas RZ/G2L supports both synchronous and asynchronous modes. - There are differences in the configuration of certain registers such as SCSMR, SCFCR, and SCSPTR between the two SoCs. To handle these differences on RZ/V2H(P) SoC SCIx_RZV2H_SCIF_REGTYPE is added. Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240604170513.522631-6-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_sci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h index 1c89611e0e06..0f2f50b8d28e 100644 --- a/include/linux/serial_sci.h +++ b/include/linux/serial_sci.h @@ -37,6 +37,7 @@ enum { SCIx_SH7705_SCIF_REGTYPE, SCIx_HSCIF_REGTYPE, SCIx_RZ_SCIFA_REGTYPE, + SCIx_RZV2H_SCIF_REGTYPE, SCIx_NR_REGTYPES, }; -- cgit v1.2.3 From c5603e2a621dac10c5e21cc430848ebcfa6c7e01 Mon Sep 17 00:00:00 2001 From: Doug Brown Date: Thu, 6 Jun 2024 12:56:31 -0700 Subject: Revert "serial: core: only stop transmit when HW fifo is empty" This reverts commit 7bfb915a597a301abb892f620fe5c283a9fdbd77. This commit broke pxa and omap-serial, because it inhibited them from calling stop_tx() if their TX FIFOs weren't completely empty. This resulted in these two drivers hanging during transmits because the TX interrupt would stay enabled, and a new TX interrupt would never fire. Cc: stable@vger.kernel.org Fixes: 7bfb915a597a ("serial: core: only stop transmit when HW fifo is empty") Signed-off-by: Doug Brown Link: https://lore.kernel.org/r/20240606195632.173255-2-doug@schmorgal.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8cb65f50e830..3fb9a29e025f 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -811,8 +811,7 @@ enum UART_TX_FLAGS { if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ - if (!((flags) & UART_TX_NOSTOP) && pending == 0 && \ - __port->ops->tx_empty(__port)) \ + if (!((flags) & UART_TX_NOSTOP) && pending == 0) \ __port->ops->stop_tx(__port); \ } \ \ -- cgit v1.2.3 From 9bb43b9e8d9a288a214e9b17acc9e46fda3977cf Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Thu, 6 Jun 2024 12:56:32 -0700 Subject: serial: core: introduce uart_port_tx_limited_flags() Analogue to uart_port_tx_flags() introduced in commit 3ee07964d407 ("serial: core: introduce uart_port_tx_flags()"), add a _flags variant for uart_port_tx_limited(). Fixes: d11cc8c3c4b6 ("tty: serial: use uart_port_tx_limited()") Cc: stable@vger.kernel.org Signed-off-by: Jonas Gorski Signed-off-by: Doug Brown Link: https://lore.kernel.org/r/20240606195632.173255-3-doug@schmorgal.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 3fb9a29e025f..aea25eef9a1a 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -850,6 +850,24 @@ enum UART_TX_FLAGS { __count--); \ }) +/** + * uart_port_tx_limited_flags -- transmit helper for uart_port with count limiting with flags + * @port: uart port + * @ch: variable to store a character to be written to the HW + * @flags: %UART_TX_NOSTOP or similar + * @count: a limit of characters to send + * @tx_ready: can HW accept more data function + * @put_char: function to write a character + * @tx_done: function to call after the loop is done + * + * See uart_port_tx_limited() for more details. + */ +#define uart_port_tx_limited_flags(port, ch, flags, count, tx_ready, put_char, tx_done) ({ \ + unsigned int __count = (count); \ + __uart_port_tx(port, ch, flags, tx_ready, put_char, tx_done, __count, \ + __count--); \ +}) + /** * uart_port_tx -- transmit helper for uart_port * @port: uart port -- cgit v1.2.3 From 50cf5f3842af3135b88b041890e7e12a74425fcb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 6 Jun 2024 12:25:01 -0600 Subject: io_uring/msg_ring: add an alloc cache for io_kiocb entries With slab accounting, allocating and freeing memory has considerable overhead. Add a basic alloc cache for the io_kiocb allocations that msg_ring needs to do. Unlike other caches, this one is used by the sender, grabbing it from the remote ring. When the remote ring gets the posted completion, it'll free it locally. Hence it is separately locked, using ctx->msg_lock. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 1052a68fd68d..ede42dce1506 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -397,6 +397,9 @@ struct io_ring_ctx { struct callback_head poll_wq_task_work; struct list_head defer_list; + struct io_alloc_cache msg_cache; + spinlock_t msg_lock; + #ifdef CONFIG_NET_RX_BUSY_POLL struct list_head napi_list; /* track busy poll napi_id */ spinlock_t napi_lock; /* napi_list lock */ -- cgit v1.2.3 From 89f415b99050fbf7bcf3fca0cde30c09b62265eb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 20 Jun 2024 17:39:45 +0200 Subject: mfd: tmio: Remove obsolete .set_clk_div() callback Commit bef64d2908e825c5 ("mmc: remove tmio_mmc driver") removed the last user of the .set_clk_div() callback in the tmio_mmc_data structure. Signed-off-by: Geert Uytterhoeven Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Reviewed-by: Lad Prabhakar Acked-by: Lee Jones Link: https://lore.kernel.org/r/e0fa98f138a7b2836128178f8b3a757978517307.1718897545.git.geert+renesas@glider.be Signed-off-by: Ulf Hansson --- include/linux/mfd/tmio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index eace8ea6cda0..aca74ac1ff69 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -101,7 +101,6 @@ struct tmio_mmc_data { unsigned int max_blk_count; unsigned short max_segs; void (*set_pwr)(struct platform_device *host, int state); - void (*set_clk_div)(struct platform_device *host, int state); }; /* -- cgit v1.2.3 From f86937afb446d056e9e385da05536eb26483874c Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 20 Jun 2024 17:39:46 +0200 Subject: mmc: tmio: Remove obsolete .set_pwr() callback() Commit ca78476e4888f1f1 ("mfd: Remove toshiba tmio drivers") removed the last users of the .set_pwr() callback in the tmio_mmc_data structure. Remove the callback, and all related infrastructure. Signed-off-by: Geert Uytterhoeven Acked-by: Lee Jones Reviewed-by: Wolfram Sang Tested-by: Wolfram Sang Reviewed-by: Lad Prabhakar Link: https://lore.kernel.org/r/fbbc13ddd19df2c40933ffa3b82fb14841bf1d4c.1718897545.git.geert+renesas@glider.be --- include/linux/mfd/tmio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index aca74ac1ff69..8c09d14a3a28 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -100,7 +100,6 @@ struct tmio_mmc_data { dma_addr_t dma_rx_offset; unsigned int max_blk_count; unsigned short max_segs; - void (*set_pwr)(struct platform_device *host, int state); }; /* -- cgit v1.2.3 From 4b8e88e563b5f666446d002ad0dc1e6e8e7102b0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Jun 2024 11:34:09 +0200 Subject: ftruncate: pass a signed offset The old ftruncate() syscall, using the 32-bit off_t misses a sign extension when called in compat mode on 64-bit architectures. As a result, passing a negative length accidentally succeeds in truncating to file size between 2GiB and 4GiB. Changing the type of the compat syscall to the signed compat_off_t changes the behavior so it instead returns -EINVAL. The native entry point, the truncate() syscall and the corresponding loff_t based variants are all correct already and do not suffer from this mistake. Fixes: 3f6d078d4acc ("fix compat truncate/ftruncate") Reviewed-by: Christian Brauner Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann --- include/linux/compat.h | 2 +- include/linux/syscalls.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index 233f61ec8afc..56cebaff0c91 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -608,7 +608,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf); asmlinkage long compat_sys_truncate(const char __user *, compat_off_t); -asmlinkage long compat_sys_ftruncate(unsigned int, compat_ulong_t); +asmlinkage long compat_sys_ftruncate(unsigned int, compat_off_t); /* No generic prototype for truncate64, ftruncate64, fallocate */ asmlinkage long compat_sys_openat(int dfd, const char __user *filename, int flags, umode_t mode); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 9104952d323d..ba9337709878 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -418,7 +418,7 @@ asmlinkage long sys_listmount(const struct mnt_id_req __user *req, u64 __user *mnt_ids, size_t nr_mnt_ids, unsigned int flags); asmlinkage long sys_truncate(const char __user *path, long length); -asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); +asmlinkage long sys_ftruncate(unsigned int fd, off_t length); #if BITS_PER_LONG == 32 asmlinkage long sys_truncate64(const char __user *path, loff_t length); asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); -- cgit v1.2.3 From 118d777c4cb40f44e7b675955fd2da8b22f83913 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 17 Jun 2024 12:18:37 +0100 Subject: wordpart.h: Add REPEAT_BYTE_U32() In some cases it's necessary to replicate a byte across a u32 value, for which REPEAT_BYTE() would be helpful. Currently this requires explicit masking of the result to avoid sparse warnings, as e.g. (u32)REPEAT_BYTE(0xa0)) ... will result in a warning: cast truncates bits from constant value (a0a0a0a0a0a0a0a0 becomes a0a0a0a0) Add a new REPEAT_BYTE_U32() which does the necessary masking internally, so that we don't need to duplicate this for every usage. Signed-off-by: Mark Rutland Cc: Alexandru Elisei Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Link: https://lore.kernel.org/r/20240617111841.2529370-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas Acked-by: Thomas Gleixner --- include/linux/wordpart.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/wordpart.h b/include/linux/wordpart.h index 4ca1ba66d2f0..5a7b97bb7c95 100644 --- a/include/linux/wordpart.h +++ b/include/linux/wordpart.h @@ -39,6 +39,14 @@ */ #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) +/** + * REPEAT_BYTE_U32 - repeat the value @x multiple times as a u32 value + * @x: value to repeat + * + * NOTE: @x is not checked for > 0xff; larger values produce odd results. + */ +#define REPEAT_BYTE_U32(x) lower_32_bits(REPEAT_BYTE(x)) + /* Set bits in the first 'n' bytes when loaded from memory */ #ifdef __LITTLE_ENDIAN # define aligned_byte_mask(n) ((1UL << 8*(n))-1) -- cgit v1.2.3 From a6156e70316b322b61739468e465bfb1345b7ae2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 17 Jun 2024 12:18:39 +0100 Subject: irqchip/gic-v3: Make distributor priorities variables In subsequent patches the GICv3 driver will choose the regular interrupt priority at boot time. In preparation for using dynamic priorities, place the priorities in variables and update the code to pass these as parameters. Users of GICD_INT_DEF_PRI_X4 are modified to replicate the priority byte using REPEAT_BYTE_U32(). There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Alexandru Elisei Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://lore.kernel.org/r/20240617111841.2529370-4-mark.rutland@arm.com Signed-off-by: Catalin Marinas Acked-by: Thomas Gleixner --- include/linux/irqchip/arm-gic-common.h | 4 ---- include/linux/irqchip/arm-gic-v3.h | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h index 1177f3a1aed5..fc0246cc05ac 100644 --- a/include/linux/irqchip/arm-gic-common.h +++ b/include/linux/irqchip/arm-gic-common.h @@ -10,10 +10,6 @@ #include #define GICD_INT_DEF_PRI 0xa0 -#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ - (GICD_INT_DEF_PRI << 16) |\ - (GICD_INT_DEF_PRI << 8) |\ - GICD_INT_DEF_PRI) struct irq_domain; struct fwnode_handle; diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 728691365464..70c0948f978e 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -638,7 +638,7 @@ struct fwnode_handle; int __init its_lpi_memreserve_init(void); int its_cpu_init(void); int its_init(struct fwnode_handle *handle, struct rdists *rdists, - struct irq_domain *domain); + struct irq_domain *domain, u8 irq_prio); int mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent); static inline bool gic_enable_sre(void) -- cgit v1.2.3 From 18fdb6348c480fb646799f621204f674815f05e7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 17 Jun 2024 12:18:41 +0100 Subject: arm64: irqchip/gic-v3: Select priorities at boot time The distributor and PMR/RPR can present different views of the interrupt priority space dependent upon the values of GICD_CTLR.DS and SCR_EL3.FIQ. Currently we treat the distributor's view of the priority space as canonical, and when the two differ we change the way we handle values in the PMR/RPR, using the `gic_nonsecure_priorities` static key to decide what to do. This approach works, but it's sub-optimal. When using pseudo-NMI we manipulate the distributor rarely, and we manipulate the PMR/RPR registers very frequently in code spread out throughout the kernel (e.g. local_irq_{save,restore}()). It would be nicer if we could use fixed values for the PMR/RPR, and dynamically choose the values programmed into the distributor. This patch changes the GICv3 driver and arm64 code accordingly. PMR values are chosen at compile time, and the GICv3 driver determines the appropriate values to program into the distributor at boot time. This removes the need for the `gic_nonsecure_priorities` static key and results in smaller and better generated code for saving/restoring the irqflags. Before this patch, local_irq_disable() compiles to: | 0000000000000000 : | 0: d503201f nop | 4: d50343df msr daifset, #0x3 | 8: d65f03c0 ret | c: d503201f nop | 10: d2800c00 mov x0, #0x60 // #96 | 14: d5184600 msr icc_pmr_el1, x0 | 18: d65f03c0 ret | 1c: d2801400 mov x0, #0xa0 // #160 | 20: 17fffffd b 14 After this patch, local_irq_disable() compiles to: | 0000000000000000 : | 0: d503201f nop | 4: d50343df msr daifset, #0x3 | 8: d65f03c0 ret | c: d2801800 mov x0, #0xc0 // #192 | 10: d5184600 msr icc_pmr_el1, x0 | 14: d65f03c0 ret ... with 3 fewer instructions per call. For defconfig + CONFIG_PSEUDO_NMI=y, this results in a minor saving of ~4K of text, and will make it easier to make further improvements to the way we manipulate irqflags and DAIF bits. Signed-off-by: Mark Rutland Cc: Alexandru Elisei Cc: Marc Zyngier Cc: Thomas Gleixner Cc: Will Deacon Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Link: https://lore.kernel.org/r/20240617111841.2529370-6-mark.rutland@arm.com Signed-off-by: Catalin Marinas Acked-by: Thomas Gleixner --- include/linux/irqchip/arm-gic-v3-prio.h | 52 +++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 include/linux/irqchip/arm-gic-v3-prio.h (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v3-prio.h b/include/linux/irqchip/arm-gic-v3-prio.h new file mode 100644 index 000000000000..44157c9abb78 --- /dev/null +++ b/include/linux/irqchip/arm-gic-v3-prio.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __LINUX_IRQCHIP_ARM_GIC_V3_PRIO_H +#define __LINUX_IRQCHIP_ARM_GIC_V3_PRIO_H + +/* + * GIC priorities from the view of the PMR/RPR. + * + * These values are chosen to be valid in either the absolute priority space or + * the NS view of the priority space. The value programmed into the distributor + * and ITS will be chosen at boot time such that these values appear in the + * PMR/RPR. + * + * GICV3_PRIO_UNMASKED is the PMR view of the priority to use to permit both + * IRQs and pseudo-NMIs. + * + * GICV3_PRIO_IRQ is the PMR view of the priority of regular interrupts. This + * can be written to the PMR to mask regular IRQs. + * + * GICV3_PRIO_NMI is the PMR view of the priority of pseudo-NMIs. This can be + * written to the PMR to mask pseudo-NMIs. + * + * On arm64 some code sections either automatically switch back to PSR.I or + * explicitly require to not use priority masking. If bit GICV3_PRIO_PSR_I_SET + * is included in the priority mask, it indicates that PSR.I should be set and + * interrupt disabling temporarily does not rely on IRQ priorities. + */ +#define GICV3_PRIO_UNMASKED 0xe0 +#define GICV3_PRIO_IRQ 0xc0 +#define GICV3_PRIO_NMI 0x80 + +#define GICV3_PRIO_PSR_I_SET (1 << 4) + +#ifndef __ASSEMBLER__ + +#define __gicv3_prio_to_ns(p) (0xff & ((p) << 1)) +#define __gicv3_ns_to_prio(ns) (0x80 | ((ns) >> 1)) + +#define __gicv3_prio_valid_ns(p) \ + (__gicv3_ns_to_prio(__gicv3_prio_to_ns(p)) == (p)) + +static_assert(__gicv3_prio_valid_ns(GICV3_PRIO_NMI)); +static_assert(__gicv3_prio_valid_ns(GICV3_PRIO_IRQ)); + +static_assert(GICV3_PRIO_NMI < GICV3_PRIO_IRQ); +static_assert(GICV3_PRIO_IRQ < GICV3_PRIO_UNMASKED); + +static_assert(GICV3_PRIO_IRQ < (GICV3_PRIO_IRQ | GICV3_PRIO_PSR_I_SET)); + +#endif /* __ASSEMBLER */ + +#endif /* __LINUX_IRQCHIP_ARM_GIC_V3_PRIO_H */ -- cgit v1.2.3 From 12046f8c77e0ed6d41beabde0edbb729499c970b Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Mon, 24 Jun 2024 19:31:15 +0200 Subject: platform/x86: wmi: Add driver_override support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for forcing the WMI driver core to bind a certain WMI driver to a WMI device. This will be necessary to support generic WMI drivers without an ID table Reviewed-by: Hans de Goede Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20240624173116.31314-2-W_Armin@gmx.de Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/wmi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 63cca3b58d6d..3275470b5531 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -16,12 +16,16 @@ * struct wmi_device - WMI device structure * @dev: Device associated with this WMI device * @setable: True for devices implementing the Set Control Method + * @driver_override: Driver name to force a match; do not set directly, + * because core frees it; use driver_set_override() to + * set or clear it. * * This represents WMI devices discovered by the WMI driver core. */ struct wmi_device { struct device dev; bool setable; + const char *driver_override; }; /** -- cgit v1.2.3 From 44348870de4b8f292f97b84583a298d66fbaf738 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 24 Jun 2024 19:38:35 +0200 Subject: block: fix the blk_queue_nonrot polarity Take care of the inverse polarity of the BLK_FEAT_ROTATIONAL flag vs the old nonrot helper. Fixes: bd4a633b6f7c ("block: move the nonrot flag to queue_limits") Reported-by: kernel test robot Reported-by: Keith Busch Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20240624173835.76753-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e89003360c17..b2f1362c4681 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -617,7 +617,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) -#define blk_queue_nonrot(q) ((q)->limits.features & BLK_FEAT_ROTATIONAL) +#define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL)) #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) #define blk_queue_zone_resetall(q) \ ((q)->limits.features & BLK_FEAT_ZONE_RESETALL) -- cgit v1.2.3 From 4a24bbabc826eaba6f94a9741712117b8e2b0587 Mon Sep 17 00:00:00 2001 From: Chengming Zhou Date: Fri, 7 Jun 2024 16:40:14 +0800 Subject: slab: delete useless RED_INACTIVE and RED_ACTIVE These seem useless since we use the SLUB_RED_INACTIVE and SLUB_RED_ACTIVE, so just delete them, no functional change. Reviewed-by: Vlastimil Babka Signed-off-by: Chengming Zhou Reviewed-by: Christoph Lameter (Ampere) Signed-off-by: Vlastimil Babka --- include/linux/poison.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/poison.h b/include/linux/poison.h index 1f0ee2459f2a..9c1a035af97c 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -38,11 +38,8 @@ * Magic nums for obj red zoning. * Placed in the first word before and the first word after an obj. */ -#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */ -#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */ - -#define SLUB_RED_INACTIVE 0xbb -#define SLUB_RED_ACTIVE 0xcc +#define SLUB_RED_INACTIVE 0xbb /* when obj is inactive */ +#define SLUB_RED_ACTIVE 0xcc /* when obj is active */ /* ...and for poisoning */ #define POISON_INUSE 0x5a /* for use-uninitialised poisoning */ -- cgit v1.2.3 From d89a5c6705998ddc42b104f8eabd3c4b9e8fde08 Mon Sep 17 00:00:00 2001 From: Weiwen Hu Date: Mon, 3 Jun 2024 20:57:00 +0800 Subject: nvme: fix status magic numbers Replaced some magic numbers about SC and SCT with enum and macro. Signed-off-by: Weiwen Hu Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 425573202295..c8d7fdd095a1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1846,6 +1846,7 @@ enum { /* * Generic Command Status: */ + NVME_SCT_GENERIC = 0x0, NVME_SC_SUCCESS = 0x0, NVME_SC_INVALID_OPCODE = 0x1, NVME_SC_INVALID_FIELD = 0x2, @@ -1893,6 +1894,7 @@ enum { /* * Command Specific Status: */ + NVME_SCT_COMMAND_SPECIFIC = 0x100, NVME_SC_CQ_INVALID = 0x100, NVME_SC_QID_INVALID = 0x101, NVME_SC_QUEUE_SIZE = 0x102, @@ -1966,6 +1968,7 @@ enum { /* * Media and Data Integrity Errors: */ + NVME_SCT_MEDIA_ERROR = 0x200, NVME_SC_WRITE_FAULT = 0x280, NVME_SC_READ_ERROR = 0x281, NVME_SC_GUARD_CHECK = 0x282, @@ -1978,6 +1981,7 @@ enum { /* * Path-related Errors: */ + NVME_SCT_PATH = 0x300, NVME_SC_INTERNAL_PATH_ERROR = 0x300, NVME_SC_ANA_PERSISTENT_LOSS = 0x301, NVME_SC_ANA_INACCESSIBLE = 0x302, @@ -1986,11 +1990,17 @@ enum { NVME_SC_HOST_PATH_ERROR = 0x370, NVME_SC_HOST_ABORTED_CMD = 0x371, - NVME_SC_CRD = 0x1800, + NVME_SC_MASK = 0x00ff, /* Status Code */ + NVME_SCT_MASK = 0x0700, /* Status Code Type */ + NVME_SCT_SC_MASK = NVME_SCT_MASK | NVME_SC_MASK, + + NVME_SC_CRD = 0x1800, /* Command Retry Delayed */ NVME_SC_MORE = 0x2000, - NVME_SC_DNR = 0x4000, + NVME_SC_DNR = 0x4000, /* Do Not Retry */ }; +#define NVME_SCT(status) ((status) >> 8 & 7) + struct nvme_completion { /* * Used by Admin and Fabrics commands to return data: -- cgit v1.2.3 From dd0b0a4a2c5d7209457dc172997d1243ad269cfa Mon Sep 17 00:00:00 2001 From: Weiwen Hu Date: Mon, 3 Jun 2024 20:57:01 +0800 Subject: nvme: rename CDR/MORE/DNR to NVME_STATUS_* CDR/MORE/DNR fields are not belonging to SC in the NVMe spec, rename them to NVME_STATUS_* to avoid confusion. Signed-off-by: Weiwen Hu Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index c8d7fdd095a1..27faae34245d 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1994,9 +1994,9 @@ enum { NVME_SCT_MASK = 0x0700, /* Status Code Type */ NVME_SCT_SC_MASK = NVME_SCT_MASK | NVME_SC_MASK, - NVME_SC_CRD = 0x1800, /* Command Retry Delayed */ - NVME_SC_MORE = 0x2000, - NVME_SC_DNR = 0x4000, /* Do Not Retry */ + NVME_STATUS_CRD = 0x1800, /* Command Retry Delayed */ + NVME_STATUS_MORE = 0x2000, + NVME_STATUS_DNR = 0x4000, /* Do Not Retry */ }; #define NVME_SCT(status) ((status) >> 8 & 7) -- cgit v1.2.3 From 99032e9dbabcfc64bf71e94a1b662c4bfe534ed1 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 27 May 2024 07:15:23 +0200 Subject: nvmet-fc: implement host_traddr() Implement callback to display the host transport address by adding a callback 'host_traddr' for nvmet_fc_target_template. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: James Smart Signed-off-by: Daniel Wagner Signed-off-by: Keith Busch --- include/linux/nvme-fc-driver.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 4109f1bd6128..89ea1ebd975a 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -920,6 +920,9 @@ struct nvmet_fc_target_port { * further references to hosthandle. * Entrypoint is Mandatory if the lldd calls nvmet_fc_invalidate_host(). * + * @host_traddr: called by the transport to retrieve the node name and + * port name of the host port address. + * * @max_hw_queues: indicates the maximum number of hw queues the LLDD * supports for cpu affinitization. * Value is Mandatory. Must be at least 1. @@ -975,6 +978,7 @@ struct nvmet_fc_target_template { void (*ls_abort)(struct nvmet_fc_target_port *targetport, void *hosthandle, struct nvmefc_ls_req *lsreq); void (*host_release)(void *hosthandle); + int (*host_traddr)(void *hosthandle, u64 *wwnn, u64 *wwpn); u32 max_hw_queues; u16 max_sgl_segments; -- cgit v1.2.3 From 25e6e00d3f78d73c30ee78fee8060cf68908be50 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 24 Jun 2024 07:08:52 +0900 Subject: firewire: core: add tracepoints events for allocation/deallocation of isochronous context It is helpful to trace the allocation and dealocation of isochronous when the core function is requested them by both in-kernel unit drivers and userspace applications. This commit adds some tracepoints events for the aim. Link: https://lore.kernel.org/r/20240623220859.851685-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 105 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index 25289a063deb..0cca1ef841f3 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -436,6 +436,111 @@ TRACE_EVENT(self_id_sequence, #undef PHY_PACKET_SELF_ID_GET_POWER_CLASS #undef PHY_PACKET_SELF_ID_GET_INITIATED_RESET +TRACE_EVENT_CONDITION(isoc_outbound_allocate, + TP_PROTO(const struct fw_iso_context *ctx, unsigned int channel, unsigned int scode), + TP_ARGS(ctx, channel, scode), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT), + TP_STRUCT__entry( + __field(u64, context) + __field(u8, card_index) + __field(u8, channel) + __field(u8, scode) + ), + TP_fast_assign( + __entry->context = (uintptr_t)ctx; + __entry->card_index = ctx->card->index; + __entry->channel = channel; + __entry->scode = scode; + ), + TP_printk( + "context=0x%llx card_index=%u channel=%u scode=%u", + __entry->context, + __entry->card_index, + __entry->channel, + __entry->scode + ) +); + +TRACE_EVENT_CONDITION(isoc_inbound_single_allocate, + TP_PROTO(const struct fw_iso_context *ctx, unsigned int channel, unsigned int header_size), + TP_ARGS(ctx, channel, header_size), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE), + TP_STRUCT__entry( + __field(u64, context) + __field(u8, card_index) + __field(u8, channel) + __field(u8, header_size) + ), + TP_fast_assign( + __entry->context = (uintptr_t)ctx; + __entry->card_index = ctx->card->index; + __entry->channel = channel; + __entry->header_size = header_size; + ), + TP_printk( + "context=0x%llx card_index=%u channel=%u header_size=%u", + __entry->context, + __entry->card_index, + __entry->channel, + __entry->header_size + ) +); + +TRACE_EVENT_CONDITION(isoc_inbound_multiple_allocate, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL), + TP_STRUCT__entry( + __field(u64, context) + __field(u8, card_index) + ), + TP_fast_assign( + __entry->context = (uintptr_t)ctx; + __entry->card_index = ctx->card->index; + ), + TP_printk( + "context=0x%llx card_index=%u", + __entry->context, + __entry->card_index + ) +); + +DECLARE_EVENT_CLASS(isoc_destroy_template, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_STRUCT__entry( + __field(u64, context) + __field(u8, card_index) + ), + TP_fast_assign( + __entry->context = (uintptr_t)ctx; + __entry->card_index = ctx->card->index; + ), + TP_printk( + "context=0x%llx card_index=%u", + __entry->context, + __entry->card_index + ) +) + +DEFINE_EVENT_CONDITION(isoc_destroy_template, isoc_outbound_destroy, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT) +); + +DEFINE_EVENT_CONDITION(isoc_destroy_template, isoc_inbound_single_destroy, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE) +); + +DEFINE_EVENT_CONDITION(isoc_destroy_template, isoc_inbound_multiple_destroy, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL) +); + #undef QUADLET_SIZE #endif // _FIREWIRE_TRACE_EVENT_H -- cgit v1.2.3 From 9f16ac725b233c53e2a043aa8d1b6f0453e94556 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 24 Jun 2024 07:08:53 +0900 Subject: firewire: core: add tracepoints events for setting channels of multichannel context It is helpful to trace the channel setting for the multichannel isochronous context when the core function is requested it by both in-kernel unit drivers and userspace applications. This commit adds some tracepoints events for the aim. Link: https://lore.kernel.org/r/20240623220859.851685-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index 0cca1ef841f3..0334e02f9d94 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -541,6 +541,27 @@ DEFINE_EVENT_CONDITION(isoc_destroy_template, isoc_inbound_multiple_destroy, TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL) ); +TRACE_EVENT(isoc_inbound_multiple_channels, + TP_PROTO(const struct fw_iso_context *ctx, u64 channels), + TP_ARGS(ctx, channels), + TP_STRUCT__entry( + __field(u64, context) + __field(u8, card_index) + __field(u64, channels) + ), + TP_fast_assign( + __entry->context = (uintptr_t)ctx; + __entry->card_index = ctx->card->index; + __entry->channels = channels; + ), + TP_printk( + "context=0x%llx card_index=%u channels=0x%016llx", + __entry->context, + __entry->card_index, + __entry->channels + ) +); + #undef QUADLET_SIZE #endif // _FIREWIRE_TRACE_EVENT_H -- cgit v1.2.3 From 4e64210f67126e77d54ffcd3297a87539b858322 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 24 Jun 2024 07:08:54 +0900 Subject: firewire: core: add tracepoints events for starting/stopping of isochronous context It is helpful to trace the starting and stopping of isochronous context when the core function is requested them by both in-kernel unit drivers and userspace applications. This commit adds some tracepoints events for the aim. Link: https://lore.kernel.org/r/20240623220859.851685-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 108 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index 0334e02f9d94..c984b7d6380f 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -562,6 +562,114 @@ TRACE_EVENT(isoc_inbound_multiple_channels, ) ); +TRACE_EVENT_CONDITION(isoc_outbound_start, + TP_PROTO(const struct fw_iso_context *ctx, int cycle_match), + TP_ARGS(ctx, cycle_match), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT), + TP_STRUCT__entry( + __field(u64, context) + __field(u8, card_index) + __field(bool, cycle_match) + __field(u16, cycle) + ), + TP_fast_assign( + __entry->context = (uintptr_t)ctx; + __entry->card_index = ctx->card->index; + __entry->cycle_match = cycle_match < 0 ? false : true; + __entry->cycle = __entry->cycle_match ? (u16)cycle_match : 0; + ), + TP_printk( + "context=0x%llx card_index=%u cycle_match=%s cycle=0x%04x", + __entry->context, + __entry->card_index, + __entry->cycle_match ? "true" : "false", + __entry->cycle + ) +); + +DECLARE_EVENT_CLASS(isoc_inbound_start_template, + TP_PROTO(const struct fw_iso_context *ctx, int cycle_match, unsigned int sync, unsigned int tags), + TP_ARGS(ctx, cycle_match, sync, tags), + TP_STRUCT__entry( + __field(u64, context) + __field(u8, card_index) + __field(bool, cycle_match) + __field(u16, cycle) + __field(u8, sync) + __field(u8, tags) + ), + TP_fast_assign( + __entry->context = (uintptr_t)ctx; + __entry->card_index = ctx->card->index; + __entry->cycle_match = cycle_match < 0 ? false : true; + __entry->cycle = __entry->cycle_match ? (u16)cycle_match : 0; + __entry->sync = sync; + __entry->tags = tags; + ), + TP_printk( + "context=0x%llx card_index=%u cycle_match=%s cycle=0x%04x sync=%u tags=%s", + __entry->context, + __entry->card_index, + __entry->cycle_match ? "true" : "false", + __entry->cycle, + __entry->sync, + __print_flags(__entry->tags, "|", + { FW_ISO_CONTEXT_MATCH_TAG0, "0" }, + { FW_ISO_CONTEXT_MATCH_TAG1, "1" }, + { FW_ISO_CONTEXT_MATCH_TAG2, "2" }, + { FW_ISO_CONTEXT_MATCH_TAG3, "3" } + ) + ) +); + +DEFINE_EVENT_CONDITION(isoc_inbound_start_template, isoc_inbound_single_start, + TP_PROTO(const struct fw_iso_context *ctx, int cycle_match, unsigned int sync, unsigned int tags), + TP_ARGS(ctx, cycle_match, sync, tags), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE) +); + +DEFINE_EVENT_CONDITION(isoc_inbound_start_template, isoc_inbound_multiple_start, + TP_PROTO(const struct fw_iso_context *ctx, int cycle_match, unsigned int sync, unsigned int tags), + TP_ARGS(ctx, cycle_match, sync, tags), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL) +); + +DECLARE_EVENT_CLASS(isoc_stop_template, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_STRUCT__entry( + __field(u64, context) + __field(u8, card_index) + ), + TP_fast_assign( + __entry->context = (uintptr_t)ctx; + __entry->card_index = ctx->card->index; + ), + TP_printk( + "context=0x%llx card_index=%u", + __entry->context, + __entry->card_index + ) +) + +DEFINE_EVENT_CONDITION(isoc_stop_template, isoc_outbound_stop, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT) +); + +DEFINE_EVENT_CONDITION(isoc_stop_template, isoc_inbound_single_stop, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE) +); + +DEFINE_EVENT_CONDITION(isoc_stop_template, isoc_inbound_multiple_stop, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL) +); + #undef QUADLET_SIZE #endif // _FIREWIRE_TRACE_EVENT_H -- cgit v1.2.3 From c0b0ce6c47903ed2b9028933793801b3b4e72ca1 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 24 Jun 2024 07:08:55 +0900 Subject: firewire: core: add tracepoints events for flushing of isochronous context It is helpful to trace the flushing of isochronous context when the core function is requested them by both in-kernel unit drivers and userspace applications. This commit adds some tracepoints events for the aim. Link: https://lore.kernel.org/r/20240623220859.851685-5-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index c984b7d6380f..b404e6324671 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -670,6 +670,42 @@ DEFINE_EVENT_CONDITION(isoc_stop_template, isoc_inbound_multiple_stop, TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL) ); +DECLARE_EVENT_CLASS(isoc_flush_template, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_STRUCT__entry( + __field(u64, context) + __field(u8, card_index) + ), + TP_fast_assign( + __entry->context = (uintptr_t)ctx; + __entry->card_index = ctx->card->index; + ), + TP_printk( + "context=0x%llx card_index=%u", + __entry->context, + __entry->card_index + ) +); + +DEFINE_EVENT_CONDITION(isoc_flush_template, isoc_outbound_flush, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT) +); + +DEFINE_EVENT_CONDITION(isoc_flush_template, isoc_inbound_single_flush, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE) +); + +DEFINE_EVENT_CONDITION(isoc_flush_template, isoc_inbound_multiple_flush, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL) +); + #undef QUADLET_SIZE #endif // _FIREWIRE_TRACE_EVENT_H -- cgit v1.2.3 From 8320b63e028f62542942941b51951006403d8b4f Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 24 Jun 2024 07:08:56 +0900 Subject: firewire: core: add tracepoints events for flushing completions of isochronous context It is helpful to trace the flushing completions of isochronous context when the core function is requested them by both in-kernel unit drivers and userspace applications. This commit adds some tracepoints events for the aim. Link: https://lore.kernel.org/r/20240623220859.851685-6-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index b404e6324671..ad1546120aa3 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -706,6 +706,42 @@ DEFINE_EVENT_CONDITION(isoc_flush_template, isoc_inbound_multiple_flush, TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL) ); +DECLARE_EVENT_CLASS(isoc_flush_completions_template, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_STRUCT__entry( + __field(u64, context) + __field(u8, card_index) + ), + TP_fast_assign( + __entry->context = (uintptr_t)ctx; + __entry->card_index = ctx->card->index; + ), + TP_printk( + "context=0x%llx card_index=%u", + __entry->context, + __entry->card_index + ) +); + +DEFINE_EVENT_CONDITION(isoc_flush_completions_template, isoc_outbound_flush_completions, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT) +); + +DEFINE_EVENT_CONDITION(isoc_flush_completions_template, isoc_inbound_single_flush_completions, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE) +); + +DEFINE_EVENT_CONDITION(isoc_flush_completions_template, isoc_inbound_multiple_flush_completions, + TP_PROTO(const struct fw_iso_context *ctx), + TP_ARGS(ctx), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL) +); + #undef QUADLET_SIZE #endif // _FIREWIRE_TRACE_EVENT_H -- cgit v1.2.3 From 1f3c0d794df6d85834a0664db3658c422ec5c8d8 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 24 Jun 2024 07:08:57 +0900 Subject: firewire: core: add tracepoints events for queueing packets of isochronous context It is helpful to trace the queueing packets of isochronous context when the core function is requested them by both in-kernel unit drivers and userspace applications. This commit adds some tracepoints events for the aim. Link: https://lore.kernel.org/r/20240623220859.851685-7-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 79 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index ad1546120aa3..0381b3ca4d0e 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -742,6 +742,85 @@ DEFINE_EVENT_CONDITION(isoc_flush_completions_template, isoc_inbound_multiple_fl TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL) ); +#define TP_STRUCT__entry_iso_packet(ctx, buffer_offset, packet) \ + TP_STRUCT__entry( \ + __field(u64, context) \ + __field(u8, card_index) \ + __field(u32, buffer_offset) \ + __field(bool, interrupt) \ + __field(bool, skip) \ + __field(u8, sy) \ + __field(u8, tag) \ + __dynamic_array(u32, header, packet->header_length / QUADLET_SIZE) \ + ) + +#define TP_fast_assign_iso_packet(ctx, buffer_offset, packet) \ + TP_fast_assign( \ + __entry->context = (uintptr_t)ctx; \ + __entry->card_index = ctx->card->index; \ + __entry->buffer_offset = buffer_offset; \ + __entry->interrupt = packet->interrupt; \ + __entry->skip = packet->skip; \ + __entry->sy = packet->sy; \ + __entry->tag = packet->tag; \ + memcpy(__get_dynamic_array(header), packet->header, \ + __get_dynamic_array_len(header)); \ + ) + +TRACE_EVENT_CONDITION(isoc_outbound_queue, + TP_PROTO(const struct fw_iso_context *ctx, unsigned long buffer_offset, const struct fw_iso_packet *packet), + TP_ARGS(ctx, buffer_offset, packet), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT), + TP_STRUCT__entry_iso_packet(ctx, buffer_offset, packet), + TP_fast_assign_iso_packet(ctx, buffer_offset, packet), + TP_printk( + "context=0x%llx card_index=%u buffer_offset=0x%x interrupt=%s skip=%s sy=%d tag=%u header=%s", + __entry->context, + __entry->card_index, + __entry->buffer_offset, + __entry->interrupt ? "true" : "false", + __entry->skip ? "true" : "false", + __entry->sy, + __entry->tag, + __print_array(__get_dynamic_array(header), + __get_dynamic_array_len(header) / QUADLET_SIZE, QUADLET_SIZE) + ) +); + +TRACE_EVENT_CONDITION(isoc_inbound_single_queue, + TP_PROTO(const struct fw_iso_context *ctx, unsigned long buffer_offset, const struct fw_iso_packet *packet), + TP_ARGS(ctx, buffer_offset, packet), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE), + TP_STRUCT__entry_iso_packet(ctx, buffer_offset, packet), + TP_fast_assign_iso_packet(ctx, buffer_offset, packet), + TP_printk( + "context=0x%llx card_index=%u buffer_offset=0x%x interrupt=%s skip=%s", + __entry->context, + __entry->card_index, + __entry->buffer_offset, + __entry->interrupt ? "true" : "false", + __entry->skip ? "true" : "false" + ) +); + +TRACE_EVENT_CONDITION(isoc_inbound_multiple_queue, + TP_PROTO(const struct fw_iso_context *ctx, unsigned long buffer_offset, const struct fw_iso_packet *packet), + TP_ARGS(ctx, buffer_offset, packet), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL), + TP_STRUCT__entry_iso_packet(ctx, buffer_offset, packet), + TP_fast_assign_iso_packet(ctx, buffer_offset, packet), + TP_printk( + "context=0x%llx card_index=%u buffer_offset=0x%x interrupt=%s", + __entry->context, + __entry->card_index, + __entry->buffer_offset, + __entry->interrupt ? "true" : "false" + ) +); + +#undef TP_STRUCT__entry_iso_packet +#undef TP_fast_assign_iso_packet + #undef QUADLET_SIZE #endif // _FIREWIRE_TRACE_EVENT_H -- cgit v1.2.3 From daf763c2d6d1dc50586e351aca868c7786d3c84e Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Mon, 24 Jun 2024 07:08:58 +0900 Subject: firewire: core: add tracepoints events for completions of packets in isochronous context It is helpful to trace completion of packets in isochronous context when the core function is requested them by both in-kernel units driver and userspace applications. This commit adds some tracepoints events for the aim. Link: https://lore.kernel.org/r/20240623220859.851685-8-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 78 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index 0381b3ca4d0e..d9158a134beb 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -821,6 +821,84 @@ TRACE_EVENT_CONDITION(isoc_inbound_multiple_queue, #undef TP_STRUCT__entry_iso_packet #undef TP_fast_assign_iso_packet +#ifndef show_cause +enum fw_iso_context_completions_cause { + FW_ISO_CONTEXT_COMPLETIONS_CAUSE_FLUSH = 0, + FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ, + FW_ISO_CONTEXT_COMPLETIONS_CAUSE_HEADER_OVERFLOW, +}; +#define show_cause(cause) \ + __print_symbolic(cause, \ + { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_FLUSH, "FLUSH" }, \ + { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_IRQ, "IRQ" }, \ + { FW_ISO_CONTEXT_COMPLETIONS_CAUSE_HEADER_OVERFLOW, "HEADER_OVERFLOW" } \ + ) +#endif + +DECLARE_EVENT_CLASS(isoc_single_completions_template, + TP_PROTO(const struct fw_iso_context *ctx, u16 timestamp, enum fw_iso_context_completions_cause cause, const u32 *header, unsigned int header_length), + TP_ARGS(ctx, timestamp, cause, header, header_length), + TP_STRUCT__entry( + __field(u64, context) + __field(u8, card_index) + __field(u16, timestamp) + __field(u8, cause) + __dynamic_array(u32, header, header_length / QUADLET_SIZE) + ), + TP_fast_assign( + __entry->context = (uintptr_t)ctx; + __entry->card_index = ctx->card->index; + __entry->timestamp = timestamp; + __entry->cause = cause; + memcpy(__get_dynamic_array(header), header, __get_dynamic_array_len(header)); + ), + TP_printk( + "context=0x%llx card_index=%u timestap=0x%04x cause=%s header=%s", + __entry->context, + __entry->card_index, + __entry->timestamp, + show_cause(__entry->cause), + __print_array(__get_dynamic_array(header), + __get_dynamic_array_len(header) / QUADLET_SIZE, QUADLET_SIZE) + ) +) + +DEFINE_EVENT_CONDITION(isoc_single_completions_template, isoc_outbound_completions, + TP_PROTO(const struct fw_iso_context *ctx, u16 timestamp, enum fw_iso_context_completions_cause cause, const u32 *header, unsigned int header_length), + TP_ARGS(ctx, timestamp, cause, header, header_length), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_TRANSMIT) +); + +DEFINE_EVENT_CONDITION(isoc_single_completions_template, isoc_inbound_single_completions, + TP_PROTO(const struct fw_iso_context *ctx, u16 timestamp, enum fw_iso_context_completions_cause cause, const u32 *header, unsigned int header_length), + TP_ARGS(ctx, timestamp, cause, header, header_length), + TP_CONDITION(ctx->type == FW_ISO_CONTEXT_RECEIVE) +); + +TRACE_EVENT(isoc_inbound_multiple_completions, + TP_PROTO(const struct fw_iso_context *ctx, unsigned int completed, enum fw_iso_context_completions_cause cause), + TP_ARGS(ctx, completed, cause), + TP_STRUCT__entry( + __field(u64, context) + __field(u8, card_index) + __field(u16, completed) + __field(u8, cause) + ), + TP_fast_assign( + __entry->context = (uintptr_t)ctx; + __entry->card_index = ctx->card->index; + __entry->completed = completed; + __entry->cause = cause; + ), + TP_printk( + "context=0x%llx card_index=%u comleted=%u cause=%s", + __entry->context, + __entry->card_index, + __entry->completed, + show_cause(__entry->cause) + ) +); + #undef QUADLET_SIZE #endif // _FIREWIRE_TRACE_EVENT_H -- cgit v1.2.3 From 07e4fd4c0592ad57fe4c5033393d30362dbfaa5d Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:51 +0200 Subject: locking/local_lock: Introduce guard definition for local_lock. Introduce lock guard definition for local_lock_t. There are no users yet. Acked-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-2-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/local_lock.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index e55010fa7329..82366a37f447 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -51,4 +51,15 @@ #define local_unlock_irqrestore(lock, flags) \ __local_unlock_irqrestore(lock, flags) +DEFINE_GUARD(local_lock, local_lock_t __percpu*, + local_lock(_T), + local_unlock(_T)) +DEFINE_GUARD(local_lock_irq, local_lock_t __percpu*, + local_lock_irq(_T), + local_unlock_irq(_T)) +DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu, + local_lock_irqsave(_T->lock, _T->flags), + local_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + #endif -- cgit v1.2.3 From c5bcab7558220bbad8bd4863576afd1b340ce29f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:52 +0200 Subject: locking/local_lock: Add local nested BH locking infrastructure. Add local_lock_nested_bh() locking. It is based on local_lock_t and the naming follows the preempt_disable_nested() example. For !PREEMPT_RT + !LOCKDEP it is a per-CPU annotation for locking assumptions based on local_bh_disable(). The macro is optimized away during compilation. For !PREEMPT_RT + LOCKDEP the local_lock_nested_bh() is reduced to the usual lock-acquire plus lockdep_assert_in_softirq() - ensuring that BH is disabled. For PREEMPT_RT local_lock_nested_bh() acquires the specified per-CPU lock. It does not disable CPU migration because it relies on local_bh_disable() disabling CPU migration. With LOCKDEP it performans the usual lockdep checks as with !PREEMPT_RT. Due to include hell the softirq check has been moved spinlock.c. The intention is to use this locking in places where locking of a per-CPU variable relies on BH being disabled. Instead of treating disabled bottom halves as a big per-CPU lock, PREEMPT_RT can use this to reduce the locking scope to what actually needs protecting. A side effect is that it also documents the protection scope of the per-CPU variables. Acked-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-3-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/local_lock.h | 10 ++++++++++ include/linux/local_lock_internal.h | 31 +++++++++++++++++++++++++++++++ include/linux/lockdep.h | 3 +++ 3 files changed, 44 insertions(+) (limited to 'include') diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h index 82366a37f447..091dc0b6bdfb 100644 --- a/include/linux/local_lock.h +++ b/include/linux/local_lock.h @@ -62,4 +62,14 @@ DEFINE_LOCK_GUARD_1(local_lock_irqsave, local_lock_t __percpu, local_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +#define local_lock_nested_bh(_lock) \ + __local_lock_nested_bh(_lock) + +#define local_unlock_nested_bh(_lock) \ + __local_unlock_nested_bh(_lock) + +DEFINE_GUARD(local_lock_nested_bh, local_lock_t __percpu*, + local_lock_nested_bh(_T), + local_unlock_nested_bh(_T)) + #endif diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h index 975e33b793a7..8dd71fbbb6d2 100644 --- a/include/linux/local_lock_internal.h +++ b/include/linux/local_lock_internal.h @@ -62,6 +62,17 @@ do { \ local_lock_debug_init(lock); \ } while (0) +#define __spinlock_nested_bh_init(lock) \ +do { \ + static struct lock_class_key __key; \ + \ + debug_check_no_locks_freed((void *)lock, sizeof(*lock));\ + lockdep_init_map_type(&(lock)->dep_map, #lock, &__key, \ + 0, LD_WAIT_CONFIG, LD_WAIT_INV, \ + LD_LOCK_NORMAL); \ + local_lock_debug_init(lock); \ +} while (0) + #define __local_lock(lock) \ do { \ preempt_disable(); \ @@ -98,6 +109,15 @@ do { \ local_irq_restore(flags); \ } while (0) +#define __local_lock_nested_bh(lock) \ + do { \ + lockdep_assert_in_softirq(); \ + local_lock_acquire(this_cpu_ptr(lock)); \ + } while (0) + +#define __local_unlock_nested_bh(lock) \ + local_lock_release(this_cpu_ptr(lock)) + #else /* !CONFIG_PREEMPT_RT */ /* @@ -138,4 +158,15 @@ typedef spinlock_t local_lock_t; #define __local_unlock_irqrestore(lock, flags) __local_unlock(lock) +#define __local_lock_nested_bh(lock) \ +do { \ + lockdep_assert_in_softirq_func(); \ + spin_lock(this_cpu_ptr(lock)); \ +} while (0) + +#define __local_unlock_nested_bh(lock) \ +do { \ + spin_unlock(this_cpu_ptr((lock))); \ +} while (0) + #endif /* CONFIG_PREEMPT_RT */ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 08b0d1d9d78b..3f5a551579cc 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -600,6 +600,8 @@ do { \ (!in_softirq() || in_irq() || in_nmi())); \ } while (0) +extern void lockdep_assert_in_softirq_func(void); + #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) @@ -613,6 +615,7 @@ do { \ # define lockdep_assert_preemption_enabled() do { } while (0) # define lockdep_assert_preemption_disabled() do { } while (0) # define lockdep_assert_in_softirq() do { } while (0) +# define lockdep_assert_in_softirq_func() do { } while (0) #endif #ifdef CONFIG_PROVE_RAW_LOCK_NESTING -- cgit v1.2.3 From ebad6d0334793f16a16e5320182f665292a31e0c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:56 +0200 Subject: net/ipv4: Use nested-BH locking for ipv4_tcp_sk. ipv4_tcp_sk is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Make a struct with a sock member (original ipv4_tcp_sk) and a local_lock_t and use local_lock_nested_bh() for locking. This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. Cc: David Ahern Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-7-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/net/sock.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index b30ea0c342a6..cce23ac4d514 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -544,6 +544,11 @@ struct sock { netns_tracker ns_tracker; }; +struct sock_bh_locked { + struct sock *sock; + local_lock_t bh_lock; +}; + enum sk_pacing { SK_PACING_NONE = 0, SK_PACING_NEEDED = 1, -- cgit v1.2.3 From ecefbc09e8ee768ae85b7bb7a1de8c8287397d68 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:21:58 +0200 Subject: net: softnet_data: Make xmit per task. Softirq is preemptible on PREEMPT_RT. Without a per-CPU lock in local_bh_disable() there is no guarantee that only one device is transmitting at a time. With preemption and multiple senders it is possible that the per-CPU `recursion' counter gets incremented by different threads and exceeds XMIT_RECURSION_LIMIT leading to a false positive recursion alert. The `more' member is subject to similar problems if set by one thread for one driver and wrongly used by another driver within another thread. Instead of adding a lock to protect the per-CPU variable it is simpler to make xmit per-task. Sending and receiving skbs happens always in thread context anyway. Having a lock to protected the per-CPU counter would block/ serialize two sending threads needlessly. It would also require a recursive lock to ensure that the owner can increment the counter further. Make the softnet_data.xmit a task_struct member on PREEMPT_RT. Add needed wrapper. Cc: Ben Segall Cc: Daniel Bristot de Oliveira Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Mel Gorman Cc: Steven Rostedt Cc: Valentin Schneider Cc: Vincent Guittot Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-9-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 42 +++++++++++++++++++++++++++++++----------- include/linux/netdevice_xmit.h | 13 +++++++++++++ include/linux/sched.h | 5 ++++- 3 files changed, 48 insertions(+), 12 deletions(-) create mode 100644 include/linux/netdevice_xmit.h (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c83b390191d4..f6fc9066147d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -43,6 +43,7 @@ #include #include +#include #include #include #include @@ -3223,13 +3224,7 @@ struct softnet_data { struct sk_buff_head xfrm_backlog; #endif /* written and read only by owning cpu: */ - struct { - u16 recursion; - u8 more; -#ifdef CONFIG_NET_EGRESS - u8 skip_txqueue; -#endif - } xmit; + struct netdev_xmit xmit; #ifdef CONFIG_RPS /* input_queue_head should be written by cpu owning this struct, * and only read by other cpus. Worth using a cache line. @@ -3257,10 +3252,18 @@ struct softnet_data { DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); +#ifndef CONFIG_PREEMPT_RT static inline int dev_recursion_level(void) { return this_cpu_read(softnet_data.xmit.recursion); } +#else +static inline int dev_recursion_level(void) +{ + return current->net_xmit.recursion; +} + +#endif void __netif_schedule(struct Qdisc *q); void netif_schedule_queue(struct netdev_queue *txq); @@ -4872,18 +4875,35 @@ static inline ktime_t netdev_get_tstamp(struct net_device *dev, return hwtstamps->hwtstamp; } -static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, - struct sk_buff *skb, struct net_device *dev, - bool more) +#ifndef CONFIG_PREEMPT_RT +static inline void netdev_xmit_set_more(bool more) { __this_cpu_write(softnet_data.xmit.more, more); - return ops->ndo_start_xmit(skb, dev); } static inline bool netdev_xmit_more(void) { return __this_cpu_read(softnet_data.xmit.more); } +#else +static inline void netdev_xmit_set_more(bool more) +{ + current->net_xmit.more = more; +} + +static inline bool netdev_xmit_more(void) +{ + return current->net_xmit.more; +} +#endif + +static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, + struct sk_buff *skb, struct net_device *dev, + bool more) +{ + netdev_xmit_set_more(more); + return ops->ndo_start_xmit(skb, dev); +} static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, bool more) diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h new file mode 100644 index 000000000000..38325e070296 --- /dev/null +++ b/include/linux/netdevice_xmit.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _LINUX_NETDEVICE_XMIT_H +#define _LINUX_NETDEVICE_XMIT_H + +struct netdev_xmit { + u16 recursion; + u8 more; +#ifdef CONFIG_NET_EGRESS + u8 skip_txqueue; +#endif +}; + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6..5187486c2522 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -975,7 +976,9 @@ struct task_struct { /* delay due to memory thrashing */ unsigned in_thrashing:1; #endif - +#ifdef CONFIG_PREEMPT_RT + struct netdev_xmit net_xmit; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ struct restart_block restart_block; -- cgit v1.2.3 From b22800f9d3b142bf2550dd47ff738b9feedc1093 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:22:00 +0200 Subject: dev: Use nested-BH locking for softnet_data.process_queue. softnet_data::process_queue is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. softnet_data::input_queue_head can be updated lockless. This is fine because this value is only update CPU local by the local backlog_napi thread. Add a local_lock_t to softnet_data and use local_lock_nested_bh() for locking of process_queue. This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-11-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f6fc9066147d..4e81660b4462 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3202,6 +3202,7 @@ static inline bool dev_has_header(const struct net_device *dev) struct softnet_data { struct list_head poll_list; struct sk_buff_head process_queue; + local_lock_t process_queue_bh_lock; /* stats */ unsigned int processed; -- cgit v1.2.3 From d1542d4ae4dfdc47c9b3205ebe849ed23af213dd Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:22:02 +0200 Subject: seg6: Use nested-BH locking for seg6_bpf_srh_states. The access to seg6_bpf_srh_states is protected by disabling preemption. Based on the code, the entry point is input_action_end_bpf() and every other function (the bpf helper functions bpf_lwt_seg6_*()), that is accessing seg6_bpf_srh_states, should be called from within input_action_end_bpf(). input_action_end_bpf() accesses seg6_bpf_srh_states first at the top of the function and then disables preemption. This looks wrong because if preemption needs to be disabled as part of the locking mechanism then the variable shouldn't be accessed beforehand. Looking at how it is used via test_lwt_seg6local.sh then input_action_end_bpf() is always invoked from softirq context. If this is always the case then the preempt_disable() statement is superfluous. If this is not always invoked from softirq then disabling only preemption is not sufficient. Replace the preempt_disable() statement with nested-BH locking. This is not an equivalent replacement as it assumes that the invocation of input_action_end_bpf() always occurs in softirq context and thus the preempt_disable() is superfluous. Add a local_lock_t the data structure and use local_lock_nested_bh() for locking. Add lockdep_assert_held() to ensure the lock is held while the per-CPU variable is referenced in the helper functions. Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: David Ahern Cc: Hao Luo Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Martin KaFai Lau Cc: Song Liu Cc: Stanislav Fomichev Cc: Yonghong Song Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-13-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/net/seg6_local.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h index 3fab9dec2ec4..888c1ce6f527 100644 --- a/include/net/seg6_local.h +++ b/include/net/seg6_local.h @@ -19,6 +19,7 @@ extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, extern bool seg6_bpf_has_valid_srh(struct sk_buff *skb); struct seg6_bpf_srh_state { + local_lock_t bh_lock; struct ipv6_sr_hdr *srh; u16 hdrlen; bool valid; -- cgit v1.2.3 From 401cb7dae8130fd34eb84648e02ab4c506df7d5e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:22:04 +0200 Subject: net: Reference bpf_redirect_info via task_struct on PREEMPT_RT. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XDP redirect process is two staged: - bpf_prog_run_xdp() is invoked to run a eBPF program which inspects the packet and makes decisions. While doing that, the per-CPU variable bpf_redirect_info is used. - Afterwards xdp_do_redirect() is invoked and accesses bpf_redirect_info and it may also access other per-CPU variables like xskmap_flush_list. At the very end of the NAPI callback, xdp_do_flush() is invoked which does not access bpf_redirect_info but will touch the individual per-CPU lists. The per-CPU variables are only used in the NAPI callback hence disabling bottom halves is the only protection mechanism. Users from preemptible context (like cpu_map_kthread_run()) explicitly disable bottom halves for protections reasons. Without locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. PREEMPT_RT has forced-threaded interrupts enabled and every NAPI-callback runs in a thread. If each thread has its own data structure then locking can be avoided. Create a struct bpf_net_context which contains struct bpf_redirect_info. Define the variable on stack, use bpf_net_ctx_set() to save a pointer to it, bpf_net_ctx_clear() removes it again. The bpf_net_ctx_set() may nest. For instance a function can be used from within NET_RX_SOFTIRQ/ net_rx_action which uses bpf_net_ctx_set() and NET_TX_SOFTIRQ which does not. Therefore only the first invocations updates the pointer. Use bpf_net_ctx_get_ri() as a wrapper to retrieve the current struct bpf_redirect_info. The returned data structure is zero initialized to ensure nothing is leaked from stack. This is done on first usage of the struct. bpf_net_ctx_set() sets bpf_redirect_info::kern_flags to 0 to note that initialisation is required. First invocation of bpf_net_ctx_get_ri() will memset() the data structure and update bpf_redirect_info::kern_flags. bpf_redirect_info::nh is excluded from memset because it is only used once BPF_F_NEIGH is set which also sets the nh member. The kern_flags is moved past nh to exclude it from memset. The pointer to bpf_net_context is saved task's task_struct. Using always the bpf_net_context approach has the advantage that there is almost zero differences between PREEMPT_RT and non-PREEMPT_RT builds. Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Hao Luo Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Martin KaFai Lau Cc: Song Liu Cc: Stanislav Fomichev Cc: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Jesper Dangaard Brouer Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-15-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 56 +++++++++++++++++++++++++++++++++++++++++--------- include/linux/sched.h | 3 +++ 2 files changed, 49 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index b02aea291b7e..0a7f6e4a00b6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -733,21 +733,59 @@ struct bpf_nh_params { }; }; +/* flags for bpf_redirect_info kern_flags */ +#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ +#define BPF_RI_F_RI_INIT BIT(1) + struct bpf_redirect_info { u64 tgt_index; void *tgt_value; struct bpf_map *map; u32 flags; - u32 kern_flags; u32 map_id; enum bpf_map_type map_type; struct bpf_nh_params nh; + u32 kern_flags; }; -DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); +struct bpf_net_context { + struct bpf_redirect_info ri; +}; -/* flags for bpf_redirect_info kern_flags */ -#define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ +static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bpf_net_ctx) +{ + struct task_struct *tsk = current; + + if (tsk->bpf_net_context != NULL) + return NULL; + bpf_net_ctx->ri.kern_flags = 0; + + tsk->bpf_net_context = bpf_net_ctx; + return bpf_net_ctx; +} + +static inline void bpf_net_ctx_clear(struct bpf_net_context *bpf_net_ctx) +{ + if (bpf_net_ctx) + current->bpf_net_context = NULL; +} + +static inline struct bpf_net_context *bpf_net_ctx_get(void) +{ + return current->bpf_net_context; +} + +static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_RI_INIT)) { + memset(&bpf_net_ctx->ri, 0, offsetof(struct bpf_net_context, ri.nh)); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_RI_INIT; + } + + return &bpf_net_ctx->ri; +} /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, @@ -1018,25 +1056,23 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt); -void bpf_clear_redirect_map(struct bpf_map *map); - static inline bool xdp_return_frame_no_direct(void) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); return ri->kern_flags & BPF_RI_F_RF_NO_DIRECT; } static inline void xdp_set_return_frame_no_direct(void) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); ri->kern_flags |= BPF_RI_F_RF_NO_DIRECT; } static inline void xdp_clear_return_frame_no_direct(void) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); ri->kern_flags &= ~BPF_RI_F_RF_NO_DIRECT; } @@ -1592,7 +1628,7 @@ static __always_inline long __bpf_xdp_redirect_map(struct bpf_map *map, u64 inde u64 flags, const u64 flag_mask, void *lookup_elem(struct bpf_map *map, u32 key)) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX; /* Lower bits of the flags are used as return code on lookup failure */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 5187486c2522..5ff5e65a4627 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -54,6 +54,7 @@ struct bio_list; struct blk_plug; struct bpf_local_storage; struct bpf_run_ctx; +struct bpf_net_context; struct capture_control; struct cfs_rq; struct fs_struct; @@ -1509,6 +1510,8 @@ struct task_struct { /* Used for BPF run context */ struct bpf_run_ctx *bpf_ctx; #endif + /* Used by BPF for per-TASK xdp storage */ + struct bpf_net_context *bpf_net_context; #ifdef CONFIG_GCC_PLUGIN_STACKLEAK unsigned long lowest_stack; -- cgit v1.2.3 From 3f9fe37d9e16a6cfd5f4d1f536686ea71db3196f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Jun 2024 15:22:05 +0200 Subject: net: Move per-CPU flush-lists to bpf_net_context on PREEMPT_RT. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The per-CPU flush lists, which are accessed from within the NAPI callback (xdp_do_flush() for instance), are per-CPU. There are subject to the same problem as struct bpf_redirect_info. Add the per-CPU lists cpu_map_flush_list, dev_map_flush_list and xskmap_map_flush_list to struct bpf_net_context. Add wrappers for the access. The lists initialized on first usage (similar to bpf_net_ctx_get_ri()). Cc: "Björn Töpel" Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Hao Luo Cc: Jiri Olsa Cc: John Fastabend Cc: Jonathan Lemon Cc: KP Singh Cc: Maciej Fijalkowski Cc: Magnus Karlsson Cc: Martin KaFai Lau Cc: Song Liu Cc: Stanislav Fomichev Cc: Yonghong Song Acked-by: Jesper Dangaard Brouer Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20240620132727.660738-16-bigeasy@linutronix.de Signed-off-by: Jakub Kicinski --- include/linux/filter.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0a7f6e4a00b6..c0349522de8f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -736,6 +736,9 @@ struct bpf_nh_params { /* flags for bpf_redirect_info kern_flags */ #define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ #define BPF_RI_F_RI_INIT BIT(1) +#define BPF_RI_F_CPU_MAP_INIT BIT(2) +#define BPF_RI_F_DEV_MAP_INIT BIT(3) +#define BPF_RI_F_XSK_MAP_INIT BIT(4) struct bpf_redirect_info { u64 tgt_index; @@ -750,6 +753,9 @@ struct bpf_redirect_info { struct bpf_net_context { struct bpf_redirect_info ri; + struct list_head cpu_map_flush_list; + struct list_head dev_map_flush_list; + struct list_head xskmap_map_flush_list; }; static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bpf_net_ctx) @@ -787,6 +793,42 @@ static inline struct bpf_redirect_info *bpf_net_ctx_get_ri(void) return &bpf_net_ctx->ri; } +static inline struct list_head *bpf_net_ctx_get_cpu_map_flush_list(void) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_CPU_MAP_INIT)) { + INIT_LIST_HEAD(&bpf_net_ctx->cpu_map_flush_list); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_CPU_MAP_INIT; + } + + return &bpf_net_ctx->cpu_map_flush_list; +} + +static inline struct list_head *bpf_net_ctx_get_dev_flush_list(void) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_DEV_MAP_INIT)) { + INIT_LIST_HEAD(&bpf_net_ctx->dev_map_flush_list); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_DEV_MAP_INIT; + } + + return &bpf_net_ctx->dev_map_flush_list; +} + +static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_XSK_MAP_INIT)) { + INIT_LIST_HEAD(&bpf_net_ctx->xskmap_map_flush_list); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_XSK_MAP_INIT; + } + + return &bpf_net_ctx->xskmap_map_flush_list; +} + /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, * lwt, ...). Subsystems allowing direct data access must (!) -- cgit v1.2.3 From 26b97668e5339434e5df8ddc7b1898a37a350112 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 24 Jun 2024 13:47:22 -0600 Subject: io_uring: remove dead struct io_submit_state member When the intermediate CQE aux cache got removed, any usage of the this member went away. As it isn't used anymore, kill it. Fixes: 902ce82c2aa1 ("io_uring: get rid of intermediate aux cqe caches") Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index b48570eaa449..7abdc0927124 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -207,7 +207,6 @@ struct io_submit_state { bool need_plug; bool cq_flush; unsigned short submit_nr; - unsigned int cqes_count; struct blk_plug plug; }; -- cgit v1.2.3 From 399ab86ea55039f9d0a5f621a68cb4631f796f37 Mon Sep 17 00:00:00 2001 From: Jeff Xu Date: Fri, 14 Jun 2024 23:20:14 +0000 Subject: /proc/pid/smaps: add mseal info for vma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add sl in /proc/pid/smaps to indicate vma is sealed Link: https://lkml.kernel.org/r/20240614232014.806352-2-jeffxu@google.com Fixes: 8be7258aad44 ("mseal: add mseal syscall") Signed-off-by: Jeff Xu Acked-by: David Hildenbrand Cc: Adhemerval Zanella Cc: Jann Horn Cc: Jorge Lucangeli Obes Cc: Kees Cook Cc: Randy Dunlap Cc: Stephen Röttger Signed-off-by: Andrew Morton --- include/linux/mm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9a5652c5fadd..eb7c96d24ac0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -406,6 +406,11 @@ extern unsigned int kobjsize(const void *objp); #define VM_ALLOW_ANY_UNCACHED VM_NONE #endif +#ifdef CONFIG_64BIT +/* VM is sealed, in vm_flags */ +#define VM_SEALED _BITUL(63) +#endif + /* Bits set in the VMA until the stack is in its final location */ #define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY) -- cgit v1.2.3 From ff202303c398ed56386ca4954154de9a96eb732a Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Fri, 7 Jun 2024 13:29:53 -0700 Subject: mm: convert page type macros to enum Changing PG_slab from a page flag to a page type in commit 46df8e73a4a3 ("mm: free up PG_slab") in has the unintended consequence of removing the PG_slab constant from kernel debuginfo. The commit does add the value to the vmcoreinfo note, which allows debuggers to find the value without hardcoding it. However it's most flexible to continue representing the constant with an enum. To that end, convert the page type fields into an enum. Debuggers will now be able to detect that PG_slab's type has changed from enum pageflags to enum pagetype. Link: https://lkml.kernel.org/r/20240607202954.1198180-1-stephen.s.brennan@oracle.com Fixes: 46df8e73a4a3 ("mm: free up PG_slab") Signed-off-by: Stephen Brennan Acked-by: Vlastimil Babka Cc: David Hildenbrand Cc: Hao Ge Cc: Matthew Wilcox (Oracle) Cc: Omar Sandoval Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 104078afe0b1..b9e914e1face 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -944,15 +944,18 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) * mistaken for a page type value. */ -#define PAGE_TYPE_BASE 0xf0000000 -/* Reserve 0x0000007f to catch underflows of _mapcount */ -#define PAGE_MAPCOUNT_RESERVE -128 -#define PG_buddy 0x00000080 -#define PG_offline 0x00000100 -#define PG_table 0x00000200 -#define PG_guard 0x00000400 -#define PG_hugetlb 0x00000800 -#define PG_slab 0x00001000 +enum pagetype { + PG_buddy = 0x00000080, + PG_offline = 0x00000100, + PG_table = 0x00000200, + PG_guard = 0x00000400, + PG_hugetlb = 0x00000800, + PG_slab = 0x00001000, + + PAGE_TYPE_BASE = 0xf0000000, + /* Reserve 0x0000007f to catch underflows of _mapcount */ + PAGE_MAPCOUNT_RESERVE = -128, +}; #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) -- cgit v1.2.3 From bf14ed81f571f8dba31cd72ab2e50fbcc877cc31 Mon Sep 17 00:00:00 2001 From: yangge Date: Thu, 20 Jun 2024 08:59:50 +0800 Subject: mm/page_alloc: Separate THP PCP into movable and non-movable categories Since commit 5d0a661d808f ("mm/page_alloc: use only one PCP list for THP-sized allocations") no longer differentiates the migration type of pages in THP-sized PCP list, it's possible that non-movable allocation requests may get a CMA page from the list, in some cases, it's not acceptable. If a large number of CMA memory are configured in system (for example, the CMA memory accounts for 50% of the system memory), starting a virtual machine with device passthrough will get stuck. During starting the virtual machine, it will call pin_user_pages_remote(..., FOLL_LONGTERM, ...) to pin memory. Normally if a page is present and in CMA area, pin_user_pages_remote() will migrate the page from CMA area to non-CMA area because of FOLL_LONGTERM flag. But if non-movable allocation requests return CMA memory, migrate_longterm_unpinnable_pages() will migrate a CMA page to another CMA page, which will fail to pass the check in check_and_migrate_movable_pages() and cause migration endless. Call trace: pin_user_pages_remote --__gup_longterm_locked // endless loops in this function ----_get_user_pages_locked ----check_and_migrate_movable_pages ------migrate_longterm_unpinnable_pages --------alloc_migration_target This problem will also have a negative impact on CMA itself. For example, when CMA is borrowed by THP, and we need to reclaim it through cma_alloc() or dma_alloc_coherent(), we must move those pages out to ensure CMA's users can retrieve that contigous memory. Currently, CMA's memory is occupied by non-movable pages, meaning we can't relocate them. As a result, cma_alloc() is more likely to fail. To fix the problem above, we add one PCP list for THP, which will not introduce a new cacheline for struct per_cpu_pages. THP will have 2 PCP lists, one PCP list is used by MOVABLE allocation, and the other PCP list is used by UNMOVABLE allocation. MOVABLE allocation contains GPF_MOVABLE, and UNMOVABLE allocation contains GFP_UNMOVABLE and GFP_RECLAIMABLE. Link: https://lkml.kernel.org/r/1718845190-4456-1-git-send-email-yangge1116@126.com Fixes: 5d0a661d808f ("mm/page_alloc: use only one PCP list for THP-sized allocations") Signed-off-by: yangge Cc: Baolin Wang Cc: Barry Song <21cnbao@gmail.com> Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 8f9c9590a42c..586a8f0104d7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -654,13 +654,12 @@ enum zone_watermarks { }; /* - * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. One additional list - * for THP which will usually be GFP_MOVABLE. Even if it is another type, - * it should not contribute to serious fragmentation causing THP allocation - * failures. + * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER. Two additional lists + * are added for THP. One PCP list is used by GPF_MOVABLE, and the other PCP list + * is used by GFP_UNMOVABLE and GFP_RECLAIMABLE. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define NR_PCP_THP 1 +#define NR_PCP_THP 2 #else #define NR_PCP_THP 0 #endif -- cgit v1.2.3 From cf28d7716e0c777f593948eea109dc273047dac7 Mon Sep 17 00:00:00 2001 From: Wei-Hsin Yeh Date: Sun, 12 May 2024 13:42:11 +0800 Subject: include/linux/jhash.h: fix typos Drop one '-' to adhere to coding style. Replace 'arbitray' with 'arbitrary'. Link: https://lkml.kernel.org/r/20240512054211.24726-1-weihsinyeh168@gmail.com Signed-off-by: Wei-Hsin Yeh Signed-off-by: Andrew Morton --- include/linux/jhash.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/jhash.h b/include/linux/jhash.h index ab7f8c152b89..fa26a2dd3b52 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -31,7 +31,7 @@ /* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */ #define jhash_mask(n) (jhash_size(n)-1) -/* __jhash_mix -- mix 3 32-bit values reversibly. */ +/* __jhash_mix - mix 3 32-bit values reversibly. */ #define __jhash_mix(a, b, c) \ { \ a -= c; a ^= rol32(c, 4); c += b; \ @@ -60,7 +60,7 @@ /* jhash - hash an arbitrary key * @k: sequence of bytes as key * @length: the length of the key - * @initval: the previous hash, or an arbitray value + * @initval: the previous hash, or an arbitrary value * * The generic version, hashes an arbitrary sequence of bytes. * No alignment or length assumptions are made about the input key. @@ -110,7 +110,7 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) /* jhash2 - hash an array of u32's * @k: the key which must be an array of u32's * @length: the number of u32's in the key - * @initval: the previous hash, or an arbitray value + * @initval: the previous hash, or an arbitrary value * * Returns the hash value of the key. */ -- cgit v1.2.3 From 873ce25766019dd017c1a3a10c19ac3fc4bf24aa Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:46 +0800 Subject: lib min_heap: add type safe interface Implement a type-safe interface for min_heap using strong type pointers instead of void * in the data field. This change includes adding small macro wrappers around functions, enabling the use of __minheap_cast and __minheap_obj_size macros for type casting and obtaining element size. This implementation removes the necessity of passing element size in min_heap_callbacks. Additionally, introduce the MIN_HEAP_PREALLOCATED macro for preallocating some elements. Link: https://lkml.kernel.org/ioyfizrzq7w7mjrqcadtzsfgpuntowtjdw5pgn4qhvsdp4mqqg@nrlek5vmisbu Link: https://lkml.kernel.org/r/20240524152958.919343-5-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 79 +++++++++++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index d52daf45861b..92830f41642a 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -7,45 +7,53 @@ #include /** - * struct min_heap - Data structure to hold a min-heap. - * @data: Start of array holding the heap elements. + * Data structure to hold a min-heap. * @nr: Number of elements currently in the heap. * @size: Maximum number of elements that can be held in current storage. + * @data: Pointer to the start of array holding the heap elements. + * @preallocated: Start of the static preallocated array holding the heap elements. */ -struct min_heap { - void *data; - int nr; - int size; -}; +#define MIN_HEAP_PREALLOCATED(_type, _name, _nr) \ +struct _name { \ + int nr; \ + int size; \ + _type *data; \ + _type preallocated[_nr]; \ +} + +#define DEFINE_MIN_HEAP(_type, _name) MIN_HEAP_PREALLOCATED(_type, _name, 0) + +typedef DEFINE_MIN_HEAP(char, min_heap_char) min_heap_char; + +#define __minheap_cast(_heap) (typeof((_heap)->data[0]) *) +#define __minheap_obj_size(_heap) sizeof((_heap)->data[0]) /** * struct min_heap_callbacks - Data/functions to customise the min_heap. - * @elem_size: The nr of each element in bytes. * @less: Partial order function for this heap. * @swp: Swap elements function. */ struct min_heap_callbacks { - int elem_size; bool (*less)(const void *lhs, const void *rhs); void (*swp)(void *lhs, void *rhs); }; /* Sift the element at pos down the heap. */ static __always_inline -void min_heapify(struct min_heap *heap, int pos, +void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, const struct min_heap_callbacks *func) { void *left, *right; void *data = heap->data; - void *root = data + pos * func->elem_size; + void *root = data + pos * elem_size; int i = pos, j; /* Find the sift-down path all the way to the leaves. */ for (;;) { if (i * 2 + 2 >= heap->nr) break; - left = data + (i * 2 + 1) * func->elem_size; - right = data + (i * 2 + 2) * func->elem_size; + left = data + (i * 2 + 1) * elem_size; + right = data + (i * 2 + 2) * elem_size; i = func->less(left, right) ? i * 2 + 1 : i * 2 + 2; } @@ -54,31 +62,37 @@ void min_heapify(struct min_heap *heap, int pos, i = i * 2 + 1; /* Backtrack to the correct location. */ - while (i != pos && func->less(root, data + i * func->elem_size)) + while (i != pos && func->less(root, data + i * elem_size)) i = (i - 1) / 2; /* Shift the element into its correct place. */ j = i; while (i != pos) { i = (i - 1) / 2; - func->swp(data + i * func->elem_size, data + j * func->elem_size); + func->swp(data + i * elem_size, data + j * elem_size); } } +#define min_heapify(_heap, _pos, _func) \ + __min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func) + /* Floyd's approach to heapification that is O(nr). */ static __always_inline -void min_heapify_all(struct min_heap *heap, +void __min_heapify_all(min_heap_char *heap, size_t elem_size, const struct min_heap_callbacks *func) { int i; for (i = heap->nr / 2 - 1; i >= 0; i--) - min_heapify(heap, i, func); + __min_heapify(heap, i, elem_size, func); } +#define min_heapify_all(_heap, _func) \ + __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func) + /* Remove minimum element from the heap, O(log2(nr)). */ static __always_inline -void min_heap_pop(struct min_heap *heap, +void __min_heap_pop(min_heap_char *heap, size_t elem_size, const struct min_heap_callbacks *func) { void *data = heap->data; @@ -88,27 +102,33 @@ void min_heap_pop(struct min_heap *heap, /* Place last element at the root (position 0) and then sift down. */ heap->nr--; - memcpy(data, data + (heap->nr * func->elem_size), func->elem_size); - min_heapify(heap, 0, func); + memcpy(data, data + (heap->nr * elem_size), elem_size); + __min_heapify(heap, 0, elem_size, func); } +#define min_heap_pop(_heap, _func) \ + __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func) + /* * Remove the minimum element and then push the given element. The * implementation performs 1 sift (O(log2(nr))) and is therefore more * efficient than a pop followed by a push that does 2. */ static __always_inline -void min_heap_pop_push(struct min_heap *heap, - const void *element, +void __min_heap_pop_push(min_heap_char *heap, + const void *element, size_t elem_size, const struct min_heap_callbacks *func) { - memcpy(heap->data, element, func->elem_size); - min_heapify(heap, 0, func); + memcpy(heap->data, element, elem_size); + __min_heapify(heap, 0, elem_size, func); } +#define min_heap_pop_push(_heap, _element, _func) \ + __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func) + /* Push an element on to the heap, O(log2(nr)). */ static __always_inline -void min_heap_push(struct min_heap *heap, const void *element, +void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, const struct min_heap_callbacks *func) { void *data = heap->data; @@ -120,17 +140,20 @@ void min_heap_push(struct min_heap *heap, const void *element, /* Place at the end of data. */ pos = heap->nr; - memcpy(data + (pos * func->elem_size), element, func->elem_size); + memcpy(data + (pos * elem_size), element, elem_size); heap->nr++; /* Sift child at pos up. */ for (; pos > 0; pos = (pos - 1) / 2) { - child = data + (pos * func->elem_size); - parent = data + ((pos - 1) / 2) * func->elem_size; + child = data + (pos * elem_size); + parent = data + ((pos - 1) / 2) * elem_size; if (func->less(parent, child)) break; func->swp(parent, child); } } +#define min_heap_push(_heap, _element, _func) \ + __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func) + #endif /* _LINUX_MIN_HEAP_H */ -- cgit v1.2.3 From e146683ccff9af2d26b70a9ea4c87f5a86eff0c4 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:47 +0800 Subject: lib min_heap: add min_heap_init() Add min_heap_init() for initializing heap with data, nr, and size. Link: https://lkml.kernel.org/r/20240524152958.919343-6-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 92830f41642a..b384a4ea2afa 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -38,6 +38,21 @@ struct min_heap_callbacks { void (*swp)(void *lhs, void *rhs); }; +/* Initialize a min-heap. */ +static __always_inline +void __min_heap_init(min_heap_char *heap, void *data, int size) +{ + heap->nr = 0; + heap->size = size; + if (data) + heap->data = data; + else + heap->data = heap->preallocated; +} + +#define min_heap_init(_heap, _data, _size) \ + __min_heap_init((min_heap_char *)_heap, _data, _size) + /* Sift the element at pos down the heap. */ static __always_inline void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, -- cgit v1.2.3 From 0562d54ddc3dc195f338bd8e816dd8ff154f44ad Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:48 +0800 Subject: lib min_heap: add min_heap_peek() Add min_heap_peek() to retrieve a pointer to the smallest element. The pointer is cast to the appropriate type of heap elements. Link: https://lkml.kernel.org/r/20240524152958.919343-7-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index b384a4ea2afa..d9c4ae7ad0cc 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -53,6 +53,16 @@ void __min_heap_init(min_heap_char *heap, void *data, int size) #define min_heap_init(_heap, _data, _size) \ __min_heap_init((min_heap_char *)_heap, _data, _size) +/* Get the minimum element from the heap. */ +static __always_inline +void *__min_heap_peek(struct min_heap_char *heap) +{ + return heap->nr ? heap->data : NULL; +} + +#define min_heap_peek(_heap) \ + (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap)) + /* Sift the element at pos down the heap. */ static __always_inline void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, -- cgit v1.2.3 From b9d720e65a72c9754faf689e0859a5632853f4bc Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:49 +0800 Subject: lib min_heap: add min_heap_full() Add min_heap_full() which returns a boolean value indicating whether the heap has reached its maximum capacity. Link: https://lkml.kernel.org/r/20240524152958.919343-8-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index d9c4ae7ad0cc..f41898c05f5a 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -63,6 +63,16 @@ void *__min_heap_peek(struct min_heap_char *heap) #define min_heap_peek(_heap) \ (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap)) +/* Check if the heap is full. */ +static __always_inline +bool __min_heap_full(min_heap_char *heap) +{ + return heap->nr == heap->size; +} + +#define min_heap_full(_heap) \ + __min_heap_full((min_heap_char *)_heap) + /* Sift the element at pos down the heap. */ static __always_inline void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, -- cgit v1.2.3 From 267607e87599509a6a39a5f7dd3959365e58af27 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:50 +0800 Subject: lib min_heap: add args for min_heap_callbacks Add a third parameter 'args' for the 'less' and 'swp' functions in the 'struct min_heap_callbacks'. This additional parameter allows these comparison and swap functions to handle extra arguments when necessary. Link: https://lkml.kernel.org/r/20240524152958.919343-9-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 51 ++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index f41898c05f5a..4acd0f4b3faf 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -34,8 +34,8 @@ typedef DEFINE_MIN_HEAP(char, min_heap_char) min_heap_char; * @swp: Swap elements function. */ struct min_heap_callbacks { - bool (*less)(const void *lhs, const void *rhs); - void (*swp)(void *lhs, void *rhs); + bool (*less)(const void *lhs, const void *rhs, void *args); + void (*swp)(void *lhs, void *rhs, void *args); }; /* Initialize a min-heap. */ @@ -76,7 +76,7 @@ bool __min_heap_full(min_heap_char *heap) /* Sift the element at pos down the heap. */ static __always_inline void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, - const struct min_heap_callbacks *func) + const struct min_heap_callbacks *func, void *args) { void *left, *right; void *data = heap->data; @@ -89,7 +89,7 @@ void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, break; left = data + (i * 2 + 1) * elem_size; right = data + (i * 2 + 2) * elem_size; - i = func->less(left, right) ? i * 2 + 1 : i * 2 + 2; + i = func->less(left, right, args) ? i * 2 + 1 : i * 2 + 2; } /* Special case for the last leaf with no sibling. */ @@ -97,38 +97,38 @@ void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, i = i * 2 + 1; /* Backtrack to the correct location. */ - while (i != pos && func->less(root, data + i * elem_size)) + while (i != pos && func->less(root, data + i * elem_size, args)) i = (i - 1) / 2; /* Shift the element into its correct place. */ j = i; while (i != pos) { i = (i - 1) / 2; - func->swp(data + i * elem_size, data + j * elem_size); + func->swp(data + i * elem_size, data + j * elem_size, args); } } -#define min_heapify(_heap, _pos, _func) \ - __min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func) +#define min_heapify(_heap, _pos, _func, _args) \ + __min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) /* Floyd's approach to heapification that is O(nr). */ static __always_inline void __min_heapify_all(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func) + const struct min_heap_callbacks *func, void *args) { int i; for (i = heap->nr / 2 - 1; i >= 0; i--) - __min_heapify(heap, i, elem_size, func); + __min_heapify(heap, i, elem_size, func, args); } -#define min_heapify_all(_heap, _func) \ - __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func) +#define min_heapify_all(_heap, _func, _args) \ + __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) /* Remove minimum element from the heap, O(log2(nr)). */ static __always_inline void __min_heap_pop(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func) + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; @@ -138,11 +138,11 @@ void __min_heap_pop(min_heap_char *heap, size_t elem_size, /* Place last element at the root (position 0) and then sift down. */ heap->nr--; memcpy(data, data + (heap->nr * elem_size), elem_size); - __min_heapify(heap, 0, elem_size, func); + __min_heapify(heap, 0, elem_size, func, args); } -#define min_heap_pop(_heap, _func) \ - __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func) +#define min_heap_pop(_heap, _func, _args) \ + __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) /* * Remove the minimum element and then push the given element. The @@ -152,19 +152,20 @@ void __min_heap_pop(min_heap_char *heap, size_t elem_size, static __always_inline void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size, - const struct min_heap_callbacks *func) + const struct min_heap_callbacks *func, + void *args) { memcpy(heap->data, element, elem_size); - __min_heapify(heap, 0, elem_size, func); + __min_heapify(heap, 0, elem_size, func, args); } -#define min_heap_pop_push(_heap, _element, _func) \ - __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func) +#define min_heap_pop_push(_heap, _element, _func, _args) \ + __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) /* Push an element on to the heap, O(log2(nr)). */ static __always_inline void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, - const struct min_heap_callbacks *func) + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; void *child, *parent; @@ -182,13 +183,13 @@ void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, for (; pos > 0; pos = (pos - 1) / 2) { child = data + (pos * elem_size); parent = data + ((pos - 1) / 2) * elem_size; - if (func->less(parent, child)) + if (func->less(parent, child, args)) break; - func->swp(parent, child); + func->swp(parent, child, args); } } -#define min_heap_push(_heap, _element, _func) \ - __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func) +#define min_heap_push(_heap, _element, _func, _args) \ + __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) #endif /* _LINUX_MIN_HEAP_H */ -- cgit v1.2.3 From eaa0bc7119247942200b9209d40ffd86ace347d4 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:51 +0800 Subject: lib min_heap: add min_heap_sift_up() Add min_heap_sift_up() to sift up the element at index 'idx' in the heap. Link: https://lkml.kernel.org/r/20240524152958.919343-10-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 4acd0f4b3faf..ddd0186afb77 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -111,6 +111,26 @@ void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, #define min_heapify(_heap, _pos, _func, _args) \ __min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) +/* Sift up ith element from the heap, O(log2(nr)). */ +static __always_inline +void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) +{ + void *data = heap->data; + size_t parent; + + while (idx) { + parent = (idx - 1) / 2; + if (func->less(data + parent * elem_size, data + idx * elem_size, args)) + break; + func->swp(data + parent * elem_size, data + idx * elem_size, args); + idx = parent; + } +} + +#define min_heap_sift_up(_heap, _idx, _func, _args) \ + __min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) + /* Floyd's approach to heapification that is O(nr). */ static __always_inline void __min_heapify_all(min_heap_char *heap, size_t elem_size, -- cgit v1.2.3 From 420f171031207a13c83560b2ebb32ccc3c8ed6df Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:52 +0800 Subject: lib min_heap: add min_heap_del() Add min_heap_del() to delete the element at index 'idx' in the heap. Link: https://lkml.kernel.org/r/20240524152958.919343-11-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index ddd0186afb77..98e838195e7e 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -212,4 +212,28 @@ void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, #define min_heap_push(_heap, _element, _func, _args) \ __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) +/* Remove ith element from the heap, O(log2(nr)). */ +static __always_inline +bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) +{ + void *data = heap->data; + + if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap")) + return false; + + /* Place last element at the root (position 0) and then sift down. */ + heap->nr--; + if (idx == heap->nr) + return true; + func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args); + __min_heap_sift_up(heap, elem_size, idx, func, args); + __min_heapify(heap, idx, elem_size, func, args); + + return true; +} + +#define min_heap_del(_heap, _idx, _func, _args) \ + __min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) + #endif /* _LINUX_MIN_HEAP_H */ -- cgit v1.2.3 From 2eb637c649a3d89c5483dc851ce98d56717a36d2 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:53 +0800 Subject: lib min_heap: update min_heap_push() and min_heap_pop() to return bool values Modify the min_heap_push() and min_heap_pop() to return a boolean value. They now return false when the operation fails and true when it succeeds. Link: https://lkml.kernel.org/r/20240524152958.919343-12-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 98e838195e7e..3410a5f907ad 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -147,18 +147,20 @@ void __min_heapify_all(min_heap_char *heap, size_t elem_size, /* Remove minimum element from the heap, O(log2(nr)). */ static __always_inline -void __min_heap_pop(min_heap_char *heap, size_t elem_size, +bool __min_heap_pop(min_heap_char *heap, size_t elem_size, const struct min_heap_callbacks *func, void *args) { void *data = heap->data; if (WARN_ONCE(heap->nr <= 0, "Popping an empty heap")) - return; + return false; /* Place last element at the root (position 0) and then sift down. */ heap->nr--; memcpy(data, data + (heap->nr * elem_size), elem_size); __min_heapify(heap, 0, elem_size, func, args); + + return true; } #define min_heap_pop(_heap, _func, _args) \ @@ -184,7 +186,7 @@ void __min_heap_pop_push(min_heap_char *heap, /* Push an element on to the heap, O(log2(nr)). */ static __always_inline -void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, +bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, const struct min_heap_callbacks *func, void *args) { void *data = heap->data; @@ -192,7 +194,7 @@ void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, int pos; if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap")) - return; + return false; /* Place at the end of data. */ pos = heap->nr; @@ -207,6 +209,8 @@ void __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, break; func->swp(parent, child, args); } + + return true; } #define min_heap_push(_heap, _element, _func, _args) \ -- cgit v1.2.3 From bfe3127180418f1b569999954cb653b9926f75ae Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:54 +0800 Subject: lib min_heap: rename min_heapify() to min_heap_sift_down() After adding min_heap_sift_up(), the naming convention has been adjusted to maintain consistency with the min_heap_sift_up(). Consequently, min_heapify() has been renamed to min_heap_sift_down(). Link: https://lkml.kernel.org/CAP-5=fVcBAxt8Mw72=NCJPRJfjDaJcqk4rjbadgouAEAHz_q1A@mail.gmail.com Link: https://lkml.kernel.org/r/20240524152958.919343-13-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 3410a5f907ad..0baee5787247 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -75,7 +75,7 @@ bool __min_heap_full(min_heap_char *heap) /* Sift the element at pos down the heap. */ static __always_inline -void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, +void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, const struct min_heap_callbacks *func, void *args) { void *left, *right; @@ -108,8 +108,8 @@ void __min_heapify(min_heap_char *heap, int pos, size_t elem_size, } } -#define min_heapify(_heap, _pos, _func, _args) \ - __min_heapify((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) +#define min_heap_sift_down(_heap, _pos, _func, _args) \ + __min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) /* Sift up ith element from the heap, O(log2(nr)). */ static __always_inline @@ -139,7 +139,7 @@ void __min_heapify_all(min_heap_char *heap, size_t elem_size, int i; for (i = heap->nr / 2 - 1; i >= 0; i--) - __min_heapify(heap, i, elem_size, func, args); + __min_heap_sift_down(heap, i, elem_size, func, args); } #define min_heapify_all(_heap, _func, _args) \ @@ -158,7 +158,7 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size, /* Place last element at the root (position 0) and then sift down. */ heap->nr--; memcpy(data, data + (heap->nr * elem_size), elem_size); - __min_heapify(heap, 0, elem_size, func, args); + __min_heap_sift_down(heap, 0, elem_size, func, args); return true; } @@ -178,7 +178,7 @@ void __min_heap_pop_push(min_heap_char *heap, void *args) { memcpy(heap->data, element, elem_size); - __min_heapify(heap, 0, elem_size, func, args); + __min_heap_sift_down(heap, 0, elem_size, func, args); } #define min_heap_pop_push(_heap, _element, _func, _args) \ @@ -232,7 +232,7 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, return true; func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args); __min_heap_sift_up(heap, elem_size, idx, func, args); - __min_heapify(heap, idx, elem_size, func, args); + __min_heap_sift_down(heap, idx, elem_size, func, args); return true; } -- cgit v1.2.3 From e596930fc78b97b71df80df0fb1cbf2efb9fc6e4 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 24 May 2024 23:29:55 +0800 Subject: lib min_heap: update min_heap_push() to use min_heap_sift_up() Update min_heap_push() to use min_heap_sift_up() rather than its origin inline version. Link: https://lkml.kernel.org/r/20240524152958.919343-14-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Bagas Sanjaya Cc: Brian Foster Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kent Overstreet Cc: Mark Rutland Cc: Matthew Sakai Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/min_heap.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 0baee5787247..43a7b9dcf15e 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -190,7 +190,6 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, const struct min_heap_callbacks *func, void *args) { void *data = heap->data; - void *child, *parent; int pos; if (WARN_ONCE(heap->nr >= heap->size, "Pushing on a full heap")) @@ -202,13 +201,7 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, heap->nr++; /* Sift child at pos up. */ - for (; pos > 0; pos = (pos - 1) / 2) { - child = data + (pos * elem_size); - parent = data + ((pos - 1) / 2) * elem_size; - if (func->less(parent, child, args)) - break; - func->swp(parent, child, args); - } + __min_heap_sift_up(heap, elem_size, pos, func, args); return true; } -- cgit v1.2.3 From 7c45d8282660cf4ce011d96bf918df50b7481b3c Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 27 May 2024 17:56:44 -0700 Subject: sched: avoid using ilog2() in sched.h indirectly via cpumask.h path includes the ilog2.h header to calculate ilog2(TASK_REPORT_MAX). The following patches drops sched.h dependency on cpumask.h, and to have a successful build, the header has to be included explicitly. sched.h is a frequently included header, and it's better to keep the dependency list as small as possible. So, instead of including ilog2.h for a single BUILD_BUG_ON() check, the same check may be implemented by taking exponent of the other part of equation. Link: https://lkml.kernel.org/r/20240528005648.182376-3-yury.norov@gmail.com Signed-off-by: Yury Norov Cc: Amit Daniel Kachhap Cc: Anna-Maria Behnsen Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Dennis Zhou Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Juri Lelli Cc: Kees Cook Cc: Mathieu Desnoyers Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Rasmus Villemoes Cc: Tejun Heo Cc: Thomas Gleixner Cc: Ulf Hansson Cc: Vincent Guittot Cc: Viresh Kumar Cc: Yury Norov Signed-off-by: Andrew Morton --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6..98abb07de149 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1604,7 +1604,7 @@ static inline char task_index_to_char(unsigned int state) { static const char state_char[] = "RSDTtXZPI"; - BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1); + BUILD_BUG_ON(TASK_REPORT_MAX * 2 != 1 << (sizeof(state_char) - 1)); return state_char[state]; } -- cgit v1.2.3 From eb4faa36d674eed60a522a72a3a40384f4b285d4 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 27 May 2024 17:56:45 -0700 Subject: cpumask: split out include/linux/cpumask_types.h Many core headers, like sched.h, include cpumask.h mostly for struct cpumask and cpumask_var_t. Those are frequently used headers and shouldn't pull more than the bare minimum. Link: https://lkml.kernel.org/r/20240528005648.182376-4-yury.norov@gmail.com Signed-off-by: Yury Norov Cc: Amit Daniel Kachhap Cc: Anna-Maria Behnsen Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Dennis Zhou Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Juri Lelli Cc: Kees Cook Cc: Mathieu Desnoyers Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Rasmus Villemoes Cc: Tejun Heo Cc: Thomas Gleixner Cc: Ulf Hansson Cc: Vincent Guittot Cc: Viresh Kumar Signed-off-by: Andrew Morton --- include/linux/cpumask.h | 56 +----------------------------------- include/linux/cpumask_types.h | 66 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 55 deletions(-) create mode 100644 include/linux/cpumask_types.h (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 23686bed441d..76dca7b86189 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -9,25 +9,13 @@ */ #include #include -#include #include +#include #include #include #include #include -/* Don't assign or return these: may not be this big! */ -typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; - -/** - * cpumask_bits - get the bits in a cpumask - * @maskp: the struct cpumask * - * - * You should only assume nr_cpu_ids bits of this mask are valid. This is - * a macro so it's const-correct. - */ -#define cpumask_bits(maskp) ((maskp)->bits) - /** * cpumask_pr_args - printf args to output a cpumask * @maskp: cpumask to be printed @@ -922,48 +910,7 @@ static inline unsigned int cpumask_size(void) return bitmap_size(large_cpumask_bits); } -/* - * cpumask_var_t: struct cpumask for stack usage. - * - * Oh, the wicked games we play! In order to make kernel coding a - * little more difficult, we typedef cpumask_var_t to an array or a - * pointer: doing &mask on an array is a noop, so it still works. - * - * i.e. - * cpumask_var_t tmpmask; - * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) - * return -ENOMEM; - * - * ... use 'tmpmask' like a normal struct cpumask * ... - * - * free_cpumask_var(tmpmask); - * - * - * However, one notable exception is there. alloc_cpumask_var() allocates - * only nr_cpumask_bits bits (in the other hand, real cpumask_t always has - * NR_CPUS bits). Therefore you don't have to dereference cpumask_var_t. - * - * cpumask_var_t tmpmask; - * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) - * return -ENOMEM; - * - * var = *tmpmask; - * - * This code makes NR_CPUS length memcopy and brings to a memory corruption. - * cpumask_copy() provide safe copy functionality. - * - * Note that there is another evil here: If you define a cpumask_var_t - * as a percpu variable then the way to obtain the address of the cpumask - * structure differently influences what this_cpu_* operation needs to be - * used. Please use this_cpu_cpumask_var_t in those cases. The direct use - * of this_cpu_ptr() or this_cpu_read() will lead to failures when the - * other type of cpumask_var_t implementation is configured. - * - * Please also note that __cpumask_var_read_mostly can be used to declare - * a cpumask_var_t variable itself (not its content) as read mostly. - */ #ifdef CONFIG_CPUMASK_OFFSTACK -typedef struct cpumask *cpumask_var_t; #define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) #define __cpumask_var_read_mostly __read_mostly @@ -1010,7 +957,6 @@ static inline bool cpumask_available(cpumask_var_t mask) } #else -typedef struct cpumask cpumask_var_t[1]; #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) #define __cpumask_var_read_mostly diff --git a/include/linux/cpumask_types.h b/include/linux/cpumask_types.h new file mode 100644 index 000000000000..461ed1b6bcdb --- /dev/null +++ b/include/linux/cpumask_types.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_CPUMASK_TYPES_H +#define __LINUX_CPUMASK_TYPES_H + +#include +#include + +/* Don't assign or return these: may not be this big! */ +typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; + +/** + * cpumask_bits - get the bits in a cpumask + * @maskp: the struct cpumask * + * + * You should only assume nr_cpu_ids bits of this mask are valid. This is + * a macro so it's const-correct. + */ +#define cpumask_bits(maskp) ((maskp)->bits) + +/* + * cpumask_var_t: struct cpumask for stack usage. + * + * Oh, the wicked games we play! In order to make kernel coding a + * little more difficult, we typedef cpumask_var_t to an array or a + * pointer: doing &mask on an array is a noop, so it still works. + * + * i.e. + * cpumask_var_t tmpmask; + * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) + * return -ENOMEM; + * + * ... use 'tmpmask' like a normal struct cpumask * ... + * + * free_cpumask_var(tmpmask); + * + * + * However, one notable exception is there. alloc_cpumask_var() allocates + * only nr_cpumask_bits bits (in the other hand, real cpumask_t always has + * NR_CPUS bits). Therefore you don't have to dereference cpumask_var_t. + * + * cpumask_var_t tmpmask; + * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) + * return -ENOMEM; + * + * var = *tmpmask; + * + * This code makes NR_CPUS length memcopy and brings to a memory corruption. + * cpumask_copy() provide safe copy functionality. + * + * Note that there is another evil here: If you define a cpumask_var_t + * as a percpu variable then the way to obtain the address of the cpumask + * structure differently influences what this_cpu_* operation needs to be + * used. Please use this_cpu_cpumask_var_t in those cases. The direct use + * of this_cpu_ptr() or this_cpu_read() will lead to failures when the + * other type of cpumask_var_t implementation is configured. + * + * Please also note that __cpumask_var_read_mostly can be used to declare + * a cpumask_var_t variable itself (not its content) as read mostly. + */ +#ifdef CONFIG_CPUMASK_OFFSTACK +typedef struct cpumask *cpumask_var_t; +#else +typedef struct cpumask cpumask_var_t[1]; +#endif /* CONFIG_CPUMASK_OFFSTACK */ + +#endif /* __LINUX_CPUMASK_TYPES_H */ -- cgit v1.2.3 From 361c1f04f3b43b00997b2845c7a9e1d0a9b4c38a Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 27 May 2024 17:56:46 -0700 Subject: sched: drop sched.h dependency on cpumask sched.h needs cpumask.h mostly for types declaration. Now that we have cpumask_types.h, which is a significantly smaller header, we can rely on it. The only exception is UP stub for set_cpus_allowed_ptr(). The function needs to test bit #0 in a @new_mask, which can be trivially opencoded. Link: https://lkml.kernel.org/r/20240528005648.182376-5-yury.norov@gmail.com Signed-off-by: Yury Norov Cc: Amit Daniel Kachhap Cc: Anna-Maria Behnsen Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Dennis Zhou Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Juri Lelli Cc: Kees Cook Cc: Mathieu Desnoyers Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Rasmus Villemoes Cc: Tejun Heo Cc: Thomas Gleixner Cc: Ulf Hansson Cc: Vincent Guittot Cc: Viresh Kumar Cc: Yury Norov Signed-off-by: Andrew Morton --- include/linux/sched.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 98abb07de149..f2f907ef1389 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include @@ -1778,7 +1778,8 @@ static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpuma } static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { - if (!cpumask_test_cpu(0, new_mask)) + /* Opencoded cpumask_test_cpu(0, new_mask) to avoid dependency on cpumask.h */ + if ((*cpumask_bits(new_mask) & 1) == 0) return -EINVAL; return 0; } -- cgit v1.2.3 From 7f36688f126ba4a4ec510fa81466b1dacdec97ee Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 27 May 2024 17:56:47 -0700 Subject: cpumask: cleanup core headers inclusion Many core headers include cpumask.h for nothing. Drop it. Link: https://lkml.kernel.org/r/20240528005648.182376-6-yury.norov@gmail.com Signed-off-by: Yury Norov Cc: Amit Daniel Kachhap Cc: Anna-Maria Behnsen Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Dennis Zhou Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Juri Lelli Cc: Kees Cook Cc: Mathieu Desnoyers Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Rasmus Villemoes Cc: Tejun Heo Cc: Thomas Gleixner Cc: Ulf Hansson Cc: Vincent Guittot Cc: Viresh Kumar Cc: Yury Norov Signed-off-by: Andrew Morton --- include/linux/cgroup.h | 1 - include/linux/cpu.h | 1 - include/linux/cpu_cooling.h | 1 - include/linux/kernel_stat.h | 1 - include/linux/node.h | 1 - include/linux/percpu.h | 1 - include/linux/profile.h | 1 - include/linux/rcupdate.h | 1 - include/linux/seq_file.h | 1 - include/linux/tracepoint.h | 1 - 10 files changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 2150ca60394b..c60ba0ab1462 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -10,7 +10,6 @@ */ #include -#include #include #include #include diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 861c3bfc5f17..ea6ac8f98e4a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -16,7 +16,6 @@ #include #include -#include #include #include diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index a3bdc8a98f2c..2c774fb3c091 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -15,7 +15,6 @@ #include #include -#include struct cpufreq_policy; diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 9c042c6384bb..b97ce2df376f 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/node.h b/include/linux/node.h index dfc004e4bee7..9a881c2208b3 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -16,7 +16,6 @@ #define _LINUX_NODE_H_ #include -#include #include /** diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 03053de557cf..4b2047b78b67 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/profile.h b/include/linux/profile.h index 04ae5ebcb637..2fb487f61d12 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -4,7 +4,6 @@ #include #include -#include #include #include diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index dfd2399f2cde..fb8ab4618d63 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 8bd4fda6e027..2fb266ea69fa 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 689b6d71590e..6be396bb4297 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From e1b6705bcfb2797ea182e313d5ec4f57fa8571f2 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 27 May 2024 17:56:48 -0700 Subject: cpumask: make core headers including cpumask_types.h where possible Now that cpumask types are split out to a separate smaller header, many frequently included core headers may switch to using it. Link: https://lkml.kernel.org/r/20240528005648.182376-7-yury.norov@gmail.com Signed-off-by: Yury Norov Cc: Amit Daniel Kachhap Cc: Anna-Maria Behnsen Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Dennis Zhou Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Juri Lelli Cc: Kees Cook Cc: Mathieu Desnoyers Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Rasmus Villemoes Cc: Tejun Heo Cc: Thomas Gleixner Cc: Ulf Hansson Cc: Vincent Guittot Cc: Viresh Kumar Cc: Yury Norov Signed-off-by: Andrew Morton --- include/linux/cacheinfo.h | 2 +- include/linux/clockchips.h | 2 +- include/linux/cpu_rmap.h | 2 +- include/linux/interrupt.h | 2 +- include/linux/irqchip/irq-partition-percpu.h | 2 +- include/linux/msi.h | 2 +- include/linux/pm_domain.h | 2 +- include/linux/stop_machine.h | 2 +- include/linux/torture.h | 2 +- include/linux/workqueue.h | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2cb15fe4fe12..286db104e054 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -3,7 +3,7 @@ #define _LINUX_CACHEINFO_H #include -#include +#include #include struct device_node; diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 9aac31d856f3..b0df28ddd394 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -12,7 +12,7 @@ #ifdef CONFIG_GENERIC_CLOCKEVENTS # include -# include +# include # include # include diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h index cae324d10965..20b5729903d7 100644 --- a/include/linux/cpu_rmap.h +++ b/include/linux/cpu_rmap.h @@ -7,7 +7,7 @@ * Copyright 2011 Solarflare Communications Inc. */ -#include +#include #include #include #include diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5c9bdd3ffccc..136a55455529 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -5,13 +5,13 @@ #include #include -#include #include #include #include #include #include #include +#include #include #include diff --git a/include/linux/irqchip/irq-partition-percpu.h b/include/linux/irqchip/irq-partition-percpu.h index 2f6ae7551748..b35ee22c278f 100644 --- a/include/linux/irqchip/irq-partition-percpu.h +++ b/include/linux/irqchip/irq-partition-percpu.h @@ -8,7 +8,7 @@ #define __LINUX_IRQCHIP_IRQ_PARTITION_PERCPU_H #include -#include +#include #include struct partition_affinity { diff --git a/include/linux/msi.h b/include/linux/msi.h index dc27cf3903d5..26588da88bdd 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include #include diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f24546a3d3db..71e4f0fb8867 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include /* diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index ea7a74ea7389..3132262a404d 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -3,7 +3,7 @@ #define _LINUX_STOP_MACHINE #include -#include +#include #include #include diff --git a/include/linux/torture.h b/include/linux/torture.h index 1541454da03e..c2e979f82f8d 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index fb3993894536..52496f07fba5 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include -- cgit v1.2.3 From 2f183c68345a26213e5e7f798399bee68d1c4a97 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 31 May 2024 12:04:57 +0300 Subject: kernel/panic: add verbose logging of kernel taints in backtraces With nearly 20 taint flags and respective characters, it's getting a bit difficult to remember what each taint flag character means. Add verbose logging of the set taints in the format: Tainted: [P]=PROPRIETARY_MODULE, [W]=WARN in dump_stack_print_info() when there are taints. Note that the "negative flag" G is not included. Link: https://lkml.kernel.org/r/7321e306166cb2ca2807ab8639e665baa2462e9c.1717146197.git.jani.nikula@intel.com Signed-off-by: Jani Nikula Reviewed-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton --- include/linux/panic.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/panic.h b/include/linux/panic.h index 6717b15e798c..3130e0b5116b 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -77,9 +77,10 @@ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) #define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1) struct taint_flag { - char c_true; /* character printed when tainted */ - char c_false; /* character printed when not tainted */ - bool module; /* also show as a per-module taint flag */ + char c_true; /* character printed when tainted */ + char c_false; /* character printed when not tainted */ + bool module; /* also show as a per-module taint flag */ + const char *desc; /* verbose description of the set taint flag */ }; extern const struct taint_flag taint_flags[TAINT_FLAGS_COUNT]; @@ -90,6 +91,7 @@ enum lockdep_ok { }; extern const char *print_tainted(void); +extern const char *print_tainted_verbose(void); extern void add_taint(unsigned flag, enum lockdep_ok); extern int test_taint(unsigned flag); extern unsigned long get_taint(void); -- cgit v1.2.3 From cc8d5a2f09a54405321769abfd6ec3395482336a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 25 Jun 2024 07:58:10 +0200 Subject: Revert "printk: Save console options for add_preferred_console_match()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f03e8c1060f86c23eb49bafee99d9fcbd1c1bd77. Let's roll back all of the serial core and printk console changes that went into 6.10-rc1 as there still are problems with them that need to be sorted out. Link: https://lore.kernel.org/r/ZnpRozsdw6zbjqze@tlindgre-MOBL1 Reported-by: Petr Mladek Reported-by: Tony Lindgren Cc: Jiri Slaby Cc: John Ogness Cc: Sergey Senozhatsky Cc: Andy Shevchenko Cc: Ilpo Järvinen Signed-off-by: Greg Kroah-Hartman --- include/linux/printk.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index 40afab23881a..65c5184470f1 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -60,9 +60,6 @@ static inline const char *printk_skip_headers(const char *buffer) #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT #define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET -int add_preferred_console_match(const char *match, const char *name, - const short idx); - extern int console_printk[]; #define console_loglevel (console_printk[0]) -- cgit v1.2.3 From 07b87f9eea0c30675084d50c82532d20168da009 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 20 Jun 2024 08:47:24 +0200 Subject: xfrm: Fix unregister netdevice hang on hardware offload. When offloading xfrm states to hardware, the offloading device is attached to the skbs secpath. If a skb is free is deferred, an unregister netdevice hangs because the netdevice is still refcounted. Fix this by removing the netdevice from the xfrm states when the netdevice is unregistered. To find all xfrm states that need to be cleared we add another list where skbs linked to that are unlinked from the lists (deleted) but not yet freed. Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 36 ++++++++++-------------------------- 1 file changed, 10 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 77ebf5bcf0b9..7d4c2235252c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -178,7 +178,10 @@ struct xfrm_state { struct hlist_node gclist; struct hlist_node bydst; }; - struct hlist_node bysrc; + union { + struct hlist_node dev_gclist; + struct hlist_node bysrc; + }; struct hlist_node byspi; struct hlist_node byseq; @@ -1588,7 +1591,7 @@ void xfrm_state_update_stats(struct net *net); static inline void xfrm_dev_state_update_stats(struct xfrm_state *x) { struct xfrm_dev_offload *xdo = &x->xso; - struct net_device *dev = xdo->dev; + struct net_device *dev = READ_ONCE(xdo->dev); if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_state_update_stats) @@ -1946,13 +1949,16 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, struct xfrm_user_offload *xuo, u8 dir, struct netlink_ext_ack *extack); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); +void xfrm_dev_state_delete(struct xfrm_state *x); +void xfrm_dev_state_free(struct xfrm_state *x); static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x) { struct xfrm_dev_offload *xso = &x->xso; + struct net_device *dev = READ_ONCE(xso->dev); - if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn) - xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x); + if (dev && dev->xfrmdev_ops->xdo_dev_state_advance_esn) + dev->xfrmdev_ops->xdo_dev_state_advance_esn(x); } static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) @@ -1973,28 +1979,6 @@ static inline bool xfrm_dst_offload_ok(struct dst_entry *dst) return false; } -static inline void xfrm_dev_state_delete(struct xfrm_state *x) -{ - struct xfrm_dev_offload *xso = &x->xso; - - if (xso->dev) - xso->dev->xfrmdev_ops->xdo_dev_state_delete(x); -} - -static inline void xfrm_dev_state_free(struct xfrm_state *x) -{ - struct xfrm_dev_offload *xso = &x->xso; - struct net_device *dev = xso->dev; - - if (dev && dev->xfrmdev_ops) { - if (dev->xfrmdev_ops->xdo_dev_state_free) - dev->xfrmdev_ops->xdo_dev_state_free(x); - xso->dev = NULL; - xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED; - netdev_put(dev, &xso->dev_tracker); - } -} - static inline void xfrm_dev_policy_delete(struct xfrm_policy *x) { struct xfrm_dev_offload *xdo = &x->xdo; -- cgit v1.2.3 From 98f706de445b464f25220360210a4bcb9cc6c41a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:16 -0700 Subject: af_unix: Define locking order for U_LOCK_SECOND in unix_stream_connect(). While a SOCK_(STREAM|SEQPACKET) socket connect()s to another, we hold two locks of them by unix_state_lock() and unix_state_lock_nested() in unix_stream_connect(). Before unix_state_lock_nested(), the following is guaranteed by checking sk->sk_state: 1. The first socket is TCP_LISTEN 2. The second socket is not the first one 3. Simultaneous connect() must fail So, the client state can be TCP_CLOSE or TCP_LISTEN or TCP_ESTABLISHED. Let's define the expected states as unix_state_lock_cmp_fn() instead of using unix_state_lock_nested(). Note that 2. is detected by debug_spin_lock_before() and 3. cannot be expressed as lock_cmp_fn. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/af_unix.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b6eedf7650da..fd813ad73ab8 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -98,7 +98,6 @@ struct unix_sock { #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) enum unix_socket_lock_class { U_LOCK_NORMAL, - U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ U_LOCK_GC_LISTENER, /* used for listening socket while determining gc * candidates to close a small race window. -- cgit v1.2.3 From c4da4661d985fd3cbaea3ea6101e2dd0d2ad4b74 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:18 -0700 Subject: af_unix: Remove U_LOCK_DIAG. sk_diag_dump_icons() acquires embryo's lock by unix_state_lock_nested() to fetch its peer. The embryo's ->peer is set to NULL only when its parent listener is close()d. Then, unix_release_sock() is called for each embryo after unlinking skb by skb_dequeue(). In sk_diag_dump_icons(), we hold the parent's recvq lock, so we need not acquire unix_state_lock_nested(), and peer is always non-NULL. Let's remove unnecessary unix_state_lock_nested() and non-NULL test for peer. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/af_unix.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index fd813ad73ab8..c42645199cee 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -98,7 +98,6 @@ struct unix_sock { #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) enum unix_socket_lock_class { U_LOCK_NORMAL, - U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ U_LOCK_GC_LISTENER, /* used for listening socket while determining gc * candidates to close a small race window. */ -- cgit v1.2.3 From 7202cb591624b860bd7b688d4bd0f5bee79c7f44 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:19 -0700 Subject: af_unix: Remove U_LOCK_GC_LISTENER. Commit 1971d13ffa84 ("af_unix: Suppress false-positive lockdep splat for spin_lock() in __unix_gc().") added U_LOCK_GC_LISTENER for the old GC, but it's no longer needed for the new GC. Let's remove U_LOCK_GC_LISTENER and unix_state_lock_nested() as there's no user. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/af_unix.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index c42645199cee..63129c79b8cb 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -96,18 +96,6 @@ struct unix_sock { #define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) -enum unix_socket_lock_class { - U_LOCK_NORMAL, - U_LOCK_GC_LISTENER, /* used for listening socket while determining gc - * candidates to close a small race window. - */ -}; - -static inline void unix_state_lock_nested(struct sock *sk, - enum unix_socket_lock_class subclass) -{ - spin_lock_nested(&unix_sk(sk)->lock, subclass); -} #define peer_wait peer_wq.wait -- cgit v1.2.3 From 9b6a14f08b4875aa22ea0b5bc35042e2580b311b Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Thu, 20 Jun 2024 11:23:33 +0800 Subject: fs: Export in_group_or_capable() Export in_group_or_capable() as a VFS helper function. Signed-off-by: Youling Tang Link: https://lore.kernel.org/r/20240620032335.147136-1-youling.tang@linux.dev Signed-off-by: Christian Brauner --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index bfc1e6407bf6..942cb11dba96 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1942,6 +1942,8 @@ void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode, extern bool may_open_dev(const struct path *path); umode_t mode_strip_sgid(struct mnt_idmap *idmap, const struct inode *dir, umode_t mode); +bool in_group_or_capable(struct mnt_idmap *idmap, + const struct inode *inode, vfsgid_t vfsgid); /* * This is the "filldir" function type, used by readdir() to let -- cgit v1.2.3 From ff46e3b4421923937b7f6e44ffcd3549a074f321 Mon Sep 17 00:00:00 2001 From: luoxuanqiang Date: Fri, 21 Jun 2024 09:39:29 +0800 Subject: Fix race for duplicate reqsk on identical SYN When bonding is configured in BOND_MODE_BROADCAST mode, if two identical SYN packets are received at the same time and processed on different CPUs, it can potentially create the same sk (sock) but two different reqsk (request_sock) in tcp_conn_request(). These two different reqsk will respond with two SYNACK packets, and since the generation of the seq (ISN) incorporates a timestamp, the final two SYNACK packets will have different seq values. The consequence is that when the Client receives and replies with an ACK to the earlier SYNACK packet, we will reset(RST) it. ======================================================================== This behavior is consistently reproducible in my local setup, which comprises: | NETA1 ------ NETB1 | PC_A --- bond --- | | --- bond --- PC_B | NETA2 ------ NETB2 | - PC_A is the Server and has two network cards, NETA1 and NETA2. I have bonded these two cards using BOND_MODE_BROADCAST mode and configured them to be handled by different CPU. - PC_B is the Client, also equipped with two network cards, NETB1 and NETB2, which are also bonded and configured in BOND_MODE_BROADCAST mode. If the client attempts a TCP connection to the server, it might encounter a failure. Capturing packets from the server side reveals: 10.10.10.10.45182 > localhost: Flags [S], seq 320236027, 10.10.10.10.45182 > localhost: Flags [S], seq 320236027, localhost > 10.10.10.10.45182: Flags [S.], seq 2967855116, localhost > 10.10.10.10.45182: Flags [S.], seq 2967855123, <== 10.10.10.10.45182 > localhost: Flags [.], ack 4294967290, 10.10.10.10.45182 > localhost: Flags [.], ack 4294967290, localhost > 10.10.10.10.45182: Flags [R], seq 2967855117, <== localhost > 10.10.10.10.45182: Flags [R], seq 2967855117, Two SYNACKs with different seq numbers are sent by localhost, resulting in an anomaly. ======================================================================== The attempted solution is as follows: Add a return value to inet_csk_reqsk_queue_hash_add() to confirm if the ehash insertion is successful (Up to now, the reason for unsuccessful insertion is that a reqsk for the same connection has already been inserted). If the insertion fails, release the reqsk. Due to the refcnt, Kuniyuki suggests also adding a return value check for the DCCP module; if ehash insertion fails, indicating a successful insertion of the same connection, simply release the reqsk as well. Simultaneously, In the reqsk_queue_hash_req(), the start of the req->rsk_timer is adjusted to be after successful insertion. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: luoxuanqiang Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240621013929.1386815-1-luoxuanqiang@kylinos.cn Signed-off-by: Paolo Abeni --- include/net/inet_connection_sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 7d6b1254c92d..c0deaafebfdc 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -263,7 +263,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct sock *inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, struct sock *child); -void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, unsigned long timeout); struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, struct request_sock *req, -- cgit v1.2.3 From 06ec7893a4b48a1fad9e94cb670862ddd65b6eab Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 24 Jun 2024 20:39:58 +0300 Subject: drm/connector: hdmi: shorten too long function name If CONFIG_MODVERSIONS is enabled, then using the HDMI Connector framework can result in build failures. Rename the function to make it fit into the name requirements. ERROR: modpost: too long symbol "drm_atomic_helper_connector_hdmi_disable_audio_infoframe" [drivers/gpu/drm/msm/msm.ko] Reported-by: Mark Brown Reviewed-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240624-hdmi-connector-shorten-name-v1-1-5bd3410138db@linaro.org Signed-off-by: Dmitry Baryshkov --- include/drm/display/drm_hdmi_state_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/display/drm_hdmi_state_helper.h b/include/drm/display/drm_hdmi_state_helper.h index 285f366cf716..2d45fcfa4619 100644 --- a/include/drm/display/drm_hdmi_state_helper.h +++ b/include/drm/display/drm_hdmi_state_helper.h @@ -16,7 +16,7 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, int drm_atomic_helper_connector_hdmi_update_audio_infoframe(struct drm_connector *connector, struct hdmi_audio_infoframe *frame); -int drm_atomic_helper_connector_hdmi_disable_audio_infoframe(struct drm_connector *connector); +int drm_atomic_helper_connector_hdmi_clear_audio_infoframe(struct drm_connector *connector); int drm_atomic_helper_connector_hdmi_update_infoframes(struct drm_connector *connector, struct drm_atomic_state *state); -- cgit v1.2.3 From b65bd7874bc7dc709645fd8acb53603b73ae950a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 24 Jun 2024 21:59:42 +0200 Subject: fbdev: mmp: Constify struct mmp_overlay_ops 'struct mmp_overlay_ops' is not modified in this driver. Constifying this structure moves some data to a read-only section, so increase overall security. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 11798 555 16 12369 3051 drivers/video/fbdev/mmp/hw/mmp_ctrl.o After: ===== text data bss dec hex filename 11834 507 16 12357 3045 drivers/video/fbdev/mmp/hw/mmp_ctrl.o Signed-off-by: Christophe JAILLET Signed-off-by: Helge Deller --- include/video/mmp_disp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/video/mmp_disp.h b/include/video/mmp_disp.h index a722dcbf5073..41354bd49895 100644 --- a/include/video/mmp_disp.h +++ b/include/video/mmp_disp.h @@ -156,7 +156,7 @@ struct mmp_overlay { int status; struct mutex access_ok; - struct mmp_overlay_ops *ops; + const struct mmp_overlay_ops *ops; }; /* panel type */ @@ -299,7 +299,7 @@ struct mmp_path_info { int overlay_num; void (*set_mode)(struct mmp_path *path, struct mmp_mode *mode); void (*set_onoff)(struct mmp_path *path, int status); - struct mmp_overlay_ops *overlay_ops; + const struct mmp_overlay_ops *overlay_ops; void *plat_data; }; -- cgit v1.2.3 From 001c1ff5dc397bc26a3d1a0da8733c179f39c946 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 25 Jun 2024 12:18:05 +0900 Subject: firewire: ohci: add support for Linux kernel tracepoints The Linux Kernel Tracepoints framework is enough useful to trace the interaction between 1394 OHCI hardware and its driver. This commit adds firewire_ohci subsystem to use the framework. It is defined as the different subsystem from the existing firewire subsystem. The definition file for the existing subsystem is slightly changed so that both subsystems are available in 1394 OHCI driver. Link: https://lore.kernel.org/r/20240625031806.956650-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 1 + include/trace/events/firewire_ohci.h | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 include/trace/events/firewire_ohci.h (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index d9158a134beb..aa00c9f33551 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later // Copyright (c) 2024 Takashi Sakamoto +#undef TRACE_SYSTEM #define TRACE_SYSTEM firewire #if !defined(_FIREWIRE_TRACE_EVENT_H) || defined(TRACE_HEADER_MULTI_READ) diff --git a/include/trace/events/firewire_ohci.h b/include/trace/events/firewire_ohci.h new file mode 100644 index 000000000000..67fa3c1c8f6d --- /dev/null +++ b/include/trace/events/firewire_ohci.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2024 Takashi Sakamoto + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM firewire_ohci + +#if !defined(_FIREWIRE_OHCI_TRACE_EVENT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _FIREWIRE_OHCI_TRACE_EVENT_H + +#include + +// Placeholder for future use. + +#endif // _FIREWIRE_OHCI_TRACE_EVENT_H + +#include -- cgit v1.2.3 From 0d8914165dd17b0fd330538871968efabb3227c0 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 25 Jun 2024 12:18:06 +0900 Subject: firewire: ohci: add tracepoints event for hardIRQ event 1394 OHCI hardware triggers PCI interrupts to notify any events to software. Current driver for the hardware is programmed by the typical way to utilize top- and bottom- halves, thus it has a timing gap to handle the notification in softIRQ (tasklet). This commit adds a tracepoint event for the hardIRQ event. The comparison of the tracepoint event to tracepoints events in firewire subsystem is helpful to diagnose the timing gap. Link: https://lore.kernel.org/r/20240625031806.956650-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire_ohci.h | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/firewire_ohci.h b/include/trace/events/firewire_ohci.h index 67fa3c1c8f6d..483aeeb033af 100644 --- a/include/trace/events/firewire_ohci.h +++ b/include/trace/events/firewire_ohci.h @@ -9,7 +9,38 @@ #include -// Placeholder for future use. +TRACE_EVENT(irqs, + TP_PROTO(unsigned int card_index, u32 events), + TP_ARGS(card_index, events), + TP_STRUCT__entry( + __field(u8, card_index) + __field(u32, events) + ), + TP_fast_assign( + __entry->card_index = card_index; + __entry->events = events; + ), + TP_printk( + "card_index=%u events=%s", + __entry->card_index, + __print_flags(__entry->events, "|", + { OHCI1394_selfIDComplete, "selfIDComplete" }, + { OHCI1394_RQPkt, "RQPkt" }, + { OHCI1394_RSPkt, "RSPkt" }, + { OHCI1394_reqTxComplete, "reqTxComplete" }, + { OHCI1394_respTxComplete, "respTxComplete" }, + { OHCI1394_isochRx, "isochRx" }, + { OHCI1394_isochTx, "isochTx" }, + { OHCI1394_postedWriteErr, "postedWriteErr" }, + { OHCI1394_cycleTooLong, "cycleTooLong" }, + { OHCI1394_cycle64Seconds, "cycle64Seconds" }, + { OHCI1394_cycleInconsistent, "cycleInconsistent" }, + { OHCI1394_regAccessFail, "regAccessFail" }, + { OHCI1394_unrecoverableError, "unrecoverableError" }, + { OHCI1394_busReset, "busReset" } + ) + ) +); #endif // _FIREWIRE_OHCI_TRACE_EVENT_H -- cgit v1.2.3 From d3882564a77c21eb746ba5364f3fa89b88de3d61 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Jun 2024 14:16:37 +0200 Subject: syscalls: fix compat_sys_io_pgetevents_time64 usage Using sys_io_pgetevents() as the entry point for compat mode tasks works almost correctly, but misses the sign extension for the min_nr and nr arguments. This was addressed on parisc by switching to compat_sys_io_pgetevents_time64() in commit 6431e92fc827 ("parisc: io_pgetevents_time64() needs compat syscall in 32-bit compat mode"), as well as by using more sophisticated system call wrappers on x86 and s390. However, arm64, mips, powerpc, sparc and riscv still have the same bug. Change all of them over to use compat_sys_io_pgetevents_time64() like parisc already does. This was clearly the intention when the function was originally added, but it got hooked up incorrectly in the tables. Cc: stable@vger.kernel.org Fixes: 48166e6ea47d ("y2038: add 64-bit time_t syscalls to all 32-bit architectures") Acked-by: Heiko Carstens # s390 Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/unistd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index d983c48a3b6a..d4cc26932ff4 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64) #define __NR_ppoll_time64 414 __SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64) #define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64) #define __NR_recvmmsg_time64 417 __SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64) #define __NR_mq_timedsend_time64 418 -- cgit v1.2.3 From 295f10061af024099440b46602bcc47364551db7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 4 Jun 2024 14:20:26 +0200 Subject: syscalls: mmap(): use unsigned offset type consistently Most architectures that implement the old-style mmap() with byte offset use 'unsigned long' as the type for that offset, but microblaze and riscv have the off_t type that is shared with userspace, matching the prototype in include/asm-generic/syscalls.h. Make this consistent by using an unsigned argument everywhere. This changes the behavior slightly, as the argument is shifted to a page number, and an user input with the top bit set would result in a negative page offset rather than a large one as we use elsewhere. For riscv, the 32-bit sys_mmap2() definition actually used a custom type that is different from the global declaration, but this was missed due to an incorrect type check. Signed-off-by: Arnd Bergmann --- include/asm-generic/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h index 933ca6581aba..fabcefe8a80a 100644 --- a/include/asm-generic/syscalls.h +++ b/include/asm-generic/syscalls.h @@ -19,7 +19,7 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, #ifndef sys_mmap asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long fd, off_t pgoff); + unsigned long fd, unsigned long off); #endif #ifndef sys_rt_sigreturn -- cgit v1.2.3 From 0fa8ab5f3533b307a7d0e438ab08ecd92725dad7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 20 Jun 2024 14:47:27 +0200 Subject: linux/syscalls.h: add missing __user annotations A couple of declarations in linux/syscalls.h are missing __user annotations on their pointers, which can lead to warnings from sparse because these don't match the implementation that have the correct address space annotations. Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index ba9337709878..63424af87bba 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -322,13 +322,13 @@ asmlinkage long sys_io_pgetevents(aio_context_t ctx_id, long nr, struct io_event __user *events, struct __kernel_timespec __user *timeout, - const struct __aio_sigset *sig); + const struct __aio_sigset __user *sig); asmlinkage long sys_io_pgetevents_time32(aio_context_t ctx_id, long min_nr, long nr, struct io_event __user *events, struct old_timespec32 __user *timeout, - const struct __aio_sigset *sig); + const struct __aio_sigset __user *sig); asmlinkage long sys_io_uring_setup(u32 entries, struct io_uring_params __user *p); asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit, @@ -441,7 +441,7 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group); asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, umode_t mode); asmlinkage long sys_openat2(int dfd, const char __user *filename, - struct open_how *how, size_t size); + struct open_how __user *how, size_t size); asmlinkage long sys_close(unsigned int fd); asmlinkage long sys_close_range(unsigned int fd, unsigned int max_fd, unsigned int flags); @@ -555,7 +555,7 @@ asmlinkage long sys_get_robust_list(int pid, asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, size_t len); -asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, +asmlinkage long sys_futex_waitv(struct futex_waitv __user *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec __user *timeout, clockid_t clockid); @@ -907,7 +907,7 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags); -asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size); +asmlinkage long sys_bpf(int cmd, union bpf_attr __user *attr, unsigned int size); asmlinkage long sys_execveat(int dfd, const char __user *filename, const char __user *const __user *argv, const char __user *const __user *envp, int flags); @@ -960,11 +960,11 @@ asmlinkage long sys_cachestat(unsigned int fd, struct cachestat_range __user *cstat_range, struct cachestat __user *cstat, unsigned int flags); asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags); -asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx, - u32 *size, u32 flags); -asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx, +asmlinkage long sys_lsm_get_self_attr(unsigned int attr, struct lsm_ctx __user *ctx, + u32 __user *size, u32 flags); +asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx __user *ctx, u32 size, u32 flags); -asmlinkage long sys_lsm_list_modules(u64 *ids, u32 *size, u32 flags); +asmlinkage long sys_lsm_list_modules(u64 __user *ids, u32 __user *size, u32 flags); /* * Architecture-specific system calls -- cgit v1.2.3 From 605efd54b50437ed9f3915690539d0afddca9d95 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:41 +0200 Subject: netfilter: nf_tables: make struct nft_trans first member of derived subtypes There is 'struct nft_trans', the basic structure for all transactional objects, and the the various different transactional objects, such as nft_trans_table, chain, set, set_elem and so on. Right now 'struct nft_trans' uses a flexible member at the tail (data[]), and casting is needed to access the actual type-specific members. Change this to make the hierarchy visible in source code, i.e. make struct nft_trans the first member of all derived subtypes. This has several advantages: 1. pahole output reflects the real size needed by the particular subtype 2. allows to use container_of() to convert the base type to the actual object type instead of casting ->data to the overlay structure. 3. It makes it easy to add intermediate types. 'struct nft_trans' contains a 'binding_list' that is only needed by two subtypes, so it should be part of the two subtypes, not in the base structure. But that makes it hard to interate over the binding_list, because there is no common base structure. A follow patch moves the bind list to a new struct: struct nft_trans_binding { struct nft_trans nft_trans; struct list_head binding_list; }; ... and makes that structure the new 'first member' for both nft_trans_chain and nft_trans_set. No functional change intended in this patch. Some numbers: struct nft_trans { /* size: 88, cachelines: 2, members: 5 */ struct nft_trans_chain { /* size: 152, cachelines: 3, members: 10 */ struct nft_trans_elem { /* size: 112, cachelines: 2, members: 4 */ struct nft_trans_flowtable { /* size: 128, cachelines: 2, members: 5 */ struct nft_trans_obj { /* size: 112, cachelines: 2, members: 4 */ struct nft_trans_rule { /* size: 112, cachelines: 2, members: 5 */ struct nft_trans_set { /* size: 120, cachelines: 2, members: 8 */ struct nft_trans_table { /* size: 96, cachelines: 2, members: 2 */ Of particular interest is nft_trans_elem, which needs to be allocated once for each pending (to be added or removed) set element. Add BUILD_BUG_ON to check struct nft_trans is placed at the top of the container structure. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 162 ++++++++++++++++++++++---------------- 1 file changed, 92 insertions(+), 70 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2796153b03da..b25df037fceb 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1608,14 +1608,16 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) } /** - * struct nft_trans - nf_tables object update in transaction + * struct nft_trans - nf_tables object update in transaction * - * @list: used internally - * @binding_list: list of objects with possible bindings - * @msg_type: message type - * @put_net: ctx->net needs to be put - * @ctx: transaction context - * @data: internal information related to the transaction + * @list: used internally + * @binding_list: list of objects with possible bindings + * @msg_type: message type + * @put_net: ctx->net needs to be put + * @ctx: transaction context + * + * This is the information common to all objects in the transaction, + * this must always be the first member of derived sub-types. */ struct nft_trans { struct list_head list; @@ -1623,26 +1625,29 @@ struct nft_trans { int msg_type; bool put_net; struct nft_ctx ctx; - char data[]; }; struct nft_trans_rule { + struct nft_trans nft_trans; struct nft_rule *rule; struct nft_flow_rule *flow; u32 rule_id; bool bound; }; -#define nft_trans_rule(trans) \ - (((struct nft_trans_rule *)trans->data)->rule) -#define nft_trans_flow_rule(trans) \ - (((struct nft_trans_rule *)trans->data)->flow) -#define nft_trans_rule_id(trans) \ - (((struct nft_trans_rule *)trans->data)->rule_id) -#define nft_trans_rule_bound(trans) \ - (((struct nft_trans_rule *)trans->data)->bound) +#define nft_trans_container_rule(trans) \ + container_of(trans, struct nft_trans_rule, nft_trans) +#define nft_trans_rule(trans) \ + nft_trans_container_rule(trans)->rule +#define nft_trans_flow_rule(trans) \ + nft_trans_container_rule(trans)->flow +#define nft_trans_rule_id(trans) \ + nft_trans_container_rule(trans)->rule_id +#define nft_trans_rule_bound(trans) \ + nft_trans_container_rule(trans)->bound struct nft_trans_set { + struct nft_trans nft_trans; struct nft_set *set; u32 set_id; u32 gc_int; @@ -1652,22 +1657,25 @@ struct nft_trans_set { u32 size; }; -#define nft_trans_set(trans) \ - (((struct nft_trans_set *)trans->data)->set) -#define nft_trans_set_id(trans) \ - (((struct nft_trans_set *)trans->data)->set_id) -#define nft_trans_set_bound(trans) \ - (((struct nft_trans_set *)trans->data)->bound) -#define nft_trans_set_update(trans) \ - (((struct nft_trans_set *)trans->data)->update) -#define nft_trans_set_timeout(trans) \ - (((struct nft_trans_set *)trans->data)->timeout) -#define nft_trans_set_gc_int(trans) \ - (((struct nft_trans_set *)trans->data)->gc_int) -#define nft_trans_set_size(trans) \ - (((struct nft_trans_set *)trans->data)->size) +#define nft_trans_container_set(trans) \ + container_of(trans, struct nft_trans_set, nft_trans) +#define nft_trans_set(trans) \ + nft_trans_container_set(trans)->set +#define nft_trans_set_id(trans) \ + nft_trans_container_set(trans)->set_id +#define nft_trans_set_bound(trans) \ + nft_trans_container_set(trans)->bound +#define nft_trans_set_update(trans) \ + nft_trans_container_set(trans)->update +#define nft_trans_set_timeout(trans) \ + nft_trans_container_set(trans)->timeout +#define nft_trans_set_gc_int(trans) \ + nft_trans_container_set(trans)->gc_int +#define nft_trans_set_size(trans) \ + nft_trans_container_set(trans)->size struct nft_trans_chain { + struct nft_trans nft_trans; struct nft_chain *chain; bool update; char *name; @@ -1679,73 +1687,87 @@ struct nft_trans_chain { struct list_head hook_list; }; -#define nft_trans_chain(trans) \ - (((struct nft_trans_chain *)trans->data)->chain) -#define nft_trans_chain_update(trans) \ - (((struct nft_trans_chain *)trans->data)->update) -#define nft_trans_chain_name(trans) \ - (((struct nft_trans_chain *)trans->data)->name) -#define nft_trans_chain_stats(trans) \ - (((struct nft_trans_chain *)trans->data)->stats) -#define nft_trans_chain_policy(trans) \ - (((struct nft_trans_chain *)trans->data)->policy) -#define nft_trans_chain_bound(trans) \ - (((struct nft_trans_chain *)trans->data)->bound) -#define nft_trans_chain_id(trans) \ - (((struct nft_trans_chain *)trans->data)->chain_id) -#define nft_trans_basechain(trans) \ - (((struct nft_trans_chain *)trans->data)->basechain) -#define nft_trans_chain_hooks(trans) \ - (((struct nft_trans_chain *)trans->data)->hook_list) +#define nft_trans_container_chain(trans) \ + container_of(trans, struct nft_trans_chain, nft_trans) +#define nft_trans_chain(trans) \ + nft_trans_container_chain(trans)->chain +#define nft_trans_chain_update(trans) \ + nft_trans_container_chain(trans)->update +#define nft_trans_chain_name(trans) \ + nft_trans_container_chain(trans)->name +#define nft_trans_chain_stats(trans) \ + nft_trans_container_chain(trans)->stats +#define nft_trans_chain_policy(trans) \ + nft_trans_container_chain(trans)->policy +#define nft_trans_chain_bound(trans) \ + nft_trans_container_chain(trans)->bound +#define nft_trans_chain_id(trans) \ + nft_trans_container_chain(trans)->chain_id +#define nft_trans_basechain(trans) \ + nft_trans_container_chain(trans)->basechain +#define nft_trans_chain_hooks(trans) \ + nft_trans_container_chain(trans)->hook_list struct nft_trans_table { + struct nft_trans nft_trans; bool update; }; -#define nft_trans_table_update(trans) \ - (((struct nft_trans_table *)trans->data)->update) +#define nft_trans_container_table(trans) \ + container_of(trans, struct nft_trans_table, nft_trans) +#define nft_trans_table_update(trans) \ + nft_trans_container_table(trans)->update struct nft_trans_elem { + struct nft_trans nft_trans; struct nft_set *set; struct nft_elem_priv *elem_priv; bool bound; }; -#define nft_trans_elem_set(trans) \ - (((struct nft_trans_elem *)trans->data)->set) -#define nft_trans_elem_priv(trans) \ - (((struct nft_trans_elem *)trans->data)->elem_priv) -#define nft_trans_elem_set_bound(trans) \ - (((struct nft_trans_elem *)trans->data)->bound) +#define nft_trans_container_elem(t) \ + container_of(t, struct nft_trans_elem, nft_trans) +#define nft_trans_elem_set(trans) \ + nft_trans_container_elem(trans)->set +#define nft_trans_elem_priv(trans) \ + nft_trans_container_elem(trans)->elem_priv +#define nft_trans_elem_set_bound(trans) \ + nft_trans_container_elem(trans)->bound struct nft_trans_obj { + struct nft_trans nft_trans; struct nft_object *obj; struct nft_object *newobj; bool update; }; -#define nft_trans_obj(trans) \ - (((struct nft_trans_obj *)trans->data)->obj) -#define nft_trans_obj_newobj(trans) \ - (((struct nft_trans_obj *)trans->data)->newobj) -#define nft_trans_obj_update(trans) \ - (((struct nft_trans_obj *)trans->data)->update) +#define nft_trans_container_obj(t) \ + container_of(t, struct nft_trans_obj, nft_trans) +#define nft_trans_obj(trans) \ + nft_trans_container_obj(trans)->obj +#define nft_trans_obj_newobj(trans) \ + nft_trans_container_obj(trans)->newobj +#define nft_trans_obj_update(trans) \ + nft_trans_container_obj(trans)->update struct nft_trans_flowtable { + struct nft_trans nft_trans; struct nft_flowtable *flowtable; bool update; struct list_head hook_list; u32 flags; }; -#define nft_trans_flowtable(trans) \ - (((struct nft_trans_flowtable *)trans->data)->flowtable) -#define nft_trans_flowtable_update(trans) \ - (((struct nft_trans_flowtable *)trans->data)->update) -#define nft_trans_flowtable_hooks(trans) \ - (((struct nft_trans_flowtable *)trans->data)->hook_list) -#define nft_trans_flowtable_flags(trans) \ - (((struct nft_trans_flowtable *)trans->data)->flags) +#define nft_trans_container_flowtable(t) \ + container_of(t, struct nft_trans_flowtable, nft_trans) +#define nft_trans_flowtable(trans) \ + nft_trans_container_flowtable(trans)->flowtable +#define nft_trans_flowtable_update(trans) \ + nft_trans_container_flowtable(trans)->update +#define nft_trans_flowtable_hooks(trans) \ + nft_trans_container_flowtable(trans)->hook_list +#define nft_trans_flowtable_flags(trans) \ + nft_trans_container_flowtable(trans)->flags #define NFT_TRANS_GC_BATCHCOUNT 256 -- cgit v1.2.3 From 17d8f3ad36a5fa5c93afab90ed03ba7ec748dd03 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Jun 2024 20:53:16 +0200 Subject: netfilter: nf_tables: move bind list_head into relevant subtypes Only nft_trans_chain and nft_trans_set subtypes use the trans->binding_list member. Add a new common binding subtype and move the member there. This reduces size of all other subtypes by 16 bytes on 64bit platforms. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index b25df037fceb..f72448095833 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1611,7 +1611,6 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) * struct nft_trans - nf_tables object update in transaction * * @list: used internally - * @binding_list: list of objects with possible bindings * @msg_type: message type * @put_net: ctx->net needs to be put * @ctx: transaction context @@ -1621,12 +1620,23 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) */ struct nft_trans { struct list_head list; - struct list_head binding_list; int msg_type; bool put_net; struct nft_ctx ctx; }; +/** + * struct nft_trans_binding - nf_tables object with binding support in transaction + * @nft_trans: base structure, MUST be first member + * @binding_list: list of objects with possible bindings + * + * This is the base type used by objects that can be bound to a chain. + */ +struct nft_trans_binding { + struct nft_trans nft_trans; + struct list_head binding_list; +}; + struct nft_trans_rule { struct nft_trans nft_trans; struct nft_rule *rule; @@ -1647,7 +1657,7 @@ struct nft_trans_rule { nft_trans_container_rule(trans)->bound struct nft_trans_set { - struct nft_trans nft_trans; + struct nft_trans_binding nft_trans_binding; struct nft_set *set; u32 set_id; u32 gc_int; @@ -1657,8 +1667,8 @@ struct nft_trans_set { u32 size; }; -#define nft_trans_container_set(trans) \ - container_of(trans, struct nft_trans_set, nft_trans) +#define nft_trans_container_set(t) \ + container_of(t, struct nft_trans_set, nft_trans_binding.nft_trans) #define nft_trans_set(trans) \ nft_trans_container_set(trans)->set #define nft_trans_set_id(trans) \ @@ -1675,7 +1685,7 @@ struct nft_trans_set { nft_trans_container_set(trans)->size struct nft_trans_chain { - struct nft_trans nft_trans; + struct nft_trans_binding nft_trans_binding; struct nft_chain *chain; bool update; char *name; @@ -1687,8 +1697,8 @@ struct nft_trans_chain { struct list_head hook_list; }; -#define nft_trans_container_chain(trans) \ - container_of(trans, struct nft_trans_chain, nft_trans) +#define nft_trans_container_chain(t) \ + container_of(t, struct nft_trans_chain, nft_trans_binding.nft_trans) #define nft_trans_chain(trans) \ nft_trans_container_chain(trans)->chain #define nft_trans_chain_update(trans) \ -- cgit v1.2.3 From b3f4c216f7af37fa60e50d2ebb3ec9dd0f93886c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:43 +0200 Subject: netfilter: nf_tables: compact chain+ft transaction objects Cover holes to reduce both structures by 8 byte. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f72448095833..1f0607b671ac 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1687,10 +1687,10 @@ struct nft_trans_set { struct nft_trans_chain { struct nft_trans_binding nft_trans_binding; struct nft_chain *chain; - bool update; char *name; struct nft_stats __percpu *stats; u8 policy; + bool update; bool bound; u32 chain_id; struct nft_base_chain *basechain; @@ -1763,9 +1763,9 @@ struct nft_trans_obj { struct nft_trans_flowtable { struct nft_trans nft_trans; struct nft_flowtable *flowtable; - bool update; struct list_head hook_list; u32 flags; + bool update; }; #define nft_trans_container_flowtable(t) \ -- cgit v1.2.3 From 8965d42bcf54d42cbc72fe34a9d0ec3f8527debd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:45 +0200 Subject: netfilter: nf_tables: pass nft_chain to destroy function, not nft_ctx It would be better to not store nft_ctx inside nft_trans object, the netlink ctx strucutre is huge and most of its information is never needed in places that use trans->ctx. Avoid/reduce its usage if possible, no runtime behaviour change intended. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 1f0607b671ac..328fdc140551 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1171,7 +1171,7 @@ static inline bool nft_chain_is_bound(struct nft_chain *chain) int nft_chain_add(struct nft_table *table, struct nft_chain *chain); void nft_chain_del(struct nft_chain *chain); -void nf_tables_chain_destroy(struct nft_ctx *ctx); +void nf_tables_chain_destroy(struct nft_chain *chain); struct nft_stats { u64 bytes; -- cgit v1.2.3 From 13f20bc9ec4f9f25935bf52337d3d1708787bd55 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 24 Jun 2024 20:57:03 +0200 Subject: netfilter: nf_tables: store chain pointer in rule transaction Currently the chain can be derived from trans->ctx.chain, but the ctx will go away soon. Thus add the chain pointer to nft_trans_rule structure itself. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 328fdc140551..86e6bd63a205 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1640,6 +1640,7 @@ struct nft_trans_binding { struct nft_trans_rule { struct nft_trans nft_trans; struct nft_rule *rule; + struct nft_chain *chain; struct nft_flow_rule *flow; u32 rule_id; bool bound; @@ -1655,6 +1656,8 @@ struct nft_trans_rule { nft_trans_container_rule(trans)->rule_id #define nft_trans_rule_bound(trans) \ nft_trans_container_rule(trans)->bound +#define nft_trans_rule_chain(trans) \ + nft_trans_container_rule(trans)->chain struct nft_trans_set { struct nft_trans_binding nft_trans_binding; -- cgit v1.2.3 From e169285f8c56b8d5702475de0582dc83650c6cee Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 13 May 2024 15:00:51 +0200 Subject: netfilter: nf_tables: do not store nft_ctx in transaction objects nft_ctx is huge and most of the information stored within isn't used at all. Remove nft_ctx member from the base transaction structure and store only what is needed. After this change, relevant struct sizes are: struct nft_trans_chain { /* size: 120 (-32), cachelines: 2, members: 10 */ struct nft_trans_elem { /* size: 72 (-40), cachelines: 2, members: 4 */ struct nft_trans_flowtable { /* size: 80 (-48), cachelines: 2, members: 5 */ struct nft_trans_obj { /* size: 72 (-40), cachelines: 2, members: 4 */ struct nft_trans_rule { /* size: 80 (-32), cachelines: 2, members: 6 */ struct nft_trans_set { /* size: 96 (-24), cachelines: 2, members: 8 */ struct nft_trans_table { /* size: 56 (-40), cachelines: 1, members: 2 */ struct nft_trans_elem can now be allocated from kmalloc-96 instead of kmalloc-128 slab. A further reduction by 8 bytes would even allow for kmalloc-64. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 43 +++++++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 86e6bd63a205..1e8da1b882ac 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1611,18 +1611,26 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) * struct nft_trans - nf_tables object update in transaction * * @list: used internally + * @net: struct net + * @table: struct nft_table the object resides in * @msg_type: message type - * @put_net: ctx->net needs to be put - * @ctx: transaction context + * @seq: netlink sequence number + * @flags: modifiers to new request + * @report: notify via unicast netlink message + * @put_net: net needs to be put * * This is the information common to all objects in the transaction, * this must always be the first member of derived sub-types. */ struct nft_trans { struct list_head list; + struct net *net; + struct nft_table *table; int msg_type; - bool put_net; - struct nft_ctx ctx; + u32 seq; + u16 flags; + u8 report:1; + u8 put_net:1; }; /** @@ -1794,6 +1802,33 @@ struct nft_trans_gc { struct rcu_head rcu; }; +static inline void nft_ctx_update(struct nft_ctx *ctx, + const struct nft_trans *trans) +{ + switch (trans->msg_type) { + case NFT_MSG_NEWRULE: + case NFT_MSG_DELRULE: + case NFT_MSG_DESTROYRULE: + ctx->chain = nft_trans_rule_chain(trans); + break; + case NFT_MSG_NEWCHAIN: + case NFT_MSG_DELCHAIN: + case NFT_MSG_DESTROYCHAIN: + ctx->chain = nft_trans_chain(trans); + break; + default: + ctx->chain = NULL; + break; + } + + ctx->net = trans->net; + ctx->table = trans->table; + ctx->family = trans->table->family; + ctx->report = trans->report; + ctx->flags = trans->flags; + ctx->seq = trans->seq; +} + struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, unsigned int gc_seq, gfp_t gfp); void nft_trans_gc_destroy(struct nft_trans_gc *trans); -- cgit v1.2.3 From 98a563de231fcc91d65c6028c7f646fe28faf83a Mon Sep 17 00:00:00 2001 From: Dumitru Ceclan Date: Fri, 7 Jun 2024 17:53:09 +0300 Subject: iio: adc: ad_sigma_delta: add disable_one callback Sigma delta ADCs with a sequencer need to disable the previously enabled channel when reading using ad_sigma_delta_single_conversion(). This was done manually in drivers for devices with sequencers. This patch implements handling of single channel disabling after a single conversion. Reviewed-by: Nuno Sa Signed-off-by: Dumitru Ceclan Reviewed-by: David Lechner Link: https://patch.msgid.link/20240607-ad4111-v7-3-97e3855900a0@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/adc/ad_sigma_delta.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 383614ebd760..f8c1d2505940 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -37,6 +37,10 @@ struct iio_dev; * @append_status: Will be called to enable status append at the end of the sample, may be NULL. * @set_mode: Will be called to select the current mode, may be NULL. * @disable_all: Will be called to disable all channels, may be NULL. + * @disable_one: Will be called to disable a single channel after + * ad_sigma_delta_single_conversion(), may be NULL. + * Usage of this callback expects iio_chan_spec.address to contain + * the value required for the driver to identify the channel. * @postprocess_sample: Is called for each sampled data word, can be used to * modify or drop the sample data, it, may be NULL. * @has_registers: true if the device has writable and readable registers, false @@ -55,6 +59,7 @@ struct ad_sigma_delta_info { int (*append_status)(struct ad_sigma_delta *, bool append); int (*set_mode)(struct ad_sigma_delta *, enum ad_sigma_delta_mode mode); int (*disable_all)(struct ad_sigma_delta *); + int (*disable_one)(struct ad_sigma_delta *, unsigned int chan); int (*postprocess_sample)(struct ad_sigma_delta *, unsigned int raw_sample); bool has_registers; unsigned int addr_shift; @@ -140,6 +145,15 @@ static inline int ad_sigma_delta_disable_all(struct ad_sigma_delta *sd) return 0; } +static inline int ad_sigma_delta_disable_one(struct ad_sigma_delta *sd, + unsigned int chan) +{ + if (sd->info->disable_one) + return sd->info->disable_one(sd, chan); + + return 0; +} + static inline int ad_sigma_delta_set_mode(struct ad_sigma_delta *sd, unsigned int mode) { -- cgit v1.2.3 From d305b7f34ee11c4901b8da150b641bcffbd3e951 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Tue, 18 Jun 2024 15:32:05 +0200 Subject: iio: imu: adis: move to the cleanup magic This makes locking and handling error paths simpler. Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240618-dev-iio-adis-cleanup-v1-2-bd93ce7845c7@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 54 +++++++++++--------------------------------- 1 file changed, 13 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 4bb0a53cf7ea..93dad627378f 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -9,6 +9,7 @@ #ifndef __IIO_ADIS_H__ #define __IIO_ADIS_H__ +#include #include #include #include @@ -150,13 +151,8 @@ int __adis_reset(struct adis *adis); */ static inline int adis_reset(struct adis *adis) { - int ret; - - mutex_lock(&adis->state_lock); - ret = __adis_reset(adis); - mutex_unlock(&adis->state_lock); - - return ret; + guard(mutex)(&adis->state_lock); + return __adis_reset(adis); } int __adis_write_reg(struct adis *adis, unsigned int reg, @@ -248,13 +244,8 @@ static inline int __adis_read_reg_32(struct adis *adis, unsigned int reg, static inline int adis_write_reg(struct adis *adis, unsigned int reg, unsigned int val, unsigned int size) { - int ret; - - mutex_lock(&adis->state_lock); - ret = __adis_write_reg(adis, reg, val, size); - mutex_unlock(&adis->state_lock); - - return ret; + guard(mutex)(&adis->state_lock); + return __adis_write_reg(adis, reg, val, size); } /** @@ -267,13 +258,8 @@ static inline int adis_write_reg(struct adis *adis, unsigned int reg, static int adis_read_reg(struct adis *adis, unsigned int reg, unsigned int *val, unsigned int size) { - int ret; - - mutex_lock(&adis->state_lock); - ret = __adis_read_reg(adis, reg, val, size); - mutex_unlock(&adis->state_lock); - - return ret; + guard(mutex)(&adis->state_lock); + return __adis_read_reg(adis, reg, val, size); } /** @@ -365,12 +351,8 @@ int __adis_update_bits_base(struct adis *adis, unsigned int reg, const u32 mask, static inline int adis_update_bits_base(struct adis *adis, unsigned int reg, const u32 mask, const u32 val, u8 size) { - int ret; - - mutex_lock(&adis->state_lock); - ret = __adis_update_bits_base(adis, reg, mask, val, size); - mutex_unlock(&adis->state_lock); - return ret; + guard(mutex)(&adis->state_lock); + return __adis_update_bits_base(adis, reg, mask, val, size); } /** @@ -411,24 +393,14 @@ int __adis_enable_irq(struct adis *adis, bool enable); static inline int adis_enable_irq(struct adis *adis, bool enable) { - int ret; - - mutex_lock(&adis->state_lock); - ret = __adis_enable_irq(adis, enable); - mutex_unlock(&adis->state_lock); - - return ret; + guard(mutex)(&adis->state_lock); + return __adis_enable_irq(adis, enable); } static inline int adis_check_status(struct adis *adis) { - int ret; - - mutex_lock(&adis->state_lock); - ret = __adis_check_status(adis); - mutex_unlock(&adis->state_lock); - - return ret; + guard(mutex)(&adis->state_lock); + return __adis_check_status(adis); } static inline void adis_dev_lock(struct adis *adis) -- cgit v1.2.3 From e6cab1ad9769a6b03dd1ba7ace106c36de85b1a8 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Tue, 18 Jun 2024 15:32:06 +0200 Subject: iio: imu: adis: add cleanup based lock helpers Add two new lock helpers that make use of the cleanup guard() and scoped_guard() macros. Thus, users won't have to worry about unlocking which is less prone to errors and allows for simpler error paths. Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240618-dev-iio-adis-cleanup-v1-3-bd93ce7845c7@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 93dad627378f..bc7332d12c44 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -403,6 +403,10 @@ static inline int adis_check_status(struct adis *adis) return __adis_check_status(adis); } +#define adis_dev_auto_lock(adis) guard(mutex)(&(adis)->state_lock) +#define adis_dev_auto_scoped_lock(adis) \ + scoped_guard(mutex, &(adis)->state_lock) + static inline void adis_dev_lock(struct adis *adis) { mutex_lock(&adis->state_lock); -- cgit v1.2.3 From bb78ad627659fc7a738356951999f2ba79e68059 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Tue, 18 Jun 2024 15:32:12 +0200 Subject: iio: imu: adis: remove legacy lock helpers Since all users were converted to the new cleanup based helper, adis_dev_lock() and adis_dev_unlock() can now be removed from the lib. Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240618-dev-iio-adis-cleanup-v1-9-bd93ce7845c7@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index bc7332d12c44..e6a75356567a 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -407,16 +407,6 @@ static inline int adis_check_status(struct adis *adis) #define adis_dev_auto_scoped_lock(adis) \ scoped_guard(mutex, &(adis)->state_lock) -static inline void adis_dev_lock(struct adis *adis) -{ - mutex_lock(&adis->state_lock); -} - -static inline void adis_dev_unlock(struct adis *adis) -{ - mutex_unlock(&adis->state_lock); -} - int adis_single_conversion(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, unsigned int error_mask, int *val); -- cgit v1.2.3 From ca567df74a28a9fb368c6b2d93e864113f73f5c2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 7 Jun 2020 22:47:08 +0200 Subject: nsfs: add pid translation ioctls Add ioctl()s to translate pids between pid namespaces. LXCFS is a tiny fuse filesystem used to virtualize various aspects of procfs. LXCFS is run on the host. The files and directories it creates can be bind-mounted by e.g. a container at startup and mounted over the various procfs files the container wishes to have virtualized. When e.g. a read request for uptime is received, LXCFS will receive the pid of the reader. In order to virtualize the corresponding read, LXCFS needs to know the pid of the init process of the reader's pid namespace. In order to do this, LXCFS first needs to fork() two helper processes. The first helper process setns() to the readers pid namespace. The second helper process is needed to create a process that is a proper member of the pid namespace. The second helper process then creates a ucred message with ucred.pid set to 1 and sends it back to LXCFS. The kernel will translate the ucred.pid field to the corresponding pid number in LXCFS's pid namespace. This way LXCFS can learn the init pid number of the reader's pid namespace and can go on to virtualize. Since these two forks() are costly LXCFS maintains an init pid cache that caches a given pid for a fixed amount of time. The cache is pruned during new read requests. However, even with the cache the hit of the two forks() is singificant when a very large number of containers are running. With this simple patch we add an ns ioctl that let's a caller retrieve the init pid nr of a pid namespace through its pid namespace fd. This significantly improves performance with a very simple change. Support translation of pids and tgids. Other concepts can be added but there are no obvious users for this right now. To protect against races pidfds can be used to check whether the process is still valid. If needed, this can also be extended to work on pidfds directly. Link: https://lore.kernel.org/r/20240619-work-ns_ioctl-v1-1-7c0097e6bb6b@kernel.org Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner --- include/uapi/linux/nsfs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index a0c8552b64ee..faeb9195da08 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -15,5 +15,13 @@ #define NS_GET_NSTYPE _IO(NSIO, 0x3) /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) +/* Translate pid from target pid namespace into the caller's pid namespace. */ +#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x5, int) +/* Return thread-group leader id of pid in the callers pid namespace. */ +#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int) +/* Translate pid from caller's pid namespace into a target pid namespace. */ +#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x6, int) +/* Return thread-group leader id of pid in the target pid namespace. */ +#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x8, int) #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From e29630247be24c3987e2b048f8e152771b32d38b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 3 Jun 2024 20:16:59 +0200 Subject: netfilter: nf_tables: rise cap on SELinux secmark context secmark context is artificially limited 256 bytes, rise it to 4Kbytes. Fixes: fb961945457f ("netfilter: nf_tables: add SECMARK support") Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index aa4094ca2444..639894ed1b97 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1376,7 +1376,7 @@ enum nft_secmark_attributes { #define NFTA_SECMARK_MAX (__NFTA_SECMARK_MAX - 1) /* Max security context length */ -#define NFT_SECMARK_CTX_MAXLEN 256 +#define NFT_SECMARK_CTX_MAXLEN 4096 /** * enum nft_reject_types - nf_tables reject expression reject types -- cgit v1.2.3 From a6a61b9701d1add3bb6d86d8e259d833ea91a1a6 Mon Sep 17 00:00:00 2001 From: Jagadeesh Kona Date: Sun, 2 Jun 2024 17:14:33 +0530 Subject: dt-bindings: clock: qcom: Add SM8650 video clock controller SM8650 video clock controller has most clocks same as SM8450, but it also has few additional clocks and resets. Add device tree bindings for the video clock controller on Qualcomm SM8650 platform by defining these additional clocks and resets on top of SM8450. Signed-off-by: Jagadeesh Kona Reviewed-by: Bryan O'Donoghue Reviewed-by: Krzysztof Kozlowski Reviewed-by: Vladimir Zapolskiy Link: https://lore.kernel.org/r/20240602114439.1611-3-quic_jkona@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm8650-videocc.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sm8650-videocc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm8650-videocc.h b/include/dt-bindings/clock/qcom,sm8650-videocc.h new file mode 100644 index 000000000000..4e3c2d87280f --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm8650-videocc.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_VIDEO_CC_SM8650_H +#define _DT_BINDINGS_CLK_QCOM_VIDEO_CC_SM8650_H + +#include "qcom,sm8450-videocc.h" + +/* SM8650 introduces below new clocks and resets compared to SM8450 */ + +/* VIDEO_CC clocks */ +#define VIDEO_CC_MVS0_SHIFT_CLK 12 +#define VIDEO_CC_MVS0C_SHIFT_CLK 13 +#define VIDEO_CC_MVS1_SHIFT_CLK 14 +#define VIDEO_CC_MVS1C_SHIFT_CLK 15 +#define VIDEO_CC_XO_CLK_SRC 16 + +/* VIDEO_CC resets */ +#define VIDEO_CC_XO_CLK_ARES 7 + +#endif -- cgit v1.2.3 From 1ae3f0578e0e623e774db45870c0e34c47d8dd15 Mon Sep 17 00:00:00 2001 From: Jagadeesh Kona Date: Sun, 2 Jun 2024 17:14:37 +0530 Subject: dt-bindings: clock: qcom: Add SM8650 camera clock controller Add device tree bindings for the camera clock controller on Qualcomm SM8650 platform. Signed-off-by: Jagadeesh Kona Reviewed-by: Krzysztof Kozlowski Acked-by: Vladimir Zapolskiy Link: https://lore.kernel.org/r/20240602114439.1611-7-quic_jkona@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,sm8650-camcc.h | 195 ++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 include/dt-bindings/clock/qcom,sm8650-camcc.h (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,sm8650-camcc.h b/include/dt-bindings/clock/qcom,sm8650-camcc.h new file mode 100644 index 000000000000..df73bf35f4bf --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm8650-camcc.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_CAM_CC_SM8650_H +#define _DT_BINDINGS_CLK_QCOM_CAM_CC_SM8650_H + +/* CAM_CC clocks */ +#define CAM_CC_BPS_AHB_CLK 0 +#define CAM_CC_BPS_CLK 1 +#define CAM_CC_BPS_CLK_SRC 2 +#define CAM_CC_BPS_FAST_AHB_CLK 3 +#define CAM_CC_BPS_SHIFT_CLK 4 +#define CAM_CC_CAMNOC_AXI_NRT_CLK 5 +#define CAM_CC_CAMNOC_AXI_RT_CLK 6 +#define CAM_CC_CAMNOC_AXI_RT_CLK_SRC 7 +#define CAM_CC_CAMNOC_DCD_XO_CLK 8 +#define CAM_CC_CAMNOC_XO_CLK 9 +#define CAM_CC_CCI_0_CLK 10 +#define CAM_CC_CCI_0_CLK_SRC 11 +#define CAM_CC_CCI_1_CLK 12 +#define CAM_CC_CCI_1_CLK_SRC 13 +#define CAM_CC_CCI_2_CLK 14 +#define CAM_CC_CCI_2_CLK_SRC 15 +#define CAM_CC_CORE_AHB_CLK 16 +#define CAM_CC_CPAS_AHB_CLK 17 +#define CAM_CC_CPAS_BPS_CLK 18 +#define CAM_CC_CPAS_CRE_CLK 19 +#define CAM_CC_CPAS_FAST_AHB_CLK 20 +#define CAM_CC_CPAS_IFE_0_CLK 21 +#define CAM_CC_CPAS_IFE_1_CLK 22 +#define CAM_CC_CPAS_IFE_2_CLK 23 +#define CAM_CC_CPAS_IFE_LITE_CLK 24 +#define CAM_CC_CPAS_IPE_NPS_CLK 25 +#define CAM_CC_CPAS_SBI_CLK 26 +#define CAM_CC_CPAS_SFE_0_CLK 27 +#define CAM_CC_CPAS_SFE_1_CLK 28 +#define CAM_CC_CPAS_SFE_2_CLK 29 +#define CAM_CC_CPHY_RX_CLK_SRC 30 +#define CAM_CC_CRE_AHB_CLK 31 +#define CAM_CC_CRE_CLK 32 +#define CAM_CC_CRE_CLK_SRC 33 +#define CAM_CC_CSI0PHYTIMER_CLK 34 +#define CAM_CC_CSI0PHYTIMER_CLK_SRC 35 +#define CAM_CC_CSI1PHYTIMER_CLK 36 +#define CAM_CC_CSI1PHYTIMER_CLK_SRC 37 +#define CAM_CC_CSI2PHYTIMER_CLK 38 +#define CAM_CC_CSI2PHYTIMER_CLK_SRC 39 +#define CAM_CC_CSI3PHYTIMER_CLK 40 +#define CAM_CC_CSI3PHYTIMER_CLK_SRC 41 +#define CAM_CC_CSI4PHYTIMER_CLK 42 +#define CAM_CC_CSI4PHYTIMER_CLK_SRC 43 +#define CAM_CC_CSI5PHYTIMER_CLK 44 +#define CAM_CC_CSI5PHYTIMER_CLK_SRC 45 +#define CAM_CC_CSI6PHYTIMER_CLK 46 +#define CAM_CC_CSI6PHYTIMER_CLK_SRC 47 +#define CAM_CC_CSI7PHYTIMER_CLK 48 +#define CAM_CC_CSI7PHYTIMER_CLK_SRC 49 +#define CAM_CC_CSID_CLK 50 +#define CAM_CC_CSID_CLK_SRC 51 +#define CAM_CC_CSID_CSIPHY_RX_CLK 52 +#define CAM_CC_CSIPHY0_CLK 53 +#define CAM_CC_CSIPHY1_CLK 54 +#define CAM_CC_CSIPHY2_CLK 55 +#define CAM_CC_CSIPHY3_CLK 56 +#define CAM_CC_CSIPHY4_CLK 57 +#define CAM_CC_CSIPHY5_CLK 58 +#define CAM_CC_CSIPHY6_CLK 59 +#define CAM_CC_CSIPHY7_CLK 60 +#define CAM_CC_DRV_AHB_CLK 61 +#define CAM_CC_DRV_XO_CLK 62 +#define CAM_CC_FAST_AHB_CLK_SRC 63 +#define CAM_CC_GDSC_CLK 64 +#define CAM_CC_ICP_AHB_CLK 65 +#define CAM_CC_ICP_CLK 66 +#define CAM_CC_ICP_CLK_SRC 67 +#define CAM_CC_IFE_0_CLK 68 +#define CAM_CC_IFE_0_CLK_SRC 69 +#define CAM_CC_IFE_0_FAST_AHB_CLK 70 +#define CAM_CC_IFE_0_SHIFT_CLK 71 +#define CAM_CC_IFE_1_CLK 72 +#define CAM_CC_IFE_1_CLK_SRC 73 +#define CAM_CC_IFE_1_FAST_AHB_CLK 74 +#define CAM_CC_IFE_1_SHIFT_CLK 75 +#define CAM_CC_IFE_2_CLK 76 +#define CAM_CC_IFE_2_CLK_SRC 77 +#define CAM_CC_IFE_2_FAST_AHB_CLK 78 +#define CAM_CC_IFE_2_SHIFT_CLK 79 +#define CAM_CC_IFE_LITE_AHB_CLK 80 +#define CAM_CC_IFE_LITE_CLK 81 +#define CAM_CC_IFE_LITE_CLK_SRC 82 +#define CAM_CC_IFE_LITE_CPHY_RX_CLK 83 +#define CAM_CC_IFE_LITE_CSID_CLK 84 +#define CAM_CC_IFE_LITE_CSID_CLK_SRC 85 +#define CAM_CC_IPE_NPS_AHB_CLK 86 +#define CAM_CC_IPE_NPS_CLK 87 +#define CAM_CC_IPE_NPS_CLK_SRC 88 +#define CAM_CC_IPE_NPS_FAST_AHB_CLK 89 +#define CAM_CC_IPE_PPS_CLK 90 +#define CAM_CC_IPE_PPS_FAST_AHB_CLK 91 +#define CAM_CC_IPE_SHIFT_CLK 92 +#define CAM_CC_JPEG_1_CLK 93 +#define CAM_CC_JPEG_CLK 94 +#define CAM_CC_JPEG_CLK_SRC 95 +#define CAM_CC_MCLK0_CLK 96 +#define CAM_CC_MCLK0_CLK_SRC 97 +#define CAM_CC_MCLK1_CLK 98 +#define CAM_CC_MCLK1_CLK_SRC 99 +#define CAM_CC_MCLK2_CLK 100 +#define CAM_CC_MCLK2_CLK_SRC 101 +#define CAM_CC_MCLK3_CLK 102 +#define CAM_CC_MCLK3_CLK_SRC 103 +#define CAM_CC_MCLK4_CLK 104 +#define CAM_CC_MCLK4_CLK_SRC 105 +#define CAM_CC_MCLK5_CLK 106 +#define CAM_CC_MCLK5_CLK_SRC 107 +#define CAM_CC_MCLK6_CLK 108 +#define CAM_CC_MCLK6_CLK_SRC 109 +#define CAM_CC_MCLK7_CLK 110 +#define CAM_CC_MCLK7_CLK_SRC 111 +#define CAM_CC_PLL0 112 +#define CAM_CC_PLL0_OUT_EVEN 113 +#define CAM_CC_PLL0_OUT_ODD 114 +#define CAM_CC_PLL1 115 +#define CAM_CC_PLL1_OUT_EVEN 116 +#define CAM_CC_PLL2 117 +#define CAM_CC_PLL3 118 +#define CAM_CC_PLL3_OUT_EVEN 119 +#define CAM_CC_PLL4 120 +#define CAM_CC_PLL4_OUT_EVEN 121 +#define CAM_CC_PLL5 122 +#define CAM_CC_PLL5_OUT_EVEN 123 +#define CAM_CC_PLL6 124 +#define CAM_CC_PLL6_OUT_EVEN 125 +#define CAM_CC_PLL7 126 +#define CAM_CC_PLL7_OUT_EVEN 127 +#define CAM_CC_PLL8 128 +#define CAM_CC_PLL8_OUT_EVEN 129 +#define CAM_CC_PLL9 130 +#define CAM_CC_PLL9_OUT_EVEN 131 +#define CAM_CC_PLL9_OUT_ODD 132 +#define CAM_CC_PLL10 133 +#define CAM_CC_PLL10_OUT_EVEN 134 +#define CAM_CC_QDSS_DEBUG_CLK 135 +#define CAM_CC_QDSS_DEBUG_CLK_SRC 136 +#define CAM_CC_QDSS_DEBUG_XO_CLK 137 +#define CAM_CC_SBI_CLK 138 +#define CAM_CC_SBI_FAST_AHB_CLK 139 +#define CAM_CC_SBI_SHIFT_CLK 140 +#define CAM_CC_SFE_0_CLK 141 +#define CAM_CC_SFE_0_CLK_SRC 142 +#define CAM_CC_SFE_0_FAST_AHB_CLK 143 +#define CAM_CC_SFE_0_SHIFT_CLK 144 +#define CAM_CC_SFE_1_CLK 145 +#define CAM_CC_SFE_1_CLK_SRC 146 +#define CAM_CC_SFE_1_FAST_AHB_CLK 147 +#define CAM_CC_SFE_1_SHIFT_CLK 148 +#define CAM_CC_SFE_2_CLK 149 +#define CAM_CC_SFE_2_CLK_SRC 150 +#define CAM_CC_SFE_2_FAST_AHB_CLK 151 +#define CAM_CC_SFE_2_SHIFT_CLK 152 +#define CAM_CC_SLEEP_CLK 153 +#define CAM_CC_SLEEP_CLK_SRC 154 +#define CAM_CC_SLOW_AHB_CLK_SRC 155 +#define CAM_CC_TITAN_TOP_SHIFT_CLK 156 +#define CAM_CC_XO_CLK_SRC 157 + +/* CAM_CC power domains */ +#define CAM_CC_TITAN_TOP_GDSC 0 +#define CAM_CC_BPS_GDSC 1 +#define CAM_CC_IFE_0_GDSC 2 +#define CAM_CC_IFE_1_GDSC 3 +#define CAM_CC_IFE_2_GDSC 4 +#define CAM_CC_IPE_0_GDSC 5 +#define CAM_CC_SBI_GDSC 6 +#define CAM_CC_SFE_0_GDSC 7 +#define CAM_CC_SFE_1_GDSC 8 +#define CAM_CC_SFE_2_GDSC 9 + +/* CAM_CC resets */ +#define CAM_CC_BPS_BCR 0 +#define CAM_CC_DRV_BCR 1 +#define CAM_CC_ICP_BCR 2 +#define CAM_CC_IFE_0_BCR 3 +#define CAM_CC_IFE_1_BCR 4 +#define CAM_CC_IFE_2_BCR 5 +#define CAM_CC_IPE_0_BCR 6 +#define CAM_CC_QDSS_DEBUG_BCR 7 +#define CAM_CC_SBI_BCR 8 +#define CAM_CC_SFE_0_BCR 9 +#define CAM_CC_SFE_1_BCR 10 +#define CAM_CC_SFE_2_BCR 11 + +#endif -- cgit v1.2.3 From 0e942053e4dc42a760f48c1981f3239825430f15 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Fri, 21 Jun 2024 18:13:49 +0800 Subject: linux/dim: move useful macros to .h file Useful macros will be used effectively elsewhere. These will be utilized in subsequent patches. Signed-off-by: Heng Qi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240621101353.107425-2-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski --- include/linux/dim.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/dim.h b/include/linux/dim.h index f343bc9aa2ec..43398f5eade2 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -10,6 +10,13 @@ #include #include +/* Number of DIM profiles and period mode. */ +#define NET_DIM_PARAMS_NUM_PROFILES 5 +#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256 +#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128 +#define NET_DIM_DEF_PROFILE_CQE 1 +#define NET_DIM_DEF_PROFILE_EQE 1 + /* * Number of events between DIM iterations. * Causes a moderation of the algorithm run. -- cgit v1.2.3 From f750dfe825b904164688adeb147950e0e0c4d262 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Fri, 21 Jun 2024 18:13:51 +0800 Subject: ethtool: provide customized dim profile management The NetDIM library, currently leveraged by an array of NICs, delivers excellent acceleration benefits. Nevertheless, NICs vary significantly in their dim profile list prerequisites. Specifically, virtio-net backends may present diverse sw or hw device implementation, making a one-size-fits-all parameter list impractical. On Alibaba Cloud, the virtio DPU's performance under the default DIM profile falls short of expectations, partly due to a mismatch in parameter configuration. I also noticed that ice/idpf/ena and other NICs have customized profilelist or placed some restrictions on dim capabilities. Motivated by this, I tried adding new params for "ethtool -C" that provides a per-device control to modify and access a device's interrupt parameters. Usage ======== The target NIC is named ethx. Assume that ethx only declares support for rx profile setting (with DIM_PROFILE_RX flag set in profile_flags) and supports modification of usec and pkt fields. 1. Query the currently customized list of the device $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 256, .comps = n/a,}, {.usec = 8, .pkts = 256, .comps = n/a,}, {.usec = 64, .pkts = 256, .comps = n/a,}, {.usec = 128, .pkts = 256, .comps = n/a,}, {.usec = 256, .pkts = 256, .comps = n/a,} tx-profile: n/a 2. Tune $ ethtool -C ethx rx-profile 1,1,n_2,n,n_3,3,n_4,4,n_n,5,n "n" means do not modify this field. $ ethtool -c ethx ... rx-profile: {.usec = 1, .pkts = 1, .comps = n/a,}, {.usec = 2, .pkts = 256, .comps = n/a,}, {.usec = 3, .pkts = 3, .comps = n/a,}, {.usec = 4, .pkts = 4, .comps = n/a,}, {.usec = 256, .pkts = 5, .comps = n/a,} tx-profile: n/a 3. Hint If the device does not support some type of customized dim profiles, the corresponding "n/a" will display. If the "n/a" field is being modified, -EOPNOTSUPP will be reported. Signed-off-by: Heng Qi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240621101353.107425-4-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski --- include/linux/dim.h | 58 ++++++++++++++++++++++++++++++++++++ include/linux/ethtool.h | 4 ++- include/linux/netdevice.h | 3 ++ include/uapi/linux/ethtool_netlink.h | 22 ++++++++++++++ 4 files changed, 86 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dim.h b/include/linux/dim.h index 43398f5eade2..e0f39bd85432 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -10,6 +10,8 @@ #include #include +struct net_device; + /* Number of DIM profiles and period mode. */ #define NET_DIM_PARAMS_NUM_PROFILES 5 #define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256 @@ -45,12 +47,45 @@ * @pkts: CQ packet counter suggestion (by DIM) * @comps: Completion counter * @cq_period_mode: CQ period count mode (from CQE/EQE) + * @rcu: for asynchronous kfree_rcu */ struct dim_cq_moder { u16 usec; u16 pkts; u16 comps; u8 cq_period_mode; + struct rcu_head rcu; +}; + +#define DIM_PROFILE_RX BIT(0) /* support rx profile modification */ +#define DIM_PROFILE_TX BIT(1) /* support tx profile modification */ + +#define DIM_COALESCE_USEC BIT(0) /* support usec field modification */ +#define DIM_COALESCE_PKTS BIT(1) /* support pkts field modification */ +#define DIM_COALESCE_COMPS BIT(2) /* support comps field modification */ + +/** + * struct dim_irq_moder - Structure for irq moderation information. + * Used to collect irq moderation related information. + * + * @profile_flags: DIM_PROFILE_* + * @coal_flags: DIM_COALESCE_* for Rx and Tx + * @dim_rx_mode: Rx DIM period count mode: CQE or EQE + * @dim_tx_mode: Tx DIM period count mode: CQE or EQE + * @rx_profile: DIM profile list for Rx + * @tx_profile: DIM profile list for Tx + * @rx_dim_work: Rx DIM worker scheduled by net_dim() + * @tx_dim_work: Tx DIM worker scheduled by net_dim() + */ +struct dim_irq_moder { + u8 profile_flags; + u8 coal_flags; + u8 dim_rx_mode; + u8 dim_tx_mode; + struct dim_cq_moder __rcu *rx_profile; + struct dim_cq_moder __rcu *tx_profile; + void (*rx_dim_work)(struct work_struct *work); + void (*tx_dim_work)(struct work_struct *work); }; /** @@ -198,6 +233,29 @@ enum dim_step_result { DIM_ON_EDGE, }; +/** + * net_dim_init_irq_moder - collect information to initialize irq moderation + * @dev: target network device + * @profile_flags: Rx or Tx profile modification capability + * @coal_flags: irq moderation params flags + * @rx_mode: CQ period mode for Rx + * @tx_mode: CQ period mode for Tx + * @rx_dim_work: Rx worker called after dim decision + * @tx_dim_work: Tx worker called after dim decision + * + * Return: 0 on success or a negative error code. + */ +int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags, + u8 coal_flags, u8 rx_mode, u8 tx_mode, + void (*rx_dim_work)(struct work_struct *work), + void (*tx_dim_work)(struct work_struct *work)); + +/** + * net_dim_free_irq_moder - free fields for irq moderation + * @dev: target network device + */ +void net_dim_free_irq_moder(struct net_device *dev); + /** * dim_on_top - check if current state is a good place to stop (top location) * @dim: DIM context diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 6fd9107d3cc0..959196af7f5a 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -284,7 +284,9 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, #define ETHTOOL_COALESCE_TX_AGGR_MAX_BYTES BIT(24) #define ETHTOOL_COALESCE_TX_AGGR_MAX_FRAMES BIT(25) #define ETHTOOL_COALESCE_TX_AGGR_TIME_USECS BIT(26) -#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(26, 0) +#define ETHTOOL_COALESCE_RX_PROFILE BIT(27) +#define ETHTOOL_COALESCE_TX_PROFILE BIT(28) +#define ETHTOOL_COALESCE_ALL_PARAMS GENMASK(28, 0) #define ETHTOOL_COALESCE_USECS \ (ETHTOOL_COALESCE_RX_USECS | ETHTOOL_COALESCE_TX_USECS) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4e81660b4462..cc18acd3c58b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2402,6 +2402,9 @@ struct net_device { /** @page_pools: page pools created for this netdevice */ struct hlist_head page_pools; #endif + + /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */ + struct dim_irq_moder *irq_moder; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index b49b804b9495..d15856c7e001 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -415,12 +415,34 @@ enum { ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, /* u32 */ ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, /* u32 */ ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, /* u32 */ + /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_RX_PROFILE, + /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_TX_PROFILE, /* add new constants above here */ __ETHTOOL_A_COALESCE_CNT, ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) }; +enum { + ETHTOOL_A_PROFILE_UNSPEC, + /* nest, _A_IRQ_MODERATION_* */ + ETHTOOL_A_PROFILE_IRQ_MODERATION, + __ETHTOOL_A_PROFILE_CNT, + ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) +}; + +enum { + ETHTOOL_A_IRQ_MODERATION_UNSPEC, + ETHTOOL_A_IRQ_MODERATION_USEC, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_PKTS, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_COMPS, /* u32 */ + + __ETHTOOL_A_IRQ_MODERATION_CNT, + ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1) +}; + /* PAUSE */ enum { -- cgit v1.2.3 From 13ba28c5cd047e272f9dbbeb5c62c403873d8987 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Fri, 21 Jun 2024 18:13:52 +0800 Subject: dim: add new interfaces for initialization and getting results DIM-related mode and work have been collected in one same place, so new interfaces are added to provide convenience. Signed-off-by: Heng Qi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240621101353.107425-5-hengqi@linux.alibaba.com Signed-off-by: Jakub Kicinski --- include/linux/dim.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include') diff --git a/include/linux/dim.h b/include/linux/dim.h index e0f39bd85432..1b581ff25a15 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -256,6 +256,54 @@ int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags, */ void net_dim_free_irq_moder(struct net_device *dev); +/** + * net_dim_setting - initialize DIM's cq mode and schedule worker + * @dev: target network device + * @dim: DIM context + * @is_tx: true indicates the tx direction, false indicates the rx direction + */ +void net_dim_setting(struct net_device *dev, struct dim *dim, bool is_tx); + +/** + * net_dim_work_cancel - synchronously cancel dim's worker + * @dim: DIM context + */ +void net_dim_work_cancel(struct dim *dim); + +/** + * net_dim_get_rx_irq_moder - get DIM rx results based on profile_ix + * @dev: target network device + * @dim: DIM context + * + * Return: DIM irq moderation + */ +struct dim_cq_moder +net_dim_get_rx_irq_moder(struct net_device *dev, struct dim *dim); + +/** + * net_dim_get_tx_irq_moder - get DIM tx results based on profile_ix + * @dev: target network device + * @dim: DIM context + * + * Return: DIM irq moderation + */ +struct dim_cq_moder +net_dim_get_tx_irq_moder(struct net_device *dev, struct dim *dim); + +/** + * net_dim_set_rx_mode - set DIM rx cq mode + * @dev: target network device + * @rx_mode: target rx cq mode + */ +void net_dim_set_rx_mode(struct net_device *dev, u8 rx_mode); + +/** + * net_dim_set_tx_mode - set DIM tx cq mode + * @dev: target network device + * @tx_mode: target tx cq mode + */ +void net_dim_set_tx_mode(struct net_device *dev, u8 tx_mode); + /** * dim_on_top - check if current state is a good place to stop (top location) * @dim: DIM context -- cgit v1.2.3 From eee5528890d54b22b46f833002355a5ee94c3bb4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 24 Jun 2024 08:55:01 +0900 Subject: PCI: Add Edimax Vendor ID to pci_ids.h Add the Edimax Vendor ID (0x1432) for an ethernet driver for Tehuti Networks TN40xx chips. This ID can be used for Realtek 8180 and Ralink rt28xx wireless drivers. Signed-off-by: FUJITA Tomonori Acked-by: Bjorn Helgaas Link: https://patch.msgid.link/20240623235507.108147-2-fujita.tomonori@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 942a587bb97e..677aea20d3e1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2126,6 +2126,8 @@ #define PCI_VENDOR_ID_CHELSIO 0x1425 +#define PCI_VENDOR_ID_EDIMAX 0x1432 + #define PCI_VENDOR_ID_ADLINK 0x144a #define PCI_VENDOR_ID_SAMSUNG 0x144d -- cgit v1.2.3 From e3943f00afdb71684c4f209f9d3a90d6b79771fc Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 19 Jun 2024 16:08:46 +0200 Subject: OPP: Introduce an OF helper function to inform if required-opps is used As being shown from a subsequent change to genpd, it's useful to understand if a device's OF node has an OPP-table described and whether it contains OPP nodes that makes use of the required-opps DT property. For this reason, let's introduce an OPP OF helper function called dev_pm_opp_of_has_required_opp(). Signed-off-by: Ulf Hansson Signed-off-by: Viresh Kumar --- include/linux/pm_opp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index dd7c8441af42..6424692c30b7 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -474,6 +474,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpuma struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev); struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp); int of_get_required_opp_performance_state(struct device_node *np, int index); +bool dev_pm_opp_of_has_required_opp(struct device *dev); int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_table *opp_table); int dev_pm_opp_of_register_em(struct device *dev, struct cpumask *cpus); int dev_pm_opp_calc_power(struct device *dev, unsigned long *uW, @@ -552,6 +553,11 @@ static inline int of_get_required_opp_performance_state(struct device_node *np, return -EOPNOTSUPP; } +static inline bool dev_pm_opp_of_has_required_opp(struct device *dev) +{ + return false; +} + static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_table *opp_table) { return -EOPNOTSUPP; -- cgit v1.2.3 From a047b66c0f05c668b4a6d41a3cacfe707efc9ecb Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 20 Jun 2024 01:53:43 +0300 Subject: media: v4l: subdev: Fix typo in documentation Replace the incorrect reference to the v4l2_subdev_enable_stream() function with the correct v4l2_subdev_enable_streams() spelling. Fixes: d0749adb3070 ("media: v4l2-subdev: Add subdev .(enable|disable)_streams() operations") Reviewed-by: Tomi Valkeinen Link: https://lore.kernel.org/r/20240619225343.15873-1-laurent.pinchart@ideasonboard.com Signed-off-by: Laurent Pinchart --- include/media/v4l2-subdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 1b74e037e440..bd235d325ff9 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -690,7 +690,7 @@ struct v4l2_subdev_pad_config { * * @pad: pad number * @stream: stream number - * @enabled: has the stream been enabled with v4l2_subdev_enable_stream() + * @enabled: has the stream been enabled with v4l2_subdev_enable_streams() * @fmt: &struct v4l2_mbus_framefmt * @crop: &struct v4l2_rect to be used for crop * @compose: &struct v4l2_rect to be used for compose -- cgit v1.2.3 From 1decf05d0f4de78ef67dc3f794709258c689e09e Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 18 Jun 2024 19:25:56 +0300 Subject: wifi: mac80211: inform the low level if drv_stop() is a suspend This will allow the low level driver to take different actions for different flows. Signed-off-by: Emmanuel Grumbach Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240618192529.739036208b6e.Ie18a2fe8e02bf2717549d39420b350cfdaf3d317@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ecfa65ade226..9c96e8ae9ef7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4444,7 +4444,7 @@ struct ieee80211_ops { struct ieee80211_tx_control *control, struct sk_buff *skb); int (*start)(struct ieee80211_hw *hw); - void (*stop)(struct ieee80211_hw *hw); + void (*stop)(struct ieee80211_hw *hw, bool suspend); #ifdef CONFIG_PM int (*suspend)(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan); int (*resume)(struct ieee80211_hw *hw); -- cgit v1.2.3 From f531d13bdfe3f4f084aaa8acae2cb0f02295f5ae Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Mon, 27 May 2024 20:29:14 -0700 Subject: xfrm: support sending NAT keepalives in ESP in UDP states Add the ability to send out RFC-3948 NAT keepalives from the xfrm stack. To use, Userspace sets an XFRM_NAT_KEEPALIVE_INTERVAL integer property when creating XFRM outbound states which denotes the number of seconds between keepalive messages. Keepalive messages are sent from a per net delayed work which iterates over the xfrm states. The logic is guarded by the xfrm state spinlock due to the xfrm state walk iterator. Possible future enhancements: - Adding counters to keep track of sent keepalives. - deduplicate NAT keepalives between states sharing the same nat keepalive parameters. - provisioning hardware offloads for devices capable of implementing this. - revise xfrm state list to use an rcu list in order to avoid running this under spinlock. Suggested-by: Paul Wouters Tested-by: Paul Wouters Tested-by: Antony Antony Signed-off-by: Eyal Birger Signed-off-by: Steffen Klassert --- include/net/ipv6_stubs.h | 3 +++ include/net/netns/xfrm.h | 1 + include/net/xfrm.h | 10 ++++++++++ include/uapi/linux/xfrm.h | 1 + 4 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 485c39a89866..11cefd50704d 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -9,6 +9,7 @@ #include #include #include +#include /* structs from net/ip6_fib.h */ struct fib6_info; @@ -72,6 +73,8 @@ struct ipv6_stub { int (*output)(struct net *, struct sock *, struct sk_buff *)); struct net_device *(*ipv6_dev_find)(struct net *net, const struct in6_addr *addr, struct net_device *dev); + int (*ip6_xmit)(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, + __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority); }; extern const struct ipv6_stub *ipv6_stub __read_mostly; diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 423b52eca908..d489d9250bff 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -83,6 +83,7 @@ struct netns_xfrm { spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; + struct delayed_work nat_keepalive_work; }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 77ebf5bcf0b9..46a214a76081 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -229,6 +229,10 @@ struct xfrm_state { struct xfrm_encap_tmpl *encap; struct sock __rcu *encap_sk; + /* NAT keepalive */ + u32 nat_keepalive_interval; /* seconds */ + time64_t nat_keepalive_expiration; + /* Data for care-of address */ xfrm_address_t *coaddr; @@ -2203,4 +2207,10 @@ static inline int register_xfrm_state_bpf(void) } #endif +int xfrm_nat_keepalive_init(unsigned short family); +void xfrm_nat_keepalive_fini(unsigned short family); +int xfrm_nat_keepalive_net_init(struct net *net); +int xfrm_nat_keepalive_net_fini(struct net *net); +void xfrm_nat_keepalive_state_updated(struct xfrm_state *x); + #endif /* _NET_XFRM_H */ diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index d950d02ab791..f28701500714 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -321,6 +321,7 @@ enum xfrm_attr_type_t { XFRMA_IF_ID, /* __u32 */ XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */ XFRMA_SA_DIR, /* __u8 */ + XFRMA_NAT_KEEPALIVE_INTERVAL, /* __u32 in seconds for NAT keepalive */ __XFRMA_MAX #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ -- cgit v1.2.3 From 0c5275bf75ec3708d95654195ae4ed80d946d088 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Sun, 16 Jun 2024 19:10:36 +0300 Subject: RDMA/mlx5: Use sq timestamp as QP timestamp when RoCE is disabled When creating a QP, one of the attributes is TS format (timestamp). In some devices, we have a limitation that all QPs should have the same ts_format. The ts_format is chosen based on the device's capability. The qp_ts_format cap resides under the RoCE caps table, and the cap will be 0 when RoCE is disabled. So when RoCE is disabled, the value that should be queried is sq_ts_format under HCA caps. Consider the case when the system supports REAL_TIME_TS format (0x2), some QPs are created with REAL_TIME_TS as ts_format, and afterwards RoCE gets disabled. When trying to construct a new QP, we can't use the qp_ts_format, that is queried from the RoCE caps table, Since it leads to passing 0x0 (FREE_RUNNING_TS) as the value of the qp_ts_format, which is different than the ts_format of the previously allocated QPs REAL_TIME_TS format (0x2). Thus, to resolve this, read the sq_ts_format, which also reflect the supported ts format for the QP when RoCE is disabled. Fixes: 4806f1e2fee8 ("net/mlx5: Set QP timestamp mode to default") Signed-off-by: Maher Sanalla Signed-off-by: Or Har-Toov Link: https://lore.kernel.org/r/32801966eb767c7fd62b8dea3b63991d5fbfe213.1718554199.git.leon@kernel.org Reviewed-by: Simon Horman Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/qp.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index f0e55bf3ec8b..ad1ce650146c 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -576,9 +576,12 @@ static inline const char *mlx5_qp_state_str(int state) static inline int mlx5_get_qp_default_ts(struct mlx5_core_dev *dev) { - return !MLX5_CAP_ROCE(dev, qp_ts_format) ? - MLX5_TIMESTAMP_FORMAT_FREE_RUNNING : - MLX5_TIMESTAMP_FORMAT_DEFAULT; + u8 supported_ts_cap = mlx5_get_roce_state(dev) ? + MLX5_CAP_ROCE(dev, qp_ts_format) : + MLX5_CAP_GEN(dev, sq_ts_format); + + return supported_ts_cap ? MLX5_TIMESTAMP_FORMAT_DEFAULT : + MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; } #endif /* MLX5_QP_H */ -- cgit v1.2.3 From 844bc12e6da3e2d8ab0cd96d049fc695d5d8ba68 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 19 Jun 2024 20:11:52 +0300 Subject: IB/core: add support for draining Shared receive queues To avoid leakage for QPs assocoated with SRQ, according to IB spec (section 10.3.1): "Note, for QPs that are associated with an SRQ, the Consumer should take the QP through the Error State before invoking a Destroy QP or a Modify QP to the Reset State. The Consumer may invoke the Destroy QP without first performing a Modify QP to the Error State and waiting for the Affiliated Asynchronous Last WQE Reached Event. However, if the Consumer does not wait for the Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment leakage may occur. Therefore, it is good programming practice to teardown a QP that is associated with an SRQ by using the following process: - Put the QP in the Error State; - wait for the Affiliated Asynchronous Last WQE Reached Event; - either: - drain the CQ by invoking the Poll CQ verb and either wait for CQ to be empty or the number of Poll CQ operations has exceeded CQ capacity size; or - post another WR that completes on the same CQ and wait for this WR to return as a WC; - and then invoke a Destroy QP or Reset QP." Catch the Last WQE Reached Event in the core layer during drain QP flow. Signed-off-by: Max Gurtovoy Link: https://lore.kernel.org/r/20240619171153.34631-2-mgurtovoy@nvidia.com Reviewed-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 477bf9dd5e71..5a193008f99c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1788,6 +1788,7 @@ struct ib_qp { struct list_head rdma_mrs; struct list_head sig_mrs; struct ib_srq *srq; + struct completion srq_completion; struct ib_xrcd *xrcd; /* XRC TGT QPs only */ struct list_head xrcd_list; @@ -1797,6 +1798,7 @@ struct ib_qp { struct ib_qp *real_qp; struct ib_uqp_object *uobject; void (*event_handler)(struct ib_event *, void *); + void (*registered_event_handler)(struct ib_event *, void *); void *qp_context; /* sgid_attrs associated with the AV's */ const struct ib_gid_attr *av_sgid_attr; -- cgit v1.2.3 From 210b1f6576e8b367907e7ff51ef425062e1468e4 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 24 Jun 2024 08:56:17 -0700 Subject: nvme-pci: do not directly handle subsys reset fallout Scheduling reset_work after a nvme subsystem reset is expected to fail on pcie, but this also prevents potential handling the platform's pcie services may provide that might successfully recovering the link without re-enumeration. Such examples include AER, DPC, and power's EEH. Provide a pci specific operation that safely initiates a subsystem reset, and instead of scheduling reset work, read back the status register to trigger a pcie read error. Since this only affects pci, the other fabrics drivers subscribe to a generic nvmf subsystem reset that is exactly the same as before. The loop fabric doesn't use it because nvmet doesn't support setting that property anyway. And since we're using the magic NSSR value in two places now, provide a symbolic define for it. Reported-by: Nilay Shroff Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 27faae34245d..57e27e48c913 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -25,6 +25,9 @@ #define NVME_NSID_ALL 0xffffffff +/* Special NSSR value, 'NVMe' */ +#define NVME_SUBSYS_RESET 0x4E564D65 + enum nvme_subsys_type { /* Referral to another discovery type target subsystem */ NVME_NQN_DISC = 1, -- cgit v1.2.3 From a6143bdcaf7e37ecce05df2a3d3c1c5d587f87b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 19 Jun 2024 12:11:42 +0200 Subject: mfd: stm32-timers: Unify alignment of register definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use tabs consistently for indention and properly align register names, values and comments. This improves readability (at least for my eyes). Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/da3b7f9af5794d7463aa62cbaa7251abf1af2018.1718791090.git.u.kleine-koenig@baylibre.com Signed-off-by: Lee Jones --- include/linux/mfd/stm32-timers.h | 170 +++++++++++++++++++-------------------- 1 file changed, 85 insertions(+), 85 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index 9eb17481b07f..5794110b2b28 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -12,97 +12,97 @@ #include #include -#define TIM_CR1 0x00 /* Control Register 1 */ -#define TIM_CR2 0x04 /* Control Register 2 */ -#define TIM_SMCR 0x08 /* Slave mode control reg */ -#define TIM_DIER 0x0C /* DMA/interrupt register */ -#define TIM_SR 0x10 /* Status register */ -#define TIM_EGR 0x14 /* Event Generation Reg */ -#define TIM_CCMR1 0x18 /* Capt/Comp 1 Mode Reg */ -#define TIM_CCMR2 0x1C /* Capt/Comp 2 Mode Reg */ -#define TIM_CCER 0x20 /* Capt/Comp Enable Reg */ -#define TIM_CNT 0x24 /* Counter */ -#define TIM_PSC 0x28 /* Prescaler */ -#define TIM_ARR 0x2c /* Auto-Reload Register */ -#define TIM_CCR1 0x34 /* Capt/Comp Register 1 */ -#define TIM_CCR2 0x38 /* Capt/Comp Register 2 */ -#define TIM_CCR3 0x3C /* Capt/Comp Register 3 */ -#define TIM_CCR4 0x40 /* Capt/Comp Register 4 */ -#define TIM_BDTR 0x44 /* Break and Dead-Time Reg */ -#define TIM_DCR 0x48 /* DMA control register */ -#define TIM_DMAR 0x4C /* DMA register for transfer */ -#define TIM_TISEL 0x68 /* Input Selection */ +#define TIM_CR1 0x00 /* Control Register 1 */ +#define TIM_CR2 0x04 /* Control Register 2 */ +#define TIM_SMCR 0x08 /* Slave mode control reg */ +#define TIM_DIER 0x0C /* DMA/interrupt register */ +#define TIM_SR 0x10 /* Status register */ +#define TIM_EGR 0x14 /* Event Generation Reg */ +#define TIM_CCMR1 0x18 /* Capt/Comp 1 Mode Reg */ +#define TIM_CCMR2 0x1C /* Capt/Comp 2 Mode Reg */ +#define TIM_CCER 0x20 /* Capt/Comp Enable Reg */ +#define TIM_CNT 0x24 /* Counter */ +#define TIM_PSC 0x28 /* Prescaler */ +#define TIM_ARR 0x2c /* Auto-Reload Register */ +#define TIM_CCR1 0x34 /* Capt/Comp Register 1 */ +#define TIM_CCR2 0x38 /* Capt/Comp Register 2 */ +#define TIM_CCR3 0x3C /* Capt/Comp Register 3 */ +#define TIM_CCR4 0x40 /* Capt/Comp Register 4 */ +#define TIM_BDTR 0x44 /* Break and Dead-Time Reg */ +#define TIM_DCR 0x48 /* DMA control register */ +#define TIM_DMAR 0x4C /* DMA register for transfer */ +#define TIM_TISEL 0x68 /* Input Selection */ -#define TIM_CR1_CEN BIT(0) /* Counter Enable */ -#define TIM_CR1_DIR BIT(4) /* Counter Direction */ -#define TIM_CR1_ARPE BIT(7) /* Auto-reload Preload Ena */ -#define TIM_CR2_MMS (BIT(4) | BIT(5) | BIT(6)) /* Master mode selection */ -#define TIM_CR2_MMS2 GENMASK(23, 20) /* Master mode selection 2 */ -#define TIM_SMCR_SMS (BIT(0) | BIT(1) | BIT(2)) /* Slave mode selection */ -#define TIM_SMCR_TS (BIT(4) | BIT(5) | BIT(6)) /* Trigger selection */ -#define TIM_DIER_UIE BIT(0) /* Update interrupt */ -#define TIM_DIER_CC1IE BIT(1) /* CC1 Interrupt Enable */ -#define TIM_DIER_CC2IE BIT(2) /* CC2 Interrupt Enable */ -#define TIM_DIER_CC3IE BIT(3) /* CC3 Interrupt Enable */ -#define TIM_DIER_CC4IE BIT(4) /* CC4 Interrupt Enable */ -#define TIM_DIER_CC_IE(x) BIT((x) + 1) /* CC1, CC2, CC3, CC4 interrupt enable */ -#define TIM_DIER_UDE BIT(8) /* Update DMA request Enable */ -#define TIM_DIER_CC1DE BIT(9) /* CC1 DMA request Enable */ -#define TIM_DIER_CC2DE BIT(10) /* CC2 DMA request Enable */ -#define TIM_DIER_CC3DE BIT(11) /* CC3 DMA request Enable */ -#define TIM_DIER_CC4DE BIT(12) /* CC4 DMA request Enable */ -#define TIM_DIER_COMDE BIT(13) /* COM DMA request Enable */ -#define TIM_DIER_TDE BIT(14) /* Trigger DMA request Enable */ -#define TIM_SR_UIF BIT(0) /* Update interrupt flag */ -#define TIM_SR_CC_IF(x) BIT((x) + 1) /* CC1, CC2, CC3, CC4 interrupt flag */ -#define TIM_EGR_UG BIT(0) /* Update Generation */ -#define TIM_CCMR_PE BIT(3) /* Channel Preload Enable */ -#define TIM_CCMR_M1 (BIT(6) | BIT(5)) /* Channel PWM Mode 1 */ -#define TIM_CCMR_CC1S (BIT(0) | BIT(1)) /* Capture/compare 1 sel */ -#define TIM_CCMR_IC1PSC GENMASK(3, 2) /* Input capture 1 prescaler */ -#define TIM_CCMR_CC2S (BIT(8) | BIT(9)) /* Capture/compare 2 sel */ -#define TIM_CCMR_IC2PSC GENMASK(11, 10) /* Input capture 2 prescaler */ -#define TIM_CCMR_CC1S_TI1 BIT(0) /* IC1/IC3 selects TI1/TI3 */ -#define TIM_CCMR_CC1S_TI2 BIT(1) /* IC1/IC3 selects TI2/TI4 */ -#define TIM_CCMR_CC2S_TI2 BIT(8) /* IC2/IC4 selects TI2/TI4 */ -#define TIM_CCMR_CC2S_TI1 BIT(9) /* IC2/IC4 selects TI1/TI3 */ -#define TIM_CCMR_CC3S (BIT(0) | BIT(1)) /* Capture/compare 3 sel */ -#define TIM_CCMR_CC4S (BIT(8) | BIT(9)) /* Capture/compare 4 sel */ -#define TIM_CCMR_CC3S_TI3 BIT(0) /* IC3 selects TI3 */ -#define TIM_CCMR_CC4S_TI4 BIT(8) /* IC4 selects TI4 */ -#define TIM_CCER_CC1E BIT(0) /* Capt/Comp 1 out Ena */ -#define TIM_CCER_CC1P BIT(1) /* Capt/Comp 1 Polarity */ -#define TIM_CCER_CC1NE BIT(2) /* Capt/Comp 1N out Ena */ -#define TIM_CCER_CC1NP BIT(3) /* Capt/Comp 1N Polarity */ -#define TIM_CCER_CC2E BIT(4) /* Capt/Comp 2 out Ena */ -#define TIM_CCER_CC2P BIT(5) /* Capt/Comp 2 Polarity */ -#define TIM_CCER_CC2NP BIT(7) /* Capt/Comp 2N Polarity */ -#define TIM_CCER_CC3E BIT(8) /* Capt/Comp 3 out Ena */ -#define TIM_CCER_CC3P BIT(9) /* Capt/Comp 3 Polarity */ -#define TIM_CCER_CC3NP BIT(11) /* Capt/Comp 3N Polarity */ -#define TIM_CCER_CC4E BIT(12) /* Capt/Comp 4 out Ena */ -#define TIM_CCER_CC4P BIT(13) /* Capt/Comp 4 Polarity */ -#define TIM_CCER_CC4NP BIT(15) /* Capt/Comp 4N Polarity */ -#define TIM_CCER_CCXE (BIT(0) | BIT(4) | BIT(8) | BIT(12)) -#define TIM_BDTR_BKE(x) BIT(12 + (x) * 12) /* Break input enable */ -#define TIM_BDTR_BKP(x) BIT(13 + (x) * 12) /* Break input polarity */ -#define TIM_BDTR_AOE BIT(14) /* Automatic Output Enable */ -#define TIM_BDTR_MOE BIT(15) /* Main Output Enable */ -#define TIM_BDTR_BKF(x) (0xf << (16 + (x) * 4)) -#define TIM_DCR_DBA GENMASK(4, 0) /* DMA base addr */ -#define TIM_DCR_DBL GENMASK(12, 8) /* DMA burst len */ +#define TIM_CR1_CEN BIT(0) /* Counter Enable */ +#define TIM_CR1_DIR BIT(4) /* Counter Direction */ +#define TIM_CR1_ARPE BIT(7) /* Auto-reload Preload Ena */ +#define TIM_CR2_MMS (BIT(4) | BIT(5) | BIT(6)) /* Master mode selection */ +#define TIM_CR2_MMS2 GENMASK(23, 20) /* Master mode selection 2 */ +#define TIM_SMCR_SMS (BIT(0) | BIT(1) | BIT(2)) /* Slave mode selection */ +#define TIM_SMCR_TS (BIT(4) | BIT(5) | BIT(6)) /* Trigger selection */ +#define TIM_DIER_UIE BIT(0) /* Update interrupt */ +#define TIM_DIER_CC1IE BIT(1) /* CC1 Interrupt Enable */ +#define TIM_DIER_CC2IE BIT(2) /* CC2 Interrupt Enable */ +#define TIM_DIER_CC3IE BIT(3) /* CC3 Interrupt Enable */ +#define TIM_DIER_CC4IE BIT(4) /* CC4 Interrupt Enable */ +#define TIM_DIER_CC_IE(x) BIT((x) + 1) /* CC1, CC2, CC3, CC4 interrupt enable */ +#define TIM_DIER_UDE BIT(8) /* Update DMA request Enable */ +#define TIM_DIER_CC1DE BIT(9) /* CC1 DMA request Enable */ +#define TIM_DIER_CC2DE BIT(10) /* CC2 DMA request Enable */ +#define TIM_DIER_CC3DE BIT(11) /* CC3 DMA request Enable */ +#define TIM_DIER_CC4DE BIT(12) /* CC4 DMA request Enable */ +#define TIM_DIER_COMDE BIT(13) /* COM DMA request Enable */ +#define TIM_DIER_TDE BIT(14) /* Trigger DMA request Enable */ +#define TIM_SR_UIF BIT(0) /* Update interrupt flag */ +#define TIM_SR_CC_IF(x) BIT((x) + 1) /* CC1, CC2, CC3, CC4 interrupt flag */ +#define TIM_EGR_UG BIT(0) /* Update Generation */ +#define TIM_CCMR_PE BIT(3) /* Channel Preload Enable */ +#define TIM_CCMR_M1 (BIT(6) | BIT(5)) /* Channel PWM Mode 1 */ +#define TIM_CCMR_CC1S (BIT(0) | BIT(1)) /* Capture/compare 1 sel */ +#define TIM_CCMR_IC1PSC GENMASK(3, 2) /* Input capture 1 prescaler */ +#define TIM_CCMR_CC2S (BIT(8) | BIT(9)) /* Capture/compare 2 sel */ +#define TIM_CCMR_IC2PSC GENMASK(11, 10) /* Input capture 2 prescaler */ +#define TIM_CCMR_CC1S_TI1 BIT(0) /* IC1/IC3 selects TI1/TI3 */ +#define TIM_CCMR_CC1S_TI2 BIT(1) /* IC1/IC3 selects TI2/TI4 */ +#define TIM_CCMR_CC2S_TI2 BIT(8) /* IC2/IC4 selects TI2/TI4 */ +#define TIM_CCMR_CC2S_TI1 BIT(9) /* IC2/IC4 selects TI1/TI3 */ +#define TIM_CCMR_CC3S (BIT(0) | BIT(1)) /* Capture/compare 3 sel */ +#define TIM_CCMR_CC4S (BIT(8) | BIT(9)) /* Capture/compare 4 sel */ +#define TIM_CCMR_CC3S_TI3 BIT(0) /* IC3 selects TI3 */ +#define TIM_CCMR_CC4S_TI4 BIT(8) /* IC4 selects TI4 */ +#define TIM_CCER_CC1E BIT(0) /* Capt/Comp 1 out Ena */ +#define TIM_CCER_CC1P BIT(1) /* Capt/Comp 1 Polarity */ +#define TIM_CCER_CC1NE BIT(2) /* Capt/Comp 1N out Ena */ +#define TIM_CCER_CC1NP BIT(3) /* Capt/Comp 1N Polarity */ +#define TIM_CCER_CC2E BIT(4) /* Capt/Comp 2 out Ena */ +#define TIM_CCER_CC2P BIT(5) /* Capt/Comp 2 Polarity */ +#define TIM_CCER_CC2NP BIT(7) /* Capt/Comp 2N Polarity */ +#define TIM_CCER_CC3E BIT(8) /* Capt/Comp 3 out Ena */ +#define TIM_CCER_CC3P BIT(9) /* Capt/Comp 3 Polarity */ +#define TIM_CCER_CC3NP BIT(11) /* Capt/Comp 3N Polarity */ +#define TIM_CCER_CC4E BIT(12) /* Capt/Comp 4 out Ena */ +#define TIM_CCER_CC4P BIT(13) /* Capt/Comp 4 Polarity */ +#define TIM_CCER_CC4NP BIT(15) /* Capt/Comp 4N Polarity */ +#define TIM_CCER_CCXE (BIT(0) | BIT(4) | BIT(8) | BIT(12)) +#define TIM_BDTR_BKE(x) BIT(12 + (x) * 12) /* Break input enable */ +#define TIM_BDTR_BKP(x) BIT(13 + (x) * 12) /* Break input polarity */ +#define TIM_BDTR_AOE BIT(14) /* Automatic Output Enable */ +#define TIM_BDTR_MOE BIT(15) /* Main Output Enable */ +#define TIM_BDTR_BKF(x) (0xf << (16 + (x) * 4)) +#define TIM_DCR_DBA GENMASK(4, 0) /* DMA base addr */ +#define TIM_DCR_DBL GENMASK(12, 8) /* DMA burst len */ -#define MAX_TIM_PSC 0xFFFF -#define MAX_TIM_ICPSC 0x3 -#define TIM_CR2_MMS_SHIFT 4 -#define TIM_CR2_MMS2_SHIFT 20 +#define MAX_TIM_PSC 0xFFFF +#define MAX_TIM_ICPSC 0x3 +#define TIM_CR2_MMS_SHIFT 4 +#define TIM_CR2_MMS2_SHIFT 20 #define TIM_SMCR_SMS_SLAVE_MODE_DISABLED 0 /* counts on internal clock when CEN=1 */ #define TIM_SMCR_SMS_ENCODER_MODE_1 1 /* counts TI1FP1 edges, depending on TI2FP2 level */ #define TIM_SMCR_SMS_ENCODER_MODE_2 2 /* counts TI2FP2 edges, depending on TI1FP1 level */ #define TIM_SMCR_SMS_ENCODER_MODE_3 3 /* counts on both TI1FP1 and TI2FP2 edges */ -#define TIM_SMCR_TS_SHIFT 4 -#define TIM_BDTR_BKF_MASK 0xF -#define TIM_BDTR_BKF_SHIFT(x) (16 + (x) * 4) +#define TIM_SMCR_TS_SHIFT 4 +#define TIM_BDTR_BKF_MASK 0xF +#define TIM_BDTR_BKF_SHIFT(x) (16 + (x) * 4) enum stm32_timers_dmas { STM32_TIMERS_DMA_CH1, -- cgit v1.2.3 From 796b942f65967af55684bf520812787b97522151 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 19 Jun 2024 12:11:43 +0200 Subject: mfd: stm32-timers: Add some register definitions with a parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some registers that belong together and are numbered from 1 to 4. Introduce a macro definition for these that takes the channel number as parameter and define the previously available constants using the new ones. This allows to simplify some users that up to now use constructs like TIM_CCER_CC1NE << (ch * 4) which is an ugly mix of using a predefined value and still knowing internal details about it. Note that there are several decrements by 1 involved. These are necessary because software guys start counting at 0 while the hardware designer started at 1 (and having TIM_CCER_CCxE(1) be TIM_CCER_CC2E isn't a sane option). The compiler is expected to optimize these out nicely. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/05df15f61dde81033407d3b4fcb67ee403ecc8db.1718791090.git.u.kleine-koenig@baylibre.com Signed-off-by: Lee Jones --- include/linux/mfd/stm32-timers.h | 60 +++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index 5794110b2b28..92b45a559656 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -24,10 +24,11 @@ #define TIM_CNT 0x24 /* Counter */ #define TIM_PSC 0x28 /* Prescaler */ #define TIM_ARR 0x2c /* Auto-Reload Register */ -#define TIM_CCR1 0x34 /* Capt/Comp Register 1 */ -#define TIM_CCR2 0x38 /* Capt/Comp Register 2 */ -#define TIM_CCR3 0x3C /* Capt/Comp Register 3 */ -#define TIM_CCR4 0x40 /* Capt/Comp Register 4 */ +#define TIM_CCRx(x) (0x34 + 4 * ((x) - 1)) /* Capt/Comp Register x (x ∈ {1, .. 4}) */ +#define TIM_CCR1 TIM_CCRx(1) /* Capt/Comp Register 1 */ +#define TIM_CCR2 TIM_CCRx(2) /* Capt/Comp Register 2 */ +#define TIM_CCR3 TIM_CCRx(3) /* Capt/Comp Register 3 */ +#define TIM_CCR4 TIM_CCRx(4) /* Capt/Comp Register 4 */ #define TIM_BDTR 0x44 /* Break and Dead-Time Reg */ #define TIM_DCR 0x48 /* DMA control register */ #define TIM_DMAR 0x4C /* DMA register for transfer */ @@ -41,16 +42,18 @@ #define TIM_SMCR_SMS (BIT(0) | BIT(1) | BIT(2)) /* Slave mode selection */ #define TIM_SMCR_TS (BIT(4) | BIT(5) | BIT(6)) /* Trigger selection */ #define TIM_DIER_UIE BIT(0) /* Update interrupt */ -#define TIM_DIER_CC1IE BIT(1) /* CC1 Interrupt Enable */ -#define TIM_DIER_CC2IE BIT(2) /* CC2 Interrupt Enable */ -#define TIM_DIER_CC3IE BIT(3) /* CC3 Interrupt Enable */ -#define TIM_DIER_CC4IE BIT(4) /* CC4 Interrupt Enable */ +#define TIM_DIER_CCxIE(x) BIT(1 + ((x) - 1)) /* CCx Interrupt Enable (x ∈ {1, .. 4}) */ +#define TIM_DIER_CC1IE TIM_DIER_CCxIE(1) /* CC1 Interrupt Enable */ +#define TIM_DIER_CC2IE TIM_DIER_CCxIE(2) /* CC2 Interrupt Enable */ +#define TIM_DIER_CC3IE TIM_DIER_CCxIE(3) /* CC3 Interrupt Enable */ +#define TIM_DIER_CC4IE TIM_DIER_CCxIE(4) /* CC4 Interrupt Enable */ #define TIM_DIER_CC_IE(x) BIT((x) + 1) /* CC1, CC2, CC3, CC4 interrupt enable */ #define TIM_DIER_UDE BIT(8) /* Update DMA request Enable */ -#define TIM_DIER_CC1DE BIT(9) /* CC1 DMA request Enable */ -#define TIM_DIER_CC2DE BIT(10) /* CC2 DMA request Enable */ -#define TIM_DIER_CC3DE BIT(11) /* CC3 DMA request Enable */ -#define TIM_DIER_CC4DE BIT(12) /* CC4 DMA request Enable */ +#define TIM_DIER_CCxDE(x) BIT(9 + ((x) - 1)) /* CCx DMA request Enable (x ∈ {1, .. 4}) */ +#define TIM_DIER_CC1DE TIM_DIER_CCxDE(1) /* CC1 DMA request Enable */ +#define TIM_DIER_CC2DE TIM_DIER_CCxDE(2) /* CC2 DMA request Enable */ +#define TIM_DIER_CC3DE TIM_DIER_CCxDE(3) /* CC3 DMA request Enable */ +#define TIM_DIER_CC4DE TIM_DIER_CCxDE(4) /* CC4 DMA request Enable */ #define TIM_DIER_COMDE BIT(13) /* COM DMA request Enable */ #define TIM_DIER_TDE BIT(14) /* Trigger DMA request Enable */ #define TIM_SR_UIF BIT(0) /* Update interrupt flag */ @@ -70,19 +73,26 @@ #define TIM_CCMR_CC4S (BIT(8) | BIT(9)) /* Capture/compare 4 sel */ #define TIM_CCMR_CC3S_TI3 BIT(0) /* IC3 selects TI3 */ #define TIM_CCMR_CC4S_TI4 BIT(8) /* IC4 selects TI4 */ -#define TIM_CCER_CC1E BIT(0) /* Capt/Comp 1 out Ena */ -#define TIM_CCER_CC1P BIT(1) /* Capt/Comp 1 Polarity */ -#define TIM_CCER_CC1NE BIT(2) /* Capt/Comp 1N out Ena */ -#define TIM_CCER_CC1NP BIT(3) /* Capt/Comp 1N Polarity */ -#define TIM_CCER_CC2E BIT(4) /* Capt/Comp 2 out Ena */ -#define TIM_CCER_CC2P BIT(5) /* Capt/Comp 2 Polarity */ -#define TIM_CCER_CC2NP BIT(7) /* Capt/Comp 2N Polarity */ -#define TIM_CCER_CC3E BIT(8) /* Capt/Comp 3 out Ena */ -#define TIM_CCER_CC3P BIT(9) /* Capt/Comp 3 Polarity */ -#define TIM_CCER_CC3NP BIT(11) /* Capt/Comp 3N Polarity */ -#define TIM_CCER_CC4E BIT(12) /* Capt/Comp 4 out Ena */ -#define TIM_CCER_CC4P BIT(13) /* Capt/Comp 4 Polarity */ -#define TIM_CCER_CC4NP BIT(15) /* Capt/Comp 4N Polarity */ +#define TIM_CCER_CCxE(x) BIT(0 + 4 * ((x) - 1)) /* Capt/Comp x out Ena (x ∈ {1, .. 4}) */ +#define TIM_CCER_CCxP(x) BIT(1 + 4 * ((x) - 1)) /* Capt/Comp x Polarity (x ∈ {1, .. 4}) */ +#define TIM_CCER_CCxNE(x) BIT(2 + 4 * ((x) - 1)) /* Capt/Comp xN out Ena (x ∈ {1, .. 4}) */ +#define TIM_CCER_CCxNP(x) BIT(3 + 4 * ((x) - 1)) /* Capt/Comp xN Polarity (x ∈ {1, .. 4}) */ +#define TIM_CCER_CC1E TIM_CCER_CCxE(1) /* Capt/Comp 1 out Ena */ +#define TIM_CCER_CC1P TIM_CCER_CCxP(1) /* Capt/Comp 1 Polarity */ +#define TIM_CCER_CC1NE TIM_CCER_CCxNE(1) /* Capt/Comp 1N out Ena */ +#define TIM_CCER_CC1NP TIM_CCER_CCxNP(1) /* Capt/Comp 1N Polarity */ +#define TIM_CCER_CC2E TIM_CCER_CCxE(2) /* Capt/Comp 2 out Ena */ +#define TIM_CCER_CC2P TIM_CCER_CCxP(2) /* Capt/Comp 2 Polarity */ +#define TIM_CCER_CC2NE TIM_CCER_CCxNE(2) /* Capt/Comp 2N out Ena */ +#define TIM_CCER_CC2NP TIM_CCER_CCxNP(2) /* Capt/Comp 2N Polarity */ +#define TIM_CCER_CC3E TIM_CCER_CCxE(3) /* Capt/Comp 3 out Ena */ +#define TIM_CCER_CC3P TIM_CCER_CCxP(3) /* Capt/Comp 3 Polarity */ +#define TIM_CCER_CC3NE TIM_CCER_CCxNE(3) /* Capt/Comp 3N out Ena */ +#define TIM_CCER_CC3NP TIM_CCER_CCxNP(3) /* Capt/Comp 3N Polarity */ +#define TIM_CCER_CC4E TIM_CCER_CCxE(4) /* Capt/Comp 4 out Ena */ +#define TIM_CCER_CC4P TIM_CCER_CCxP(4) /* Capt/Comp 4 Polarity */ +#define TIM_CCER_CC4NE TIM_CCER_CCxNE(4) /* Capt/Comp 4N out Ena */ +#define TIM_CCER_CC4NP TIM_CCER_CCxNP(4) /* Capt/Comp 4N Polarity */ #define TIM_CCER_CCXE (BIT(0) | BIT(4) | BIT(8) | BIT(12)) #define TIM_BDTR_BKE(x) BIT(12 + (x) * 12) /* Break input enable */ #define TIM_BDTR_BKP(x) BIT(13 + (x) * 12) /* Break input polarity */ -- cgit v1.2.3 From 304d02aa711369da89b4f8c01702bf1b5d1f7abc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 19 Jun 2024 12:11:45 +0200 Subject: mfd: stm32-timers: Drop unused TIM_DIER_CC_IE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This macro is misleading as TIM_DIER_CC_IE(1) == TIM_DIER_CC2IE . The only user was updated to use TIM_DIER_CCxIE() instead which doesn't suffer from this mismatch, so TIM_DIER_CC_IE can be dropped. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/6c8fcc4ed159992a1dbb0796087e6ceb10c39c96.1718791090.git.u.kleine-koenig@baylibre.com Signed-off-by: Lee Jones --- include/linux/mfd/stm32-timers.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index 92b45a559656..f09ba598c97a 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -47,7 +47,6 @@ #define TIM_DIER_CC2IE TIM_DIER_CCxIE(2) /* CC2 Interrupt Enable */ #define TIM_DIER_CC3IE TIM_DIER_CCxIE(3) /* CC3 Interrupt Enable */ #define TIM_DIER_CC4IE TIM_DIER_CCxIE(4) /* CC4 Interrupt Enable */ -#define TIM_DIER_CC_IE(x) BIT((x) + 1) /* CC1, CC2, CC3, CC4 interrupt enable */ #define TIM_DIER_UDE BIT(8) /* Update DMA request Enable */ #define TIM_DIER_CCxDE(x) BIT(9 + ((x) - 1)) /* CCx DMA request Enable (x ∈ {1, .. 4}) */ #define TIM_DIER_CC1DE TIM_DIER_CCxDE(1) /* CC1 DMA request Enable */ -- cgit v1.2.3 From cf546dd289e0f6d2594c25e2fb4e19ee67c6d988 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 27 May 2024 17:40:10 +0200 Subject: block: change rq_integrity_vec to respect the iterator If we allocate a bio that is larger than NVMe maximum request size, attach integrity metadata to it and send it to the NVMe subsystem, the integrity metadata will be corrupted. Splitting the bio works correctly. The function bio_split will clone the bio, trim the iterator of the first bio and advance the iterator of the second bio. However, the function rq_integrity_vec has a bug - it returns the first vector of the bio's metadata and completely disregards the metadata iterator that was advanced when the bio was split. Thus, the second bio uses the same metadata as the first bio and this leads to metadata corruption. This commit changes rq_integrity_vec, so that it calls mp_bvec_iter_bvec instead of returning the first vector. mp_bvec_iter_bvec reads the iterator and uses it to build a bvec for the current position in the iterator. The "queue_max_integrity_segments(rq->q) > 1" check was removed, because the updated rq_integrity_vec function works correctly with multiple segments. Signed-off-by: Mikulas Patocka Reviewed-by: Anuj Gupta Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/49d1afaa-f934-6ed2-a678-e0d428c63a65@redhat.com Signed-off-by: Jens Axboe --- include/linux/blk-integrity.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index d201140d77a3..0fdd62e6d4b0 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -90,14 +90,13 @@ static inline bool blk_integrity_rq(struct request *rq) } /* - * Return the first bvec that contains integrity data. Only drivers that are - * limited to a single integrity segment should use this helper. + * Return the current bvec that contains the integrity data. bip_iter may be + * advanced to iterate over the integrity data. */ -static inline struct bio_vec *rq_integrity_vec(struct request *rq) +static inline struct bio_vec rq_integrity_vec(struct request *rq) { - if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1)) - return NULL; - return rq->bio->bi_integrity->bip_vec; + return mp_bvec_iter_bvec(rq->bio->bi_integrity->bip_vec, + rq->bio->bi_integrity->bip_iter); } #else /* CONFIG_BLK_DEV_INTEGRITY */ static inline int blk_rq_count_integrity_sg(struct request_queue *q, @@ -148,7 +147,8 @@ static inline int blk_integrity_rq(struct request *rq) static inline struct bio_vec *rq_integrity_vec(struct request *rq) { - return NULL; + /* the optimizer will remove all calls to this function */ + return (struct bio_vec){ }; } #endif /* CONFIG_BLK_DEV_INTEGRITY */ -- cgit v1.2.3 From 5d55721d6e24c8e99cc86ee1fcb90d776ef47964 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 23 Jun 2024 08:45:33 +0200 Subject: power: supply: samsung-sdi-battery: Constify struct power_supply_vbat_ri_table 'struct power_supply_vbat_ri_table' are not modified in this driver. Constifying these structures moves some data to a read-only section, so increase overall security. In order to do it, some code also needs to be adjusted to this new const qualifier. On a x86_64, with allmodconfig: Before: ====== $ size drivers/power/supply/samsung-sdi-battery.o text data bss dec hex filename 955 7664 0 8619 21ab drivers/power/supply/samsung-sdi-battery.o After: ===== $ size drivers/power/supply/samsung-sdi-battery.o text data bss dec hex filename 4055 4584 0 8639 21bf drivers/power/supply/samsung-sdi-battery.o Signed-off-by: Christophe JAILLET Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/d01818abd880bf435d1106a9a6cc11a7a8a3e661.1719125040.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 65082ef75692..5061eeecf62e 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -755,9 +755,9 @@ struct power_supply_battery_info { int ocv_table_size[POWER_SUPPLY_OCV_TEMP_MAX]; struct power_supply_resistance_temp_table *resist_table; int resist_table_size; - struct power_supply_vbat_ri_table *vbat2ri_discharging; + const struct power_supply_vbat_ri_table *vbat2ri_discharging; int vbat2ri_discharging_size; - struct power_supply_vbat_ri_table *vbat2ri_charging; + const struct power_supply_vbat_ri_table *vbat2ri_charging; int vbat2ri_charging_size; int bti_resistance_ohm; int bti_resistance_tolerance; -- cgit v1.2.3 From 0b209ec85b2b73c38a09ba71dc05fbe4aee7be67 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 23 Jun 2024 10:04:45 +0200 Subject: power: supply: samsung-sdi-battery: Constify struct power_supply_maintenance_charge_table 'struct power_supply_maintenance_charge_table' is not modified in this driver. Constifying this structure moves some data to a read-only section, so increase overall security. In order to do it, some code also needs to be adjusted to this new const qualifier. On a x86_64, with allmodconfig: Before: ====== $ size drivers/power/supply/samsung-sdi-battery.o text data bss dec hex filename 4055 4584 0 8639 21bf drivers/power/supply/samsung-sdi-battery.o After: ===== $ size drivers/power/supply/samsung-sdi-battery.o text data bss dec hex filename 4087 4552 0 8639 21bf drivers/power/supply/samsung-sdi-battery.o Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/6caafd0ac2556a40405273b1a4badc508ea8e9b0.1719125040.git.christophe.jaillet@wanadoo.fr Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 5061eeecf62e..72dc7e45c90c 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -736,7 +736,7 @@ struct power_supply_battery_info { int overvoltage_limit_uv; int constant_charge_current_max_ua; int constant_charge_voltage_max_uv; - struct power_supply_maintenance_charge_table *maintenance_charge; + const struct power_supply_maintenance_charge_table *maintenance_charge; int maintenance_charge_size; int alert_low_temp_charge_current_ua; int alert_low_temp_charge_voltage_uv; @@ -810,7 +810,7 @@ power_supply_temp2resist_simple(struct power_supply_resistance_temp_table *table int table_len, int temp); extern int power_supply_vbat2ri(struct power_supply_battery_info *info, int vbat_uv, bool charging); -extern struct power_supply_maintenance_charge_table * +extern const struct power_supply_maintenance_charge_table * power_supply_get_maintenance_charging_setting(struct power_supply_battery_info *info, int index); extern bool power_supply_battery_bti_in_range(struct power_supply_battery_info *info, int resistance); @@ -824,7 +824,7 @@ extern int power_supply_set_battery_charged(struct power_supply *psy); static inline bool power_supply_supports_maintenance_charging(struct power_supply_battery_info *info) { - struct power_supply_maintenance_charge_table *mt; + const struct power_supply_maintenance_charge_table *mt; mt = power_supply_get_maintenance_charging_setting(info, 0); -- cgit v1.2.3 From aaa53168cbcc486ca1927faac00bd99e81d4ff04 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 28 May 2024 13:32:34 +0200 Subject: dm: optimize flushes Device mapper sends flush bios to all the targets and the targets send it to the underlying device. That may be inefficient, for example if a table contains 10 linear targets pointing to the same physical device, then device mapper would send 10 flush bios to that device - despite the fact that only one bio would be sufficient. This commit optimizes the flush behavior. It introduces a per-target variable flush_bypasses_map - it is set when the target supports flush optimization - currently, the dm-linear and dm-stripe targets support it. When all the targets in a table have flush_bypasses_map, flush_bypasses_map on the table is set. __send_empty_flush tests if the table has flush_bypasses_map - and if it has, no flush bios are sent to the targets via the "map" method and the list dm_table->devices is iterated and the flush bios are sent to each member of the list. Signed-off-by: Mikulas Patocka Reviewed-by: Mike Snitzer Suggested-by: Yang Yang --- include/linux/device-mapper.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 82b2195efaca..3611b230d0aa 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -397,6 +397,21 @@ struct dm_target { * bio_set_dev(). NOTE: ideally a target should _not_ need this. */ bool needs_bio_set_dev:1; + + /* + * Set if the target supports flush optimization. If all the targets in + * a table have flush_bypasses_map set, the dm core will not send + * flushes to the targets via a ->map method. It will iterate over + * dm_table->devices and send flushes to the devices directly. This + * optimization reduces the number of flushes being sent when multiple + * targets in a table use the same underlying device. + * + * This optimization may be enabled on targets that just pass the + * flushes to the underlying devices without performing any other + * actions on the flush request. Currently, dm-linear and dm-stripe + * support it. + */ + bool flush_bypasses_map:1; }; void *dm_per_bio_data(struct bio *bio, size_t data_size); -- cgit v1.2.3 From ec9b1cf0b0ebfb52274971a8a0e74e0a133f64fb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2024 16:26:24 +0200 Subject: block: rename BLK_FEAT_MISALIGNED This is a flag for ->flags and not a feature for ->features. And fix the one place that actually incorrectly cleared it from ->features. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240626142637.300624-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b2f1362c4681..1a7e9d9c16d7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -347,7 +347,7 @@ enum { BLK_FLAG_WRITE_CACHE_DISABLED = (1u << 0), /* I/O topology is misaligned */ - BLK_FEAT_MISALIGNED = (1u << 1), + BLK_FLAG_MISALIGNED = (1u << 1), }; struct queue_limits { -- cgit v1.2.3 From fcf865e357f80285af12c0c9a49f89d71acb7f4b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2024 16:26:25 +0200 Subject: block: convert features and flags to __bitwise types ... and let sparse help us catch mismatches or abuses. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240626142637.300624-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 85 +++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1a7e9d9c16d7..b37826b350a2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -283,55 +283,56 @@ static inline bool blk_op_is_passthrough(blk_opf_t op) } /* flags set by the driver in queue_limits.features */ -enum { - /* supports a volatile write cache */ - BLK_FEAT_WRITE_CACHE = (1u << 0), +typedef unsigned int __bitwise blk_features_t; - /* supports passing on the FUA bit */ - BLK_FEAT_FUA = (1u << 1), +/* supports a volatile write cache */ +#define BLK_FEAT_WRITE_CACHE ((__force blk_features_t)(1u << 0)) - /* rotational device (hard drive or floppy) */ - BLK_FEAT_ROTATIONAL = (1u << 2), +/* supports passing on the FUA bit */ +#define BLK_FEAT_FUA ((__force blk_features_t)(1u << 1)) - /* contributes to the random number pool */ - BLK_FEAT_ADD_RANDOM = (1u << 3), +/* rotational device (hard drive or floppy) */ +#define BLK_FEAT_ROTATIONAL ((__force blk_features_t)(1u << 2)) - /* do disk/partitions IO accounting */ - BLK_FEAT_IO_STAT = (1u << 4), +/* contributes to the random number pool */ +#define BLK_FEAT_ADD_RANDOM ((__force blk_features_t)(1u << 3)) - /* don't modify data until writeback is done */ - BLK_FEAT_STABLE_WRITES = (1u << 5), +/* do disk/partitions IO accounting */ +#define BLK_FEAT_IO_STAT ((__force blk_features_t)(1u << 4)) - /* always completes in submit context */ - BLK_FEAT_SYNCHRONOUS = (1u << 6), +/* don't modify data until writeback is done */ +#define BLK_FEAT_STABLE_WRITES ((__force blk_features_t)(1u << 5)) - /* supports REQ_NOWAIT */ - BLK_FEAT_NOWAIT = (1u << 7), +/* always completes in submit context */ +#define BLK_FEAT_SYNCHRONOUS ((__force blk_features_t)(1u << 6)) - /* supports DAX */ - BLK_FEAT_DAX = (1u << 8), +/* supports REQ_NOWAIT */ +#define BLK_FEAT_NOWAIT ((__force blk_features_t)(1u << 7)) - /* supports I/O polling */ - BLK_FEAT_POLL = (1u << 9), +/* supports DAX */ +#define BLK_FEAT_DAX ((__force blk_features_t)(1u << 8)) - /* is a zoned device */ - BLK_FEAT_ZONED = (1u << 10), +/* supports I/O polling */ +#define BLK_FEAT_POLL ((__force blk_features_t)(1u << 9)) - /* supports Zone Reset All */ - BLK_FEAT_ZONE_RESETALL = (1u << 11), +/* is a zoned device */ +#define BLK_FEAT_ZONED ((__force blk_features_t)(1u << 10)) - /* supports PCI(e) p2p requests */ - BLK_FEAT_PCI_P2PDMA = (1u << 12), +/* supports Zone Reset All */ +#define BLK_FEAT_ZONE_RESETALL ((__force blk_features_t)(1u << 11)) - /* skip this queue in blk_mq_(un)quiesce_tagset */ - BLK_FEAT_SKIP_TAGSET_QUIESCE = (1u << 13), +/* supports PCI(e) p2p requests */ +#define BLK_FEAT_PCI_P2PDMA ((__force blk_features_t)(1u << 12)) - /* bounce all highmem pages */ - BLK_FEAT_BOUNCE_HIGH = (1u << 14), +/* skip this queue in blk_mq_(un)quiesce_tagset */ +#define BLK_FEAT_SKIP_TAGSET_QUIESCE ((__force blk_features_t)(1u << 13)) - /* undocumented magic for bcache */ - BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE = (1u << 15), -}; +/* bounce all highmem pages */ +#define BLK_FEAT_BOUNCE_HIGH ((__force blk_features_t)(1u << 14)) + +/* undocumented magic for bcache */ +#define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \ + ((__force blk_features_t)(1u << 15)) /* * Flags automatically inherited when stacking limits. @@ -342,17 +343,17 @@ enum { BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE) /* internal flags in queue_limits.flags */ -enum { - /* do not send FLUSH/FUA commands despite advertising a write cache */ - BLK_FLAG_WRITE_CACHE_DISABLED = (1u << 0), +typedef unsigned int __bitwise blk_flags_t; - /* I/O topology is misaligned */ - BLK_FLAG_MISALIGNED = (1u << 1), -}; +/* do not send FLUSH/FUA commands despite advertising a write cache */ +#define BLK_FLAG_WRITE_CACHE_DISABLED ((__force blk_flags_t)(1u << 0)) + +/* I/O topology is misaligned */ +#define BLK_FLAG_MISALIGNED ((__force blk_flags_t)(1u << 1)) struct queue_limits { - unsigned int features; - unsigned int flags; + blk_features_t features; + blk_flags_t flags; unsigned long seg_boundary_mask; unsigned long virt_boundary_mask; -- cgit v1.2.3 From 73781b3b81e76583708a652c853d54d03dce031d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2024 16:26:27 +0200 Subject: block: remove disk_update_readahead Mark blk_apply_bdi_limits non-static and open code disk_update_readahead in the only caller. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240626142637.300624-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b37826b350a2..6b88382012e9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -973,7 +973,6 @@ static inline void blk_queue_disable_write_zeroes(struct request_queue *q) /* * Access functions for manipulating queue properties */ -void disk_update_readahead(struct gendisk *disk); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); -- cgit v1.2.3 From abfc9d810926dfbf5645c7755c8d5ab96273f27d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2024 16:26:28 +0200 Subject: block: remove the fallback case in queue_dma_alignment Now that all updates go through blk_validate_limits the default of 511 is set at initialization time. Also remove the unused NULL check as calling this helper on a NULL queue can't happen (and doesn't make much sense to start with). Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240626142637.300624-8-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6b88382012e9..94fcbc912312 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1394,7 +1394,7 @@ static inline bool bdev_is_zone_start(struct block_device *bdev, static inline int queue_dma_alignment(const struct request_queue *q) { - return q ? q->limits.dma_alignment : 511; + return q->limits.dma_alignment; } static inline unsigned int -- cgit v1.2.3 From e94b45d08b5d1c230c0f59c3eed758d28658851e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2024 16:26:29 +0200 Subject: block: move dma_pad_mask into queue_limits dma_pad_mask is a queue_limits by all ways of looking at it, so move it there and set it through the atomic queue limits APIs. Add a little helper that takes the alignment and pad into account to simplify the code that is touched a bit. Note that there never was any need for the > check in blk_queue_update_dma_pad, this probably was just copy and paste from dma_update_dma_alignment. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240626142637.300624-9-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 94fcbc912312..a53e3434e1a2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -401,6 +401,7 @@ struct queue_limits { * due to possible offsets. */ unsigned int dma_alignment; + unsigned int dma_pad_mask; struct blk_integrity integrity; }; @@ -509,8 +510,6 @@ struct request_queue { */ int id; - unsigned int dma_pad_mask; - /* * queue settings */ @@ -981,7 +980,6 @@ extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, sector_t offset, const char *pfx); -extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); struct blk_independent_access_ranges * @@ -1433,10 +1431,16 @@ static inline bool bdev_iter_is_aligned(struct block_device *bdev, bdev_logical_block_size(bdev) - 1); } +static inline int blk_lim_dma_alignment_and_pad(struct queue_limits *lim) +{ + return lim->dma_alignment | lim->dma_pad_mask; +} + static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { - unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; + unsigned int alignment = blk_lim_dma_alignment_and_pad(&q->limits); + return !(addr & alignment) && !(len & alignment); } -- cgit v1.2.3 From 769cb63166d90f1fadafa4352f180cbd96b6cb77 Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Fri, 21 Jun 2024 12:55:43 +0100 Subject: mfd: syscon: Add of_syscon_register_regmap() API The of_syscon_register_regmap() API allows an externally created regmap to be registered with syscon. This regmap can then be returned to client drivers using the syscon_regmap_lookup_by_phandle() APIs. The API is used by platforms where mmio access to the syscon registers is not possible, and a underlying soc driver like exynos-pmu provides a SoC specific regmap that can issue a SMC or hypervisor call to write the register. This approach keeps the SoC complexities out of syscon, but allows common drivers such as syscon-poweroff, syscon-reboot and friends that are used by many SoCs already to be re-used. Signed-off-by: Peter Griffin Reviewed-by: Arnd Bergmann Reviewed-by: Sam Protsenko Tested-by: Will McVicker Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240621115544.1655458-2-peter.griffin@linaro.org Signed-off-by: Lee Jones --- include/linux/mfd/syscon.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h index c315903f6dab..aad9c6b50463 100644 --- a/include/linux/mfd/syscon.h +++ b/include/linux/mfd/syscon.h @@ -28,6 +28,8 @@ struct regmap *syscon_regmap_lookup_by_phandle_args(struct device_node *np, unsigned int *out_args); struct regmap *syscon_regmap_lookup_by_phandle_optional(struct device_node *np, const char *property); +int of_syscon_register_regmap(struct device_node *np, + struct regmap *regmap); #else static inline struct regmap *device_node_to_regmap(struct device_node *np) { @@ -67,6 +69,12 @@ static inline struct regmap *syscon_regmap_lookup_by_phandle_optional( return NULL; } +static inline int of_syscon_register_regmap(struct device_node *np, + struct regmap *regmap) +{ + return -EOPNOTSUPP; +} + #endif #endif /* __LINUX_MFD_SYSCON_H__ */ -- cgit v1.2.3 From 3ebc76c424bc0f0768f5c346667e8f51217917ba Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 26 Jun 2024 16:22:08 +0200 Subject: drm/mipi-dsi: add mipi_dsi_usleep_range helper Like for mipi_dsi_msleep(), usleep_range() may often be called in between mipi_dsi_dcs_*() functions and needs a multi compatible counter part. Suggested-by: Dmitry Baryshkov Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20240626142212.1341556-3-jbrunet@baylibre.com Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240626142212.1341556-3-jbrunet@baylibre.com --- include/drm/drm_mipi_dsi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 71d121aeef24..0f520eeeaa8e 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -10,6 +10,7 @@ #define __DRM_MIPI_DSI_H__ #include +#include struct mipi_dsi_host; struct mipi_dsi_device; @@ -297,6 +298,12 @@ ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params, msleep(delay); \ } while (0) +#define mipi_dsi_usleep_range(ctx, min, max) \ + do { \ + if (!(ctx)->accum_err) \ + usleep_range(min, max); \ + } while (0) + /** * enum mipi_dsi_dcs_tear_mode - Tearing Effect Output Line mode * @MIPI_DSI_DCS_TEAR_MODE_VBLANK: the TE output line consists of V-Blanking -- cgit v1.2.3 From fafc20ded3f4659873c83c2af6d389983d480994 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 12 Jun 2024 06:02:26 +0000 Subject: ASoC: audio-graph-port: add link-trigger-order Sound Card need to consider/adjust HW control ordering based on the combination of CPU/Codec. The controlling feature is already supported on ASoC, but Simple Audio Card / Audio Graph Card still not support it. Let's support it. Cc: Maxim Kochetkov Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87sexizojx.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/dt-bindings/sound/audio-graph.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 include/dt-bindings/sound/audio-graph.h (limited to 'include') diff --git a/include/dt-bindings/sound/audio-graph.h b/include/dt-bindings/sound/audio-graph.h new file mode 100644 index 000000000000..bdb70c6b7332 --- /dev/null +++ b/include/dt-bindings/sound/audio-graph.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * audio-graph.h + * + * Copyright (c) 2024 Kuninori Morimoto + */ +#ifndef __AUDIO_GRAPH_H +#define __AUDIO_GRAPH_H + +/* + * used in + * link-trigger-order + * link-trigger-order-start + * link-trigger-order-stop + * + * default is + * link-trigger-order = ; + */ +#define SND_SOC_TRIGGER_LINK 0 +#define SND_SOC_TRIGGER_COMPONENT 1 +#define SND_SOC_TRIGGER_DAI 2 +#define SND_SOC_TRIGGER_SIZE 3 /* shoud be last */ + +#endif /* __AUDIO_GRAPH_H */ -- cgit v1.2.3 From 5d9cacdccf17bd33dac3ea378324650159c2a863 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 12 Jun 2024 06:02:33 +0000 Subject: ASoC: simple-card-utils: add link-trigger-order support Some Sound Card might need special trigger ordering which is based on CPU/Codec connection. It is already supported on ASoC, but Simple Audio Card / Audio Graph Card still not support it. Let's support it. Cc: Maxim Kochetkov Signed-off-by: Kuninori Morimoto Link: https://patch.msgid.link/87r0d2zojq.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/simple_card_utils.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h index 0a6435ac5c5f..3360d9eab068 100644 --- a/include/sound/simple_card_utils.h +++ b/include/sound/simple_card_utils.h @@ -199,6 +199,10 @@ int graph_util_parse_dai(struct device *dev, struct device_node *ep, void graph_util_parse_link_direction(struct device_node *np, bool *is_playback_only, bool *is_capture_only); +void graph_util_parse_trigger_order(struct simple_util_priv *priv, + struct device_node *np, + enum snd_soc_trigger_order *trigger_start, + enum snd_soc_trigger_order *trigger_stop); #ifdef DEBUG static inline void simple_util_debug_dai(struct simple_util_priv *priv, -- cgit v1.2.3 From 1cb7d29157603561af4c38535e936850ceb99f0f Mon Sep 17 00:00:00 2001 From: Biju Das Date: Sun, 16 Jun 2024 11:53:55 +0100 Subject: regulator: core: Add helper for allow HW access to enable/disable regulator Add a helper function that allow regulator consumers to allow low-level HW access, in order to enable/disable regulator in atomic context. The use-case for RZ/G2L SoC is to enable VBUS selection register based on vbus detection that happens in interrupt context. Signed-off-by: Biju Das Link: https://patch.msgid.link/20240616105402.45211-4-biju.das.jz@bp.renesas.com Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 59d0b9a79e6e..550a0e6523ae 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -250,6 +250,7 @@ int regulator_get_hardware_vsel_register(struct regulator *regulator, unsigned *vsel_mask); int regulator_list_hardware_vsel(struct regulator *regulator, unsigned selector); +int regulator_hardware_enable(struct regulator *regulator, bool enable); /* regulator notifier block */ int regulator_register_notifier(struct regulator *regulator, @@ -571,6 +572,12 @@ static inline int regulator_list_hardware_vsel(struct regulator *regulator, return -EOPNOTSUPP; } +static inline int regulator_hardware_enable(struct regulator *regulator, + bool enable) +{ + return -EOPNOTSUPP; +} + static inline int regulator_register_notifier(struct regulator *regulator, struct notifier_block *nb) { -- cgit v1.2.3 From 7e5161da9d267957b726a29f3efe6cb50fdfed04 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Sun, 23 Jun 2024 13:31:19 -0700 Subject: drm/xe/oa: Fix kernel doc in xe_drm.h Fix kernel doc in xe_drm.h. Also eliminate private/non-abi enum definitions. v2: Remove __DRM_XE_PERF_TYPE_MAX since it is unused (Michal) v3: Also remove DRM_XE_OA_PROPERTY_MAX since it can also be eliminated (Michal) Suggested-by: Michal Wajdeczko Signed-off-by: Ashutosh Dixit Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240623203119.3840283-1-ashutosh.dixit@intel.com Signed-off-by: Rodrigo Vivi --- include/uapi/drm/xe_drm.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 93e00be44b2d..b410553faa9b 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1379,8 +1379,8 @@ struct drm_xe_wait_user_fence { * enum drm_xe_perf_type - Perf stream types */ enum drm_xe_perf_type { + /** @DRM_XE_PERF_TYPE_OA: OA perf stream type */ DRM_XE_PERF_TYPE_OA, - __DRM_XE_PERF_TYPE_MAX, /* non-ABI */ }; /** @@ -1611,9 +1611,6 @@ enum drm_xe_oa_property_id { * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. */ DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, - - /** @DRM_XE_OA_PROPERTY_MAX: non-ABI */ - DRM_XE_OA_PROPERTY_MAX }; /** -- cgit v1.2.3 From 406d058dc323ae152d380ac90153eb56a75850c1 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 26 Jun 2024 11:18:17 -0700 Subject: drm/xe/oa/uapi: Allow preemption to be disabled on the stream exec queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mesa VK_KHR_performance_query use case requires preemption and timeslicing to be disabled for the stream exec queue. Implement this functionality here. v2: Minor change to debug print to print both ret values (Umesh) Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240626181817.1516229-3-ashutosh.dixit@intel.com Signed-off-by: Rodrigo Vivi --- include/uapi/drm/xe_drm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index b410553faa9b..12eaa8532b5c 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1611,6 +1611,12 @@ enum drm_xe_oa_property_id { * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. */ DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, + + /** + * @DRM_XE_OA_PROPERTY_NO_PREEMPT: Allow preemption and timeslicing + * to be disabled for the stream exec queue. + */ + DRM_XE_OA_PROPERTY_NO_PREEMPT, }; /** -- cgit v1.2.3 From 7931d32955e09d0a11b1fe0b6aac1bfa061c005c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 26 Jun 2024 23:15:38 +0200 Subject: netfilter: nf_tables: fully validate NFT_DATA_VALUE on store to data registers register store validation for NFT_DATA_VALUE is conditional, however, the datatype is always either NFT_DATA_VALUE or NFT_DATA_VERDICT. This only requires a new helper function to infer the register type from the set datatype so this conditional check can be removed. Otherwise, pointer to chain object can be leaked through the registers. Fixes: 96518518cc41 ("netfilter: add nftables") Reported-by: Linus Torvalds Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 2796153b03da..188d41da1a40 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -619,6 +619,11 @@ static inline void *nft_set_priv(const struct nft_set *set) return (void *)set->data; } +static inline enum nft_data_types nft_set_datatype(const struct nft_set *set) +{ + return set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE; +} + static inline bool nft_set_gc_is_pending(const struct nft_set *s) { return refcount_read(&s->refs) != 1; -- cgit v1.2.3 From 69b6517687a4b1fb250bd8c9c193a0a304c8ba17 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 26 Jun 2024 19:01:58 -0600 Subject: block: use the right type for stub rq_integrity_vec() For !CONFIG_BLK_DEV_INTEGRITY, rq_integrity_vec() wasn't updated properly. Fix it up. Fixes: cf546dd289e0 ("block: change rq_integrity_vec to respect the iterator") Signed-off-by: Jens Axboe --- include/linux/blk-integrity.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 0fdd62e6d4b0..c58634924782 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -145,7 +145,7 @@ static inline int blk_integrity_rq(struct request *rq) return 0; } -static inline struct bio_vec *rq_integrity_vec(struct request *rq) +static inline struct bio_vec rq_integrity_vec(struct request *rq) { /* the optimizer will remove all calls to this function */ return (struct bio_vec){ }; -- cgit v1.2.3 From 78b506984ebeaefaf63172059422fc606dc23e68 Mon Sep 17 00:00:00 2001 From: Ranjan Kumar Date: Wed, 26 Jun 2024 15:56:45 +0530 Subject: scsi: mpi3mr: Add ioctl support for HDB Add interface for applications to manage the host diagnostic buffers and update the automatic diag buffer capture triggers. Co-developed-by: Sathya Prakash Signed-off-by: Sathya Prakash Signed-off-by: Ranjan Kumar Link: https://lore.kernel.org/r/20240626102646.14298-4-ranjan.kumar@broadcom.com Signed-off-by: Martin K. Petersen --- include/uapi/scsi/scsi_bsg_mpi3mr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/scsi/scsi_bsg_mpi3mr.h b/include/uapi/scsi/scsi_bsg_mpi3mr.h index a3ba779a3f78..f5ea1db92339 100644 --- a/include/uapi/scsi/scsi_bsg_mpi3mr.h +++ b/include/uapi/scsi/scsi_bsg_mpi3mr.h @@ -296,6 +296,7 @@ struct mpi3mr_hdb_entry { * multiple hdb entries. * * @num_hdb_types: Number of host diag buffer types supported + * @element_trigger_format: Element trigger format * @rsvd1: Reserved * @rsvd2: Reserved * @rsvd3: Reserved @@ -303,7 +304,7 @@ struct mpi3mr_hdb_entry { */ struct mpi3mr_bsg_in_hdb_status { __u8 num_hdb_types; - __u8 rsvd1; + __u8 element_trigger_format; __u16 rsvd2; __u32 rsvd3; struct mpi3mr_hdb_entry entry[1]; -- cgit v1.2.3 From 7276f425b744a81eb5a8e519b0c452d5e64b530c Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Thu, 27 Jun 2024 10:43:27 +0300 Subject: mfd: support ROHM BD96801 PMIC core The ROHM BD96801 PMIC is highly customizable automotive grade PMIC which integrates regulator and watchdog funtionalities. Provide INTB IRQ and register accesses for regulator/watchdog drivers. Signed-off-by: Matti Vaittinen Link: https://lore.kernel.org/r/c5260e2dd222e3c64cdf410802bba195637ccb93.1719473802.git.mazziesaccount@gmail.com Signed-off-by: Lee Jones --- include/linux/mfd/rohm-bd96801.h | 215 +++++++++++++++++++++++++++++++++++++++ include/linux/mfd/rohm-generic.h | 1 + 2 files changed, 216 insertions(+) create mode 100644 include/linux/mfd/rohm-bd96801.h (limited to 'include') diff --git a/include/linux/mfd/rohm-bd96801.h b/include/linux/mfd/rohm-bd96801.h new file mode 100644 index 000000000000..e2d9e10b6364 --- /dev/null +++ b/include/linux/mfd/rohm-bd96801.h @@ -0,0 +1,215 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Copyright (C) 2024 ROHM Semiconductors */ + +#ifndef __MFD_BD96801_H__ +#define __MFD_BD96801_H__ + +#define BD96801_REG_SSCG_CTRL 0x09 +#define BD96801_REG_SHD_INTB 0x20 +#define BD96801_LDO5_VOL_LVL_REG 0x2c +#define BD96801_LDO6_VOL_LVL_REG 0x2d +#define BD96801_LDO7_VOL_LVL_REG 0x2e +#define BD96801_REG_BUCK_OVP 0x30 +#define BD96801_REG_BUCK_OVD 0x35 +#define BD96801_REG_LDO_OVP 0x31 +#define BD96801_REG_LDO_OVD 0x36 +#define BD96801_REG_BOOT_OVERTIME 0x3a +#define BD96801_REG_WD_TMO 0x40 +#define BD96801_REG_WD_CONF 0x41 +#define BD96801_REG_WD_FEED 0x42 +#define BD96801_REG_WD_FAILCOUNT 0x43 +#define BD96801_REG_WD_ASK 0x46 +#define BD96801_REG_WD_STATUS 0x4a +#define BD96801_REG_PMIC_STATE 0x4f +#define BD96801_REG_EXT_STATE 0x50 + +#define BD96801_STATE_STBY 0x09 + +#define BD96801_LOCK_REG 0x04 +#define BD96801_UNLOCK 0x9d +#define BD96801_LOCK 0x00 + +/* IRQ register area */ +#define BD96801_REG_INT_MAIN 0x51 + +/* + * The BD96801 has two physical IRQ lines, INTB and ERRB. + * + * The 'main status register' is located at 0x51. + * The ERRB status registers are located at 0x52 ... 0x5B + * INTB status registers are at range 0x5c ... 0x63 + */ +#define BD96801_REG_INT_SYS_ERRB1 0x52 +#define BD96801_REG_INT_SYS_INTB 0x5c +#define BD96801_REG_INT_LDO7_INTB 0x63 + +/* MASK registers */ +#define BD96801_REG_MASK_SYS_INTB 0x73 +#define BD96801_REG_MASK_SYS_ERRB 0x69 + +#define BD96801_MAX_REGISTER 0x7a + +#define BD96801_OTP_ERR_MASK BIT(0) +#define BD96801_DBIST_ERR_MASK BIT(1) +#define BD96801_EEP_ERR_MASK BIT(2) +#define BD96801_ABIST_ERR_MASK BIT(3) +#define BD96801_PRSTB_ERR_MASK BIT(4) +#define BD96801_DRMOS1_ERR_MASK BIT(5) +#define BD96801_DRMOS2_ERR_MASK BIT(6) +#define BD96801_SLAVE_ERR_MASK BIT(7) +#define BD96801_VREF_ERR_MASK BIT(0) +#define BD96801_TSD_ERR_MASK BIT(1) +#define BD96801_UVLO_ERR_MASK BIT(2) +#define BD96801_OVLO_ERR_MASK BIT(3) +#define BD96801_OSC_ERR_MASK BIT(4) +#define BD96801_PON_ERR_MASK BIT(5) +#define BD96801_POFF_ERR_MASK BIT(6) +#define BD96801_CMD_SHDN_ERR_MASK BIT(7) +#define BD96801_INT_PRSTB_WDT_ERR_MASK BIT(0) +#define BD96801_INT_CHIP_IF_ERR_MASK BIT(3) +#define BD96801_INT_SHDN_ERR_MASK BIT(7) +#define BD96801_OUT_PVIN_ERR_MASK BIT(0) +#define BD96801_OUT_OVP_ERR_MASK BIT(1) +#define BD96801_OUT_UVP_ERR_MASK BIT(2) +#define BD96801_OUT_SHDN_ERR_MASK BIT(7) + +/* ERRB IRQs */ +enum { + /* Reg 0x52, 0x53, 0x54 - ERRB system IRQs */ + BD96801_OTP_ERR_STAT, + BD96801_DBIST_ERR_STAT, + BD96801_EEP_ERR_STAT, + BD96801_ABIST_ERR_STAT, + BD96801_PRSTB_ERR_STAT, + BD96801_DRMOS1_ERR_STAT, + BD96801_DRMOS2_ERR_STAT, + BD96801_SLAVE_ERR_STAT, + BD96801_VREF_ERR_STAT, + BD96801_TSD_ERR_STAT, + BD96801_UVLO_ERR_STAT, + BD96801_OVLO_ERR_STAT, + BD96801_OSC_ERR_STAT, + BD96801_PON_ERR_STAT, + BD96801_POFF_ERR_STAT, + BD96801_CMD_SHDN_ERR_STAT, + BD96801_INT_PRSTB_WDT_ERR, + BD96801_INT_CHIP_IF_ERR, + BD96801_INT_SHDN_ERR_STAT, + + /* Reg 0x55 BUCK1 ERR IRQs */ + BD96801_BUCK1_PVIN_ERR_STAT, + BD96801_BUCK1_OVP_ERR_STAT, + BD96801_BUCK1_UVP_ERR_STAT, + BD96801_BUCK1_SHDN_ERR_STAT, + + /* Reg 0x56 BUCK2 ERR IRQs */ + BD96801_BUCK2_PVIN_ERR_STAT, + BD96801_BUCK2_OVP_ERR_STAT, + BD96801_BUCK2_UVP_ERR_STAT, + BD96801_BUCK2_SHDN_ERR_STAT, + + /* Reg 0x57 BUCK3 ERR IRQs */ + BD96801_BUCK3_PVIN_ERR_STAT, + BD96801_BUCK3_OVP_ERR_STAT, + BD96801_BUCK3_UVP_ERR_STAT, + BD96801_BUCK3_SHDN_ERR_STAT, + + /* Reg 0x58 BUCK4 ERR IRQs */ + BD96801_BUCK4_PVIN_ERR_STAT, + BD96801_BUCK4_OVP_ERR_STAT, + BD96801_BUCK4_UVP_ERR_STAT, + BD96801_BUCK4_SHDN_ERR_STAT, + + /* Reg 0x59 LDO5 ERR IRQs */ + BD96801_LDO5_PVIN_ERR_STAT, + BD96801_LDO5_OVP_ERR_STAT, + BD96801_LDO5_UVP_ERR_STAT, + BD96801_LDO5_SHDN_ERR_STAT, + + /* Reg 0x5a LDO6 ERR IRQs */ + BD96801_LDO6_PVIN_ERR_STAT, + BD96801_LDO6_OVP_ERR_STAT, + BD96801_LDO6_UVP_ERR_STAT, + BD96801_LDO6_SHDN_ERR_STAT, + + /* Reg 0x5b LDO7 ERR IRQs */ + BD96801_LDO7_PVIN_ERR_STAT, + BD96801_LDO7_OVP_ERR_STAT, + BD96801_LDO7_UVP_ERR_STAT, + BD96801_LDO7_SHDN_ERR_STAT, +}; + +/* INTB IRQs */ +enum { + /* Reg 0x5c (System INTB) */ + BD96801_TW_STAT, + BD96801_WDT_ERR_STAT, + BD96801_I2C_ERR_STAT, + BD96801_CHIP_IF_ERR_STAT, + + /* Reg 0x5d (BUCK1 INTB) */ + BD96801_BUCK1_OCPH_STAT, + BD96801_BUCK1_OCPL_STAT, + BD96801_BUCK1_OCPN_STAT, + BD96801_BUCK1_OVD_STAT, + BD96801_BUCK1_UVD_STAT, + BD96801_BUCK1_TW_CH_STAT, + + /* Reg 0x5e (BUCK2 INTB) */ + BD96801_BUCK2_OCPH_STAT, + BD96801_BUCK2_OCPL_STAT, + BD96801_BUCK2_OCPN_STAT, + BD96801_BUCK2_OVD_STAT, + BD96801_BUCK2_UVD_STAT, + BD96801_BUCK2_TW_CH_STAT, + + /* Reg 0x5f (BUCK3 INTB)*/ + BD96801_BUCK3_OCPH_STAT, + BD96801_BUCK3_OCPL_STAT, + BD96801_BUCK3_OCPN_STAT, + BD96801_BUCK3_OVD_STAT, + BD96801_BUCK3_UVD_STAT, + BD96801_BUCK3_TW_CH_STAT, + + /* Reg 0x60 (BUCK4 INTB)*/ + BD96801_BUCK4_OCPH_STAT, + BD96801_BUCK4_OCPL_STAT, + BD96801_BUCK4_OCPN_STAT, + BD96801_BUCK4_OVD_STAT, + BD96801_BUCK4_UVD_STAT, + BD96801_BUCK4_TW_CH_STAT, + + /* Reg 0x61 (LDO5 INTB) */ + BD96801_LDO5_OCPH_STAT, /* bit [0] */ + BD96801_LDO5_OVD_STAT, /* bit [3] */ + BD96801_LDO5_UVD_STAT, /* bit [4] */ + + /* Reg 0x62 (LDO6 INTB) */ + BD96801_LDO6_OCPH_STAT, /* bit [0] */ + BD96801_LDO6_OVD_STAT, /* bit [3] */ + BD96801_LDO6_UVD_STAT, /* bit [4] */ + + /* Reg 0x63 (LDO7 INTB) */ + BD96801_LDO7_OCPH_STAT, /* bit [0] */ + BD96801_LDO7_OVD_STAT, /* bit [3] */ + BD96801_LDO7_UVD_STAT, /* bit [4] */ +}; + +/* IRQ MASKs */ +#define BD96801_TW_STAT_MASK BIT(0) +#define BD96801_WDT_ERR_STAT_MASK BIT(1) +#define BD96801_I2C_ERR_STAT_MASK BIT(2) +#define BD96801_CHIP_IF_ERR_STAT_MASK BIT(3) + +#define BD96801_BUCK_OCPH_STAT_MASK BIT(0) +#define BD96801_BUCK_OCPL_STAT_MASK BIT(1) +#define BD96801_BUCK_OCPN_STAT_MASK BIT(2) +#define BD96801_BUCK_OVD_STAT_MASK BIT(3) +#define BD96801_BUCK_UVD_STAT_MASK BIT(4) +#define BD96801_BUCK_TW_CH_STAT_MASK BIT(5) + +#define BD96801_LDO_OCPH_STAT_MASK BIT(0) +#define BD96801_LDO_OVD_STAT_MASK BIT(3) +#define BD96801_LDO_UVD_STAT_MASK BIT(4) + +#endif diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index 4eeb22876bad..e7d4e6afe388 100644 --- a/include/linux/mfd/rohm-generic.h +++ b/include/linux/mfd/rohm-generic.h @@ -16,6 +16,7 @@ enum rohm_chip_type { ROHM_CHIP_TYPE_BD71828, ROHM_CHIP_TYPE_BD71837, ROHM_CHIP_TYPE_BD71847, + ROHM_CHIP_TYPE_BD96801, ROHM_CHIP_TYPE_AMOUNT }; -- cgit v1.2.3 From 67eccf151d76a9939ad8a50c6db5cb486b01df24 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 26 Jun 2024 15:46:23 +0200 Subject: HID: add source argument to HID low level functions This allows to know who actually sent what when we process the request to the device. This will be useful for a BPF firewall program to allow or not requests coming from a dedicated hidraw node client. Link: https://patch.msgid.link/20240626-hid_hw_req_bpf-v2-2-cfd60fb6c79f@kernel.org Acked-by: Jiri Kosina Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 6 ++++++ include/linux/hid_bpf.h | 16 ++++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 8e06d89698e6..dac2804b4562 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1125,6 +1125,12 @@ int __must_check hid_hw_open(struct hid_device *hdev); void hid_hw_close(struct hid_device *hdev); void hid_hw_request(struct hid_device *hdev, struct hid_report *report, enum hid_class_request reqtype); +int __hid_hw_raw_request(struct hid_device *hdev, + unsigned char reportnum, __u8 *buf, + size_t len, enum hid_report_type rtype, + enum hid_class_request reqtype, + __u64 source); +int __hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len, __u64 source); int hid_hw_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, enum hid_report_type rtype, diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 65d7e0acc8c2..a54741db0415 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -66,10 +66,12 @@ struct hid_ops { int (*hid_hw_raw_request)(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, enum hid_report_type rtype, - enum hid_class_request reqtype); - int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len); + enum hid_class_request reqtype, + __u64 source); + int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len, + __u64 source); int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type, - u8 *data, u32 size, int interrupt); + u8 *data, u32 size, int interrupt, u64 source); struct module *owner; const struct bus_type *bus_type; }; @@ -110,7 +112,8 @@ struct hid_bpf_ops { * * Context: Interrupt context. */ - int (*hid_device_event)(struct hid_bpf_ctx *ctx, enum hid_report_type report_type); + int (*hid_device_event)(struct hid_bpf_ctx *ctx, enum hid_report_type report_type, + __u64 source); /** * @hid_rdesc_fixup: called when the probe function parses the report descriptor @@ -146,7 +149,7 @@ struct hid_bpf { #ifdef CONFIG_HID_BPF u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, - u32 *size, int interrupt); + u32 *size, int interrupt, u64 source); int hid_bpf_connect_device(struct hid_device *hdev); void hid_bpf_disconnect_device(struct hid_device *hdev); void hid_bpf_destroy_device(struct hid_device *hid); @@ -154,7 +157,8 @@ void hid_bpf_device_init(struct hid_device *hid); u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *size); #else /* CONFIG_HID_BPF */ static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, - u8 *data, u32 *size, int interrupt) { return data; } + u8 *data, u32 *size, int interrupt, + u64 source) { return data; } static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; } static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} -- cgit v1.2.3 From 6cd735f0e57a6c8510ad92f5b63837a8d0cff3a7 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 26 Jun 2024 15:46:24 +0200 Subject: HID: bpf: protect HID-BPF prog_list access by a SRCU We want to add sleepable callbacks for hid_hw_raw_request() and hid_hw_output_report(), but we can not use a plain RCU for those. Prepare for a SRCU so we can extend HID-BPF. This changes a little bit how hid_bpf_device_init() behaves, as it may now fail, so there is a tiny hid-core.c change to accommodate for this. Link: https://patch.msgid.link/20240626-hid_hw_req_bpf-v2-3-cfd60fb6c79f@kernel.org Acked-by: Jiri Kosina Signed-off-by: Benjamin Tissoires --- include/linux/hid_bpf.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index a54741db0415..f93845de5cac 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -5,6 +5,7 @@ #include #include +#include #include struct hid_device; @@ -145,6 +146,7 @@ struct hid_bpf { struct hid_bpf_ops *rdesc_ops; struct list_head prog_list; struct mutex prog_list_lock; /* protects prog_list update */ + struct srcu_struct srcu; /* protects prog_list read-only access */ }; #ifdef CONFIG_HID_BPF @@ -153,7 +155,7 @@ u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type t int hid_bpf_connect_device(struct hid_device *hdev); void hid_bpf_disconnect_device(struct hid_device *hdev); void hid_bpf_destroy_device(struct hid_device *hid); -void hid_bpf_device_init(struct hid_device *hid); +int hid_bpf_device_init(struct hid_device *hid); u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *size); #else /* CONFIG_HID_BPF */ static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, @@ -162,7 +164,7 @@ static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; } static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} -static inline void hid_bpf_device_init(struct hid_device *hid) {} +static inline int hid_bpf_device_init(struct hid_device *hid) { return 0; } #define call_hid_bpf_rdesc_fixup(_hdev, _rdesc, _size) \ ((u8 *)kmemdup(_rdesc, *(_size), GFP_KERNEL)) -- cgit v1.2.3 From 8bd0488b5ea58655ad6fdcbe0408ef49b16882b1 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 26 Jun 2024 15:46:25 +0200 Subject: HID: bpf: add HID-BPF hooks for hid_hw_raw_requests This allows to intercept and prevent or change the behavior of hid_hw_raw_request() from a bpf program. The intent is to solve a couple of use case: - firewalling a HID device: a firewall can monitor who opens the hidraw nodes and then prevent or allow access to write operations on that hidraw node. - change the behavior of a device and emulate a new HID feature request The hook is allowed to be run as sleepable so it can itself call hid_bpf_hw_request(), which allows to "convert" one feature request into another or even call the feature request on a different HID device on the same physical device. Link: https://patch.msgid.link/20240626-hid_hw_req_bpf-v2-4-cfd60fb6c79f@kernel.org Acked-by: Jiri Kosina Signed-off-by: Benjamin Tissoires --- include/linux/hid_bpf.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index f93845de5cac..3c01f7f8b6fc 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -130,6 +130,31 @@ struct hid_bpf_ops { */ int (*hid_rdesc_fixup)(struct hid_bpf_ctx *ctx); + /** + * @hid_hw_request: called whenever a hid_hw_raw_request() call is emitted + * on the HID device + * + * It has the following arguments: + * + * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx + * ``reportnum``: the report number, as in hid_hw_raw_request() + * ``rtype``: the report type (``HID_INPUT_REPORT``, ``HID_FEATURE_REPORT``, + * ``HID_OUTPUT_REPORT``) + * ``reqtype``: the request + * ``source``: a u64 referring to a uniq but identifiable source. If %0, the + * kernel itself emitted that call. For hidraw, ``source`` is set + * to the associated ``struct file *``. + * + * Return: %0 to keep processing the request by hid-core; any other value + * stops hid-core from processing that event. A positive value should be + * returned with the number of bytes returned in the incoming buffer; a + * negative error code interrupts the processing of this call. + */ + int (*hid_hw_request)(struct hid_bpf_ctx *ctx, unsigned char reportnum, + enum hid_report_type rtype, enum hid_class_request reqtype, + __u64 source); + + /* private: do not show up in the docs */ struct hid_device *hdev; }; @@ -152,6 +177,11 @@ struct hid_bpf { #ifdef CONFIG_HID_BPF u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 *size, int interrupt, u64 source); +int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, + unsigned char reportnum, __u8 *buf, + u32 size, enum hid_report_type rtype, + enum hid_class_request reqtype, + __u64 source); int hid_bpf_connect_device(struct hid_device *hdev); void hid_bpf_disconnect_device(struct hid_device *hdev); void hid_bpf_destroy_device(struct hid_device *hid); @@ -161,6 +191,11 @@ u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *s static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 *size, int interrupt, u64 source) { return data; } +static inline int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, + unsigned char reportnum, u8 *buf, + u32 size, enum hid_report_type rtype, + enum hid_class_request reqtype, + u64 source) { return 0; } static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; } static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} -- cgit v1.2.3 From 75839101ce52e319cb2154a027d14f1f0aa3be09 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 26 Jun 2024 15:46:26 +0200 Subject: HID: bpf: prevent infinite recursions with hid_hw_raw_requests hooks When we attach a sleepable hook to hid_hw_raw_requests, we can (and in many cases should) call ourself hid_bpf_raw_request(), to actually fetch data from the device itself. However, this means that we might enter an infinite loop between hid_hw_raw_requests hooks and hid_bpf_hw_request() call. To prevent that, if a hid_bpf_hw_request() call is emitted, we prevent any new call of this kfunc by storing the information in the context. This way we can always trace/monitor/filter the incoming bpf requests, while preventing those loops to happen. I don't think exposing "from_bpf" is very interesting because while writing such a bpf program, you need to match at least the report number and/or the source of the call. So a blind "if there is a hid_hw_raw_request() call, I'm emitting another one" makes no real sense. Link: https://patch.msgid.link/20240626-hid_hw_req_bpf-v2-5-cfd60fb6c79f@kernel.org Acked-by: Jiri Kosina Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 2 +- include/linux/hid_bpf.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index dac2804b4562..24d0d7c0bd33 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1129,7 +1129,7 @@ int __hid_hw_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, enum hid_report_type rtype, enum hid_class_request reqtype, - __u64 source); + __u64 source, bool from_bpf); int __hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len, __u64 source); int hid_hw_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 3c01f7f8b6fc..088c94b6d8ec 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -68,7 +68,7 @@ struct hid_ops { unsigned char reportnum, __u8 *buf, size_t len, enum hid_report_type rtype, enum hid_class_request reqtype, - __u64 source); + __u64 source, bool from_bpf); int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len, __u64 source); int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type, @@ -181,7 +181,7 @@ int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, u32 size, enum hid_report_type rtype, enum hid_class_request reqtype, - __u64 source); + __u64 source, bool from_bpf); int hid_bpf_connect_device(struct hid_device *hdev); void hid_bpf_disconnect_device(struct hid_device *hdev); void hid_bpf_destroy_device(struct hid_device *hid); @@ -195,7 +195,7 @@ static inline int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, unsigned char reportnum, u8 *buf, u32 size, enum hid_report_type rtype, enum hid_class_request reqtype, - u64 source) { return 0; } + u64 source, bool from_bpf) { return 0; } static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; } static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} -- cgit v1.2.3 From 9286675a2aed40a517be8cc4e283a04f473275b5 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 26 Jun 2024 15:46:28 +0200 Subject: HID: bpf: add HID-BPF hooks for hid_hw_output_report Same story than hid_hw_raw_requests: This allows to intercept and prevent or change the behavior of hid_hw_output_report() from a bpf program. The intent is to solve a couple of use case: - firewalling a HID device: a firewall can monitor who opens the hidraw nodes and then prevent or allow access to write operations on that hidraw node. - change the behavior of a device and emulate a new HID feature request The hook is allowed to be run as sleepable so it can itself call hid_hw_output_report(), which allows to "convert" one feature request into another or even call the feature request on a different HID device on the same physical device. Link: https://patch.msgid.link/20240626-hid_hw_req_bpf-v2-7-cfd60fb6c79f@kernel.org Acked-by: Jiri Kosina Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 3 ++- include/linux/hid_bpf.h | 24 +++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hid.h b/include/linux/hid.h index 24d0d7c0bd33..1533c9dcd3a6 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1130,7 +1130,8 @@ int __hid_hw_raw_request(struct hid_device *hdev, size_t len, enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source, bool from_bpf); -int __hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len, __u64 source); +int __hid_hw_output_report(struct hid_device *hdev, __u8 *buf, size_t len, __u64 source, + bool from_bpf); int hid_hw_raw_request(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, size_t len, enum hid_report_type rtype, diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 088c94b6d8ec..f35508a73067 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -70,7 +70,7 @@ struct hid_ops { enum hid_class_request reqtype, __u64 source, bool from_bpf); int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len, - __u64 source); + __u64 source, bool from_bpf); int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt, u64 source); struct module *owner; @@ -154,6 +154,24 @@ struct hid_bpf_ops { enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source); + /** + * @hid_hw_output_report: called whenever a hid_hw_output_report() call is emitted + * on the HID device + * + * It has the following arguments: + * + * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx + * ``source``: a u64 referring to a uniq but identifiable source. If %0, the + * kernel itself emitted that call. For hidraw, ``source`` is set + * to the associated ``struct file *``. + * + * Return: %0 to keep processing the request by hid-core; any other value + * stops hid-core from processing that event. A positive value should be + * returned with the number of bytes written to the device; a negative error + * code interrupts the processing of this call. + */ + int (*hid_hw_output_report)(struct hid_bpf_ctx *ctx, __u64 source); + /* private: do not show up in the docs */ struct hid_device *hdev; @@ -182,6 +200,8 @@ int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, u32 size, enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source, bool from_bpf); +int dispatch_hid_bpf_output_report(struct hid_device *hdev, __u8 *buf, u32 size, + __u64 source, bool from_bpf); int hid_bpf_connect_device(struct hid_device *hdev); void hid_bpf_disconnect_device(struct hid_device *hdev); void hid_bpf_destroy_device(struct hid_device *hid); @@ -196,6 +216,8 @@ static inline int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, u32 size, enum hid_report_type rtype, enum hid_class_request reqtype, u64 source, bool from_bpf) { return 0; } +static inline int dispatch_hid_bpf_output_report(struct hid_device *hdev, __u8 *buf, u32 size, + __u64 source, bool from_bpf) { return 0; } static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; } static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} -- cgit v1.2.3 From fa03f398a8ac46f46927e0b509b302ebe0ed7e8a Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 26 Jun 2024 15:46:30 +0200 Subject: HID: bpf: make hid_bpf_input_report() sleep until the device is ready hid_bpf_input_report() is already marked to be used in sleepable context only. So instead of hammering with timers the device to hopefully get an available slot where the device is not sending events, we can make that kfunc wait for the current event to be terminated before it goes in. This allows to work with the following pseudo code: in struct_ops/hid_device_event: - schedule a bpf_wq, which calls hid_bpf_input_report() - once this struct_ops function terminates, hid_bpf_input_report() immediately starts before the next event Link: https://patch.msgid.link/20240626-hid_hw_req_bpf-v2-9-cfd60fb6c79f@kernel.org Acked-by: Jiri Kosina Signed-off-by: Benjamin Tissoires --- include/linux/hid_bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index f35508a73067..7f04353d09e9 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -72,7 +72,8 @@ struct hid_ops { int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len, __u64 source, bool from_bpf); int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type, - u8 *data, u32 size, int interrupt, u64 source); + u8 *data, u32 size, int interrupt, u64 source, + bool lock_already_taken); struct module *owner; const struct bus_type *bus_type; }; -- cgit v1.2.3 From 9acbb7ba4589d4715141d4e14230a828ddc95f3d Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 26 Jun 2024 15:46:32 +0200 Subject: HID: bpf: allow hid_device_event hooks to inject input reports on self This is the same logic than hid_hw_raw_request or hid_hw_output_report: we can allow hid_bpf_try_input_report to be called from a hook on hid_input_report if we ensure that the call can not be made twice in a row. There is one extra subtlety in which there is a lock in hid_input_report. But given that we can detect if we are already in the hook, we can notify hid_input_report to not take the lock. This is done by checking if ctx_kern data is valid or null, and if it is equal to the dedicated incoming data buffer. In order to have more control on whether the lock needs to be taken or not we introduce a new kfunc for it: hid_bpf_try_input_report() Link: https://patch.msgid.link/20240626-hid_hw_req_bpf-v2-11-cfd60fb6c79f@kernel.org Acked-by: Jiri Kosina Signed-off-by: Benjamin Tissoires --- include/linux/hid_bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 7f04353d09e9..93546ee7677a 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -72,7 +72,7 @@ struct hid_ops { int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len, __u64 source, bool from_bpf); int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type, - u8 *data, u32 size, int interrupt, u64 source, + u8 *data, u32 size, int interrupt, u64 source, bool from_bpf, bool lock_already_taken); struct module *owner; const struct bus_type *bus_type; @@ -195,7 +195,7 @@ struct hid_bpf { #ifdef CONFIG_HID_BPF u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, - u32 *size, int interrupt, u64 source); + u32 *size, int interrupt, u64 source, bool from_bpf); int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, u32 size, enum hid_report_type rtype, @@ -211,7 +211,7 @@ u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *s #else /* CONFIG_HID_BPF */ static inline u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 *size, int interrupt, - u64 source) { return data; } + u64 source, bool from_bpf) { return data; } static inline int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, unsigned char reportnum, u8 *buf, u32 size, enum hid_report_type rtype, -- cgit v1.2.3 From bab4923132feb3e439ae45962979c5d9d5c7c1f1 Mon Sep 17 00:00:00 2001 From: Yunseong Kim Date: Tue, 25 Jun 2024 02:33:23 +0900 Subject: tracing/net_sched: NULL pointer dereference in perf_trace_qdisc_reset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the TRACE_EVENT(qdisc_reset) NULL dereference occurred from qdisc->dev_queue->dev ->name This situation simulated from bunch of veths and Bluetooth disconnection and reconnection. During qdisc initialization, qdisc was being set to noop_queue. In veth_init_queue, the initial tx_num was reduced back to one, causing the qdisc reset to be called with noop, which led to the kernel panic. I've attached the GitHub gist link that C converted syz-execprogram source code and 3 log of reproduced vmcore-dmesg. https://gist.github.com/yskelg/cc64562873ce249cdd0d5a358b77d740 Yeoreum and I use two fuzzing tool simultaneously. One process with syz-executor : https://github.com/google/syzkaller $ ./syz-execprog -executor=./syz-executor -repeat=1 -sandbox=setuid \ -enable=none -collide=false log1 The other process with perf fuzzer: https://github.com/deater/perf_event_tests/tree/master/fuzzer $ perf_event_tests/fuzzer/perf_fuzzer I think this will happen on the kernel version. Linux kernel version +v6.7.10, +v6.8, +v6.9 and it could happen in v6.10. This occurred from 51270d573a8d. I think this patch is absolutely necessary. Previously, It was showing not intended string value of name. I've reproduced 3 time from my fedora 40 Debug Kernel with any other module or patched. version: 6.10.0-0.rc2.20240608gitdc772f8237f9.29.fc41.aarch64+debug [ 5287.164555] veth0_vlan: left promiscuous mode [ 5287.164929] veth1_macvtap: left promiscuous mode [ 5287.164950] veth0_macvtap: left promiscuous mode [ 5287.164983] veth1_vlan: left promiscuous mode [ 5287.165008] veth0_vlan: left promiscuous mode [ 5287.165450] veth1_macvtap: left promiscuous mode [ 5287.165472] veth0_macvtap: left promiscuous mode [ 5287.165502] veth1_vlan: left promiscuous mode … [ 5297.598240] bridge0: port 2(bridge_slave_1) entered blocking state [ 5297.598262] bridge0: port 2(bridge_slave_1) entered forwarding state [ 5297.598296] bridge0: port 1(bridge_slave_0) entered blocking state [ 5297.598313] bridge0: port 1(bridge_slave_0) entered forwarding state [ 5297.616090] 8021q: adding VLAN 0 to HW filter on device bond0 [ 5297.620405] bridge0: port 1(bridge_slave_0) entered disabled state [ 5297.620730] bridge0: port 2(bridge_slave_1) entered disabled state [ 5297.627247] 8021q: adding VLAN 0 to HW filter on device team0 [ 5297.629636] bridge0: port 1(bridge_slave_0) entered blocking state … [ 5298.002798] bridge_slave_0: left promiscuous mode [ 5298.002869] bridge0: port 1(bridge_slave_0) entered disabled state [ 5298.309444] bond0 (unregistering): (slave bond_slave_0): Releasing backup interface [ 5298.315206] bond0 (unregistering): (slave bond_slave_1): Releasing backup interface [ 5298.320207] bond0 (unregistering): Released all slaves [ 5298.354296] hsr_slave_0: left promiscuous mode [ 5298.360750] hsr_slave_1: left promiscuous mode [ 5298.374889] veth1_macvtap: left promiscuous mode [ 5298.374931] veth0_macvtap: left promiscuous mode [ 5298.374988] veth1_vlan: left promiscuous mode [ 5298.375024] veth0_vlan: left promiscuous mode [ 5299.109741] team0 (unregistering): Port device team_slave_1 removed [ 5299.185870] team0 (unregistering): Port device team_slave_0 removed … [ 5300.155443] Bluetooth: hci3: unexpected cc 0x0c03 length: 249 > 1 [ 5300.155724] Bluetooth: hci3: unexpected cc 0x1003 length: 249 > 9 [ 5300.155988] Bluetooth: hci3: unexpected cc 0x1001 length: 249 > 9 …. [ 5301.075531] team0: Port device team_slave_1 added [ 5301.085515] bridge0: port 1(bridge_slave_0) entered blocking state [ 5301.085531] bridge0: port 1(bridge_slave_0) entered disabled state [ 5301.085588] bridge_slave_0: entered allmulticast mode [ 5301.085800] bridge_slave_0: entered promiscuous mode [ 5301.095617] bridge0: port 1(bridge_slave_0) entered blocking state [ 5301.095633] bridge0: port 1(bridge_slave_0) entered disabled state … [ 5301.149734] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link [ 5301.173234] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link [ 5301.180517] bond0: (slave bond_slave_1): Enslaving as an active interface with an up link [ 5301.193481] hsr_slave_0: entered promiscuous mode [ 5301.204425] hsr_slave_1: entered promiscuous mode [ 5301.210172] debugfs: Directory 'hsr0' with parent 'hsr' already present! [ 5301.210185] Cannot create hsr debugfs directory [ 5301.224061] bond0: (slave bond_slave_1): Enslaving as an active interface with an up link [ 5301.246901] bond0: (slave bond_slave_0): Enslaving as an active interface with an up link [ 5301.255934] team0: Port device team_slave_0 added [ 5301.256480] team0: Port device team_slave_1 added [ 5301.256948] team0: Port device team_slave_0 added … [ 5301.435928] hsr_slave_0: entered promiscuous mode [ 5301.446029] hsr_slave_1: entered promiscuous mode [ 5301.455872] debugfs: Directory 'hsr0' with parent 'hsr' already present! [ 5301.455884] Cannot create hsr debugfs directory [ 5301.502664] hsr_slave_0: entered promiscuous mode [ 5301.513675] hsr_slave_1: entered promiscuous mode [ 5301.526155] debugfs: Directory 'hsr0' with parent 'hsr' already present! [ 5301.526164] Cannot create hsr debugfs directory [ 5301.563662] hsr_slave_0: entered promiscuous mode [ 5301.576129] hsr_slave_1: entered promiscuous mode [ 5301.580259] debugfs: Directory 'hsr0' with parent 'hsr' already present! [ 5301.580270] Cannot create hsr debugfs directory [ 5301.590269] 8021q: adding VLAN 0 to HW filter on device bond0 [ 5301.595872] KASAN: null-ptr-deref in range [0x0000000000000130-0x0000000000000137] [ 5301.595877] Mem abort info: [ 5301.595881] ESR = 0x0000000096000006 [ 5301.595885] EC = 0x25: DABT (current EL), IL = 32 bits [ 5301.595889] SET = 0, FnV = 0 [ 5301.595893] EA = 0, S1PTW = 0 [ 5301.595896] FSC = 0x06: level 2 translation fault [ 5301.595900] Data abort info: [ 5301.595903] ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000 [ 5301.595907] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 5301.595911] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 5301.595915] [dfff800000000026] address between user and kernel address ranges [ 5301.595971] Internal error: Oops: 0000000096000006 [#1] SMP … [ 5301.596076] CPU: 2 PID: 102769 Comm: syz-executor.3 Kdump: loaded Tainted: G W ------- --- 6.10.0-0.rc2.20240608gitdc772f8237f9.29.fc41.aarch64+debug #1 [ 5301.596080] Hardware name: VMware, Inc. VMware20,1/VBSA, BIOS VMW201.00V.21805430.BA64.2305221830 05/22/2023 [ 5301.596082] pstate: 01400005 (nzcv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) [ 5301.596085] pc : strnlen+0x40/0x88 [ 5301.596114] lr : trace_event_get_offsets_qdisc_reset+0x6c/0x2b0 [ 5301.596124] sp : ffff8000beef6b40 [ 5301.596126] x29: ffff8000beef6b40 x28: dfff800000000000 x27: 0000000000000001 [ 5301.596131] x26: 6de1800082c62bd0 x25: 1ffff000110aa9e0 x24: ffff800088554f00 [ 5301.596136] x23: ffff800088554ec0 x22: 0000000000000130 x21: 0000000000000140 [ 5301.596140] x20: dfff800000000000 x19: ffff8000beef6c60 x18: ffff7000115106d8 [ 5301.596143] x17: ffff800121bad000 x16: ffff800080020000 x15: 0000000000000006 [ 5301.596147] x14: 0000000000000002 x13: ffff0001f3ed8d14 x12: ffff700017ddeda5 [ 5301.596151] x11: 1ffff00017ddeda4 x10: ffff700017ddeda4 x9 : ffff800082cc5eec [ 5301.596155] x8 : 0000000000000004 x7 : 00000000f1f1f1f1 x6 : 00000000f2f2f200 [ 5301.596158] x5 : 00000000f3f3f3f3 x4 : ffff700017dded80 x3 : 00000000f204f1f1 [ 5301.596162] x2 : 0000000000000026 x1 : 0000000000000000 x0 : 0000000000000130 [ 5301.596166] Call trace: [ 5301.596175] strnlen+0x40/0x88 [ 5301.596179] trace_event_get_offsets_qdisc_reset+0x6c/0x2b0 [ 5301.596182] perf_trace_qdisc_reset+0xb0/0x538 [ 5301.596184] __traceiter_qdisc_reset+0x68/0xc0 [ 5301.596188] qdisc_reset+0x43c/0x5e8 [ 5301.596190] netif_set_real_num_tx_queues+0x288/0x770 [ 5301.596194] veth_init_queues+0xfc/0x130 [veth] [ 5301.596198] veth_newlink+0x45c/0x850 [veth] [ 5301.596202] rtnl_newlink_create+0x2c8/0x798 [ 5301.596205] __rtnl_newlink+0x92c/0xb60 [ 5301.596208] rtnl_newlink+0xd8/0x130 [ 5301.596211] rtnetlink_rcv_msg+0x2e0/0x890 [ 5301.596214] netlink_rcv_skb+0x1c4/0x380 [ 5301.596225] rtnetlink_rcv+0x20/0x38 [ 5301.596227] netlink_unicast+0x3c8/0x640 [ 5301.596231] netlink_sendmsg+0x658/0xa60 [ 5301.596234] __sock_sendmsg+0xd0/0x180 [ 5301.596243] __sys_sendto+0x1c0/0x280 [ 5301.596246] __arm64_sys_sendto+0xc8/0x150 [ 5301.596249] invoke_syscall+0xdc/0x268 [ 5301.596256] el0_svc_common.constprop.0+0x16c/0x240 [ 5301.596259] do_el0_svc+0x48/0x68 [ 5301.596261] el0_svc+0x50/0x188 [ 5301.596265] el0t_64_sync_handler+0x120/0x130 [ 5301.596268] el0t_64_sync+0x194/0x198 [ 5301.596272] Code: eb15001f 54000120 d343fc02 12000801 (38f46842) [ 5301.596285] SMP: stopping secondary CPUs [ 5301.597053] Starting crashdump kernel... [ 5301.597057] Bye! After applying our patch, I didn't find any kernel panic errors. We've found a simple reproducer # echo 1 > /sys/kernel/debug/tracing/events/qdisc/qdisc_reset/enable # ip link add veth0 type veth peer name veth1 Error: Unknown device type. However, without our patch applied, I tested upstream 6.10.0-rc3 kernel using the qdisc_reset event and the ip command on my qemu virtual machine. This 2 commands makes always kernel panic. Linux version: 6.10.0-rc3 [ 0.000000] Linux version 6.10.0-rc3-00164-g44ef20baed8e-dirty (paran@fedora) (gcc (GCC) 14.1.1 20240522 (Red Hat 14.1.1-4), GNU ld version 2.41-34.fc40) #20 SMP PREEMPT Sat Jun 15 16:51:25 KST 2024 Kernel panic message: [ 615.236484] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP [ 615.237250] Dumping ftrace buffer: [ 615.237679] (ftrace buffer empty) [ 615.238097] Modules linked in: veth crct10dif_ce virtio_gpu virtio_dma_buf drm_shmem_helper drm_kms_helper zynqmp_fpga xilinx_can xilinx_spi xilinx_selectmap xilinx_core xilinx_pr_decoupler versal_fpga uvcvideo uvc videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videodev videobuf2_common mc usbnet deflate zstd ubifs ubi rcar_canfd rcar_can omap_mailbox ntb_msi_test ntb_hw_epf lattice_sysconfig_spi lattice_sysconfig ice40_spi gpio_xilinx dwmac_altr_socfpga mdio_regmap stmmac_platform stmmac pcs_xpcs dfl_fme_region dfl_fme_mgr dfl_fme_br dfl_afu dfl fpga_region fpga_bridge can can_dev br_netfilter bridge stp llc atl1c ath11k_pci mhi ath11k_ahb ath11k qmi_helpers ath10k_sdio ath10k_pci ath10k_core ath mac80211 libarc4 cfg80211 drm fuse backlight ipv6 Jun 22 02:36:5[3 6k152.62-4sm98k4-0k]v kCePUr:n e1l :P IUDn:a b4le6 8t oC ohmma: nidpl eN oketr nteali nptaedg i6n.g1 0re.0q-urecs3t- 0at0 1v6i4r-tgu4a4le fa2d0dbraeeds0se-dir tyd f#f2f08 615.252376] Hardware name: linux,dummy-virt (DT) [ 615.253220] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 615.254433] pc : strnlen+0x6c/0xe0 [ 615.255096] lr : trace_event_get_offsets_qdisc_reset+0x94/0x3d0 [ 615.256088] sp : ffff800080b269a0 [ 615.256615] x29: ffff800080b269a0 x28: ffffc070f3f98500 x27: 0000000000000001 [ 615.257831] x26: 0000000000000010 x25: ffffc070f3f98540 x24: ffffc070f619cf60 [ 615.259020] x23: 0000000000000128 x22: 0000000000000138 x21: dfff800000000000 [ 615.260241] x20: ffffc070f631ad00 x19: 0000000000000128 x18: ffffc070f448b800 [ 615.261454] x17: 0000000000000000 x16: 0000000000000001 x15: ffffc070f4ba2a90 [ 615.262635] x14: ffff700010164d73 x13: 1ffff80e1e8d5eb3 x12: 1ffff00010164d72 [ 615.263877] x11: ffff700010164d72 x10: dfff800000000000 x9 : ffffc070e85d6184 [ 615.265047] x8 : ffffc070e4402070 x7 : 000000000000f1f1 x6 : 000000001504a6d3 [ 615.266336] x5 : ffff28ca21122140 x4 : ffffc070f5043ea8 x3 : 0000000000000000 [ 615.267528] x2 : 0000000000000025 x1 : 0000000000000000 x0 : 0000000000000000 [ 615.268747] Call trace: [ 615.269180] strnlen+0x6c/0xe0 [ 615.269767] trace_event_get_offsets_qdisc_reset+0x94/0x3d0 [ 615.270716] trace_event_raw_event_qdisc_reset+0xe8/0x4e8 [ 615.271667] __traceiter_qdisc_reset+0xa0/0x140 [ 615.272499] qdisc_reset+0x554/0x848 [ 615.273134] netif_set_real_num_tx_queues+0x360/0x9a8 [ 615.274050] veth_init_queues+0x110/0x220 [veth] [ 615.275110] veth_newlink+0x538/0xa50 [veth] [ 615.276172] __rtnl_newlink+0x11e4/0x1bc8 [ 615.276944] rtnl_newlink+0xac/0x120 [ 615.277657] rtnetlink_rcv_msg+0x4e4/0x1370 [ 615.278409] netlink_rcv_skb+0x25c/0x4f0 [ 615.279122] rtnetlink_rcv+0x48/0x70 [ 615.279769] netlink_unicast+0x5a8/0x7b8 [ 615.280462] netlink_sendmsg+0xa70/0x1190 Yeoreum and I don't know if the patch we wrote will fix the underlying cause, but we think that priority is to prevent kernel panic happening. So, we're sending this patch. Fixes: 51270d573a8d ("tracing/net_sched: Fix tracepoints that save qdisc_dev() as a string") Link: https://lore.kernel.org/lkml/20240229143432.273b4871@gandalf.local.home/t/ Cc: netdev@vger.kernel.org Tested-by: Yunseong Kim Signed-off-by: Yunseong Kim Signed-off-by: Yeoreum Yun Link: https://lore.kernel.org/r/20240624173320.24945-4-yskelg@gmail.com Signed-off-by: Paolo Abeni --- include/trace/events/qdisc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h index f1b5e816e7e5..ff33f41a9db7 100644 --- a/include/trace/events/qdisc.h +++ b/include/trace/events/qdisc.h @@ -81,7 +81,7 @@ TRACE_EVENT(qdisc_reset, TP_ARGS(q), TP_STRUCT__entry( - __string( dev, qdisc_dev(q)->name ) + __string( dev, qdisc_dev(q) ? qdisc_dev(q)->name : "(null)" ) __string( kind, q->ops->id ) __field( u32, parent ) __field( u32, handle ) -- cgit v1.2.3 From 6d4618ad04e1a14202410648f638b62d3f666d45 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 19 Jun 2024 20:21:47 +0200 Subject: drm/bridge: analogix_dp: remove unused platform power_on_end callback This isn't used, but gives the impression of the power on and power off platform calls being non-symmetrical. Remove the unused callback and rename the power_on_start to simply power_on. Signed-off-by: Lucas Stach Reviewed-by: Robert Foss Tested-by: Heiko Stuebner Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20240619182200.3752465-1-l.stach@pengutronix.de --- include/drm/bridge/analogix_dp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/bridge/analogix_dp.h b/include/drm/bridge/analogix_dp.h index b0dcc07334a1..8709b6a74c0f 100644 --- a/include/drm/bridge/analogix_dp.h +++ b/include/drm/bridge/analogix_dp.h @@ -29,8 +29,7 @@ struct analogix_dp_plat_data { struct drm_connector *connector; bool skip_connector; - int (*power_on_start)(struct analogix_dp_plat_data *); - int (*power_on_end)(struct analogix_dp_plat_data *); + int (*power_on)(struct analogix_dp_plat_data *); int (*power_off)(struct analogix_dp_plat_data *); int (*attach)(struct analogix_dp_plat_data *, struct drm_bridge *, struct drm_connector *); -- cgit v1.2.3 From e7514df007e3b034b65367a32ba19dc61aaa3980 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 19 Jun 2024 20:21:51 +0200 Subject: drm/bridge: analogix_dp: remove unused analogix_dp_remove Now that the clock is handled dynamically through analogix_dp_resume/suspend and it isn't statically enabled in the driver probe routine, there is no need for the remove function anymore. Signed-off-by: Lucas Stach Reviewed-by: Robert Foss Tested-by: Heiko Stuebner Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20240619182200.3752465-5-l.stach@pengutronix.de --- include/drm/bridge/analogix_dp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/drm/bridge/analogix_dp.h b/include/drm/bridge/analogix_dp.h index 8709b6a74c0f..6002c5666031 100644 --- a/include/drm/bridge/analogix_dp.h +++ b/include/drm/bridge/analogix_dp.h @@ -44,7 +44,6 @@ struct analogix_dp_device * analogix_dp_probe(struct device *dev, struct analogix_dp_plat_data *plat_data); int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev); void analogix_dp_unbind(struct analogix_dp_device *dp); -void analogix_dp_remove(struct analogix_dp_device *dp); int analogix_dp_start_crc(struct drm_connector *connector); int analogix_dp_stop_crc(struct drm_connector *connector); -- cgit v1.2.3 From 58de63ddd0ecc84548b8fd24c72deb3739365c78 Mon Sep 17 00:00:00 2001 From: "Jason-JH.Lin" Date: Tue, 25 Jun 2024 16:39:57 +0800 Subject: soc: mtk-cmdq: Add cmdq_pkt_logic_command to support math operation Add cmdq_pkt_logic_command to support math operation. cmdq_pkt_logic_command can append logic command to the CMDQ packet, ask GCE to execute a arithmetic calculate instruction, such as add, subtract, multiply, AND, OR and NOT, etc. Note that all arithmetic instructions are unsigned calculations. If there are any overflows, GCE will sent the invalid IRQ to notify CMDQ driver. Signed-off-by: Jason-JH.Lin Signed-off-by: Hsiao Chien Sung Link: https://lore.kernel.org/r/20240625083957.3540-1-jason-jh.lin@mediatek.com Signed-off-by: AngeloGioacchino Del Regno --- include/linux/soc/mediatek/mtk-cmdq.h | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include') diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index d4a8e34505e6..5bee6f7fc400 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -25,6 +25,31 @@ struct cmdq_pkt; +enum cmdq_logic_op { + CMDQ_LOGIC_ASSIGN = 0, + CMDQ_LOGIC_ADD = 1, + CMDQ_LOGIC_SUBTRACT = 2, + CMDQ_LOGIC_MULTIPLY = 3, + CMDQ_LOGIC_XOR = 8, + CMDQ_LOGIC_NOT = 9, + CMDQ_LOGIC_OR = 10, + CMDQ_LOGIC_AND = 11, + CMDQ_LOGIC_LEFT_SHIFT = 12, + CMDQ_LOGIC_RIGHT_SHIFT = 13, + CMDQ_LOGIC_MAX, +}; + +struct cmdq_operand { + /* register type */ + bool reg; + union { + /* index */ + u16 idx; + /* value */ + u16 value; + }; +}; + struct cmdq_client_reg { u8 subsys; u16 offset; @@ -272,6 +297,23 @@ int cmdq_pkt_poll(struct cmdq_pkt *pkt, u8 subsys, int cmdq_pkt_poll_mask(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value, u32 mask); +/** + * cmdq_pkt_logic_command() - Append logic command to the CMDQ packet, ask GCE to + * execute an instruction that store the result of logic operation + * with left and right operand into result_reg_idx. + * @pkt: the CMDQ packet + * @result_reg_idx: SPR index that store operation result of left_operand and right_operand + * @left_operand: left operand + * @s_op: the logic operator enum + * @right_operand: right operand + * + * Return: 0 for success; else the error code is returned + */ +int cmdq_pkt_logic_command(struct cmdq_pkt *pkt, u16 result_reg_idx, + struct cmdq_operand *left_operand, + enum cmdq_logic_op s_op, + struct cmdq_operand *right_operand); + /** * cmdq_pkt_assign() - Append logic assign command to the CMDQ packet, ask GCE * to execute an instruction that set a constant value into -- cgit v1.2.3 From d1741141d03f8f5535adaf5a2d5000da3be9fe90 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Wed, 26 Jun 2024 20:14:32 +0200 Subject: media: uapi: Add a pixel format for BGR48 and RGB48 Add BGR48 and RGB48 16-bit per component image formats. Signed-off-by: Jacopo Mondi Reviewed-by: Kieran Bingham Reviewed-by: Naushir Patuck Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index fe6b67e83751..dd6876380fe3 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -582,6 +582,8 @@ struct v4l2_pix_format { /* RGB formats (6 or 8 bytes per pixel) */ #define V4L2_PIX_FMT_BGR48_12 v4l2_fourcc('B', '3', '1', '2') /* 48 BGR 12-bit per component */ +#define V4L2_PIX_FMT_BGR48 v4l2_fourcc('B', 'G', 'R', '6') /* 48 BGR 16-bit per component */ +#define V4L2_PIX_FMT_RGB48 v4l2_fourcc('R', 'G', 'B', '6') /* 48 RGB 16-bit per component */ #define V4L2_PIX_FMT_ABGR64_12 v4l2_fourcc('B', '4', '1', '2') /* 64 BGRA 12-bit per component */ /* Grey formats */ -- cgit v1.2.3 From c6c49bac8770ef95518faf20083e8b3e6150f45f Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Wed, 26 Jun 2024 20:14:33 +0200 Subject: media: uapi: Add Raspberry Pi PiSP Back End uAPI Add the Raspberry Pi PiSP Back End uAPI header. The header defines the data type used to configure the PiSP Back End ISP. The detailed description of the types and of the ISP configuration procedure is available at https://datasheets.raspberrypi.com/camera/raspberry-pi-image-signal-processor-specification.pdf Signed-off-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- .../uapi/linux/media/raspberrypi/pisp_be_config.h | 927 +++++++++++++++++++++ include/uapi/linux/media/raspberrypi/pisp_common.h | 199 +++++ 2 files changed, 1126 insertions(+) create mode 100644 include/uapi/linux/media/raspberrypi/pisp_be_config.h create mode 100644 include/uapi/linux/media/raspberrypi/pisp_common.h (limited to 'include') diff --git a/include/uapi/linux/media/raspberrypi/pisp_be_config.h b/include/uapi/linux/media/raspberrypi/pisp_be_config.h new file mode 100644 index 000000000000..1684ae068d4f --- /dev/null +++ b/include/uapi/linux/media/raspberrypi/pisp_be_config.h @@ -0,0 +1,927 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * PiSP Back End configuration definitions. + * + * Copyright (C) 2021 - Raspberry Pi Ltd + * + */ +#ifndef _UAPI_PISP_BE_CONFIG_H_ +#define _UAPI_PISP_BE_CONFIG_H_ + +#include + +#include "pisp_common.h" + +/* byte alignment for inputs */ +#define PISP_BACK_END_INPUT_ALIGN 4u +/* alignment for compressed inputs */ +#define PISP_BACK_END_COMPRESSED_ALIGN 8u +/* minimum required byte alignment for outputs */ +#define PISP_BACK_END_OUTPUT_MIN_ALIGN 16u +/* preferred byte alignment for outputs */ +#define PISP_BACK_END_OUTPUT_MAX_ALIGN 64u + +/* minimum allowed tile width anywhere in the pipeline */ +#define PISP_BACK_END_MIN_TILE_WIDTH 16u +/* minimum allowed tile width anywhere in the pipeline */ +#define PISP_BACK_END_MIN_TILE_HEIGHT 16u + +#define PISP_BACK_END_NUM_OUTPUTS 2 +#define PISP_BACK_END_HOG_OUTPUT 1 + +#define PISP_BACK_END_NUM_TILES 64 + +enum pisp_be_bayer_enable { + PISP_BE_BAYER_ENABLE_INPUT = 0x000001, + PISP_BE_BAYER_ENABLE_DECOMPRESS = 0x000002, + PISP_BE_BAYER_ENABLE_DPC = 0x000004, + PISP_BE_BAYER_ENABLE_GEQ = 0x000008, + PISP_BE_BAYER_ENABLE_TDN_INPUT = 0x000010, + PISP_BE_BAYER_ENABLE_TDN_DECOMPRESS = 0x000020, + PISP_BE_BAYER_ENABLE_TDN = 0x000040, + PISP_BE_BAYER_ENABLE_TDN_COMPRESS = 0x000080, + PISP_BE_BAYER_ENABLE_TDN_OUTPUT = 0x000100, + PISP_BE_BAYER_ENABLE_SDN = 0x000200, + PISP_BE_BAYER_ENABLE_BLC = 0x000400, + PISP_BE_BAYER_ENABLE_STITCH_INPUT = 0x000800, + PISP_BE_BAYER_ENABLE_STITCH_DECOMPRESS = 0x001000, + PISP_BE_BAYER_ENABLE_STITCH = 0x002000, + PISP_BE_BAYER_ENABLE_STITCH_COMPRESS = 0x004000, + PISP_BE_BAYER_ENABLE_STITCH_OUTPUT = 0x008000, + PISP_BE_BAYER_ENABLE_WBG = 0x010000, + PISP_BE_BAYER_ENABLE_CDN = 0x020000, + PISP_BE_BAYER_ENABLE_LSC = 0x040000, + PISP_BE_BAYER_ENABLE_TONEMAP = 0x080000, + PISP_BE_BAYER_ENABLE_CAC = 0x100000, + PISP_BE_BAYER_ENABLE_DEBIN = 0x200000, + PISP_BE_BAYER_ENABLE_DEMOSAIC = 0x400000, +}; + +enum pisp_be_rgb_enable { + PISP_BE_RGB_ENABLE_INPUT = 0x000001, + PISP_BE_RGB_ENABLE_CCM = 0x000002, + PISP_BE_RGB_ENABLE_SAT_CONTROL = 0x000004, + PISP_BE_RGB_ENABLE_YCBCR = 0x000008, + PISP_BE_RGB_ENABLE_FALSE_COLOUR = 0x000010, + PISP_BE_RGB_ENABLE_SHARPEN = 0x000020, + /* Preferred colours would occupy 0x000040 */ + PISP_BE_RGB_ENABLE_YCBCR_INVERSE = 0x000080, + PISP_BE_RGB_ENABLE_GAMMA = 0x000100, + PISP_BE_RGB_ENABLE_CSC0 = 0x000200, + PISP_BE_RGB_ENABLE_CSC1 = 0x000400, + PISP_BE_RGB_ENABLE_DOWNSCALE0 = 0x001000, + PISP_BE_RGB_ENABLE_DOWNSCALE1 = 0x002000, + PISP_BE_RGB_ENABLE_RESAMPLE0 = 0x008000, + PISP_BE_RGB_ENABLE_RESAMPLE1 = 0x010000, + PISP_BE_RGB_ENABLE_OUTPUT0 = 0x040000, + PISP_BE_RGB_ENABLE_OUTPUT1 = 0x080000, + PISP_BE_RGB_ENABLE_HOG = 0x200000 +}; + +#define PISP_BE_RGB_ENABLE_CSC(i) (PISP_BE_RGB_ENABLE_CSC0 << (i)) +#define PISP_BE_RGB_ENABLE_DOWNSCALE(i) (PISP_BE_RGB_ENABLE_DOWNSCALE0 << (i)) +#define PISP_BE_RGB_ENABLE_RESAMPLE(i) (PISP_BE_RGB_ENABLE_RESAMPLE0 << (i)) +#define PISP_BE_RGB_ENABLE_OUTPUT(i) (PISP_BE_RGB_ENABLE_OUTPUT0 << (i)) + +/* + * We use the enable flags to show when blocks are "dirty", but we need some + * extra ones too. + */ +enum pisp_be_dirty { + PISP_BE_DIRTY_GLOBAL = 0x0001, + PISP_BE_DIRTY_SH_FC_COMBINE = 0x0002, + PISP_BE_DIRTY_CROP = 0x0004 +}; + +/** + * struct pisp_be_global_config - PiSP global enable bitmaps + * @bayer_enables: Bayer input enable flags + * @rgb_enables: RGB output enable flags + * @bayer_order: Bayer input format ordering + * @pad: Padding bytes + */ +struct pisp_be_global_config { + __u32 bayer_enables; + __u32 rgb_enables; + __u8 bayer_order; + __u8 pad[3]; +} __attribute__((packed)); + +/** + * struct pisp_be_input_buffer_config - PiSP Back End input buffer + * @addr: Input buffer address + */ +struct pisp_be_input_buffer_config { + /* low 32 bits followed by high 32 bits (for each of up to 3 planes) */ + __u32 addr[3][2]; +} __attribute__((packed)); + +/** + * struct pisp_be_dpc_config - PiSP Back End DPC config + * + * Defective Pixel Correction configuration + * + * @coeff_level: Coefficient for the darkest neighbouring pixel value + * @coeff_range: Coefficient for the range of pixels for this Bayer channel + * @pad: Padding byte + * @flags: DPC configuration flags + */ +struct pisp_be_dpc_config { + __u8 coeff_level; + __u8 coeff_range; + __u8 pad; +#define PISP_BE_DPC_FLAG_FOLDBACK 1 + __u8 flags; +} __attribute__((packed)); + +/** + * struct pisp_be_geq_config - PiSP Back End GEQ config + * + * Green Equalisation configuration + * + * @offset: Offset value for threshold calculation + * @slope_sharper: Slope/Sharper configuration + * @min: Minimum value the threshold may have + * @max: Maximum value the threshold may have + */ +struct pisp_be_geq_config { + __u16 offset; +#define PISP_BE_GEQ_SHARPER BIT(15) +#define PISP_BE_GEQ_SLOPE ((1 << 10) - 1) + /* top bit is the "sharper" flag, slope value is bottom 10 bits */ + __u16 slope_sharper; + __u16 min; + __u16 max; +} __attribute__((packed)); + +/** + * struct pisp_be_tdn_input_buffer_config - PiSP Back End TDN input buffer + * @addr: TDN input buffer address + */ +struct pisp_be_tdn_input_buffer_config { + /* low 32 bits followed by high 32 bits */ + __u32 addr[2]; +} __attribute__((packed)); + +/** + * struct pisp_be_tdn_config - PiSP Back End TDN config + * + * Temporal Denoise configuration + * + * @black_level: Black level value subtracted from pixels + * @ratio: Multiplier for the LTA input frame + * @noise_constant: Constant offset value used in noise estimation + * @noise_slope: Noise estimation multiplier + * @threshold: Threshold for TDN operations + * @reset: Disable TDN operations + * @pad: Padding byte + */ +struct pisp_be_tdn_config { + __u16 black_level; + __u16 ratio; + __u16 noise_constant; + __u16 noise_slope; + __u16 threshold; + __u8 reset; + __u8 pad; +} __attribute__((packed)); + +/** + * struct pisp_be_tdn_output_buffer_config - PiSP Back End TDN output buffer + * @addr: TDN output buffer address + */ +struct pisp_be_tdn_output_buffer_config { + /* low 32 bits followed by high 32 bits */ + __u32 addr[2]; +} __attribute__((packed)); + +/** + * struct pisp_be_sdn_config - PiSP Back End SDN config + * + * Spatial Denoise configuration + * + * @black_level: Black level subtracted from pixel for noise estimation + * @leakage: Proportion of the original undenoised value to mix in + * denoised output + * @pad: Padding byte + * @noise_constant: Noise constant used for noise estimation + * @noise_slope: Noise slope value used for noise estimation + * @noise_constant2: Second noise constant used for noise estimation + * @noise_slope2: Second slope value used for noise estimation + */ +struct pisp_be_sdn_config { + __u16 black_level; + __u8 leakage; + __u8 pad; + __u16 noise_constant; + __u16 noise_slope; + __u16 noise_constant2; + __u16 noise_slope2; +} __attribute__((packed)); + +/** + * struct pisp_be_stitch_input_buffer_config - PiSP Back End Stitch input + * @addr: Stitch input buffer address + */ +struct pisp_be_stitch_input_buffer_config { + /* low 32 bits followed by high 32 bits */ + __u32 addr[2]; +} __attribute__((packed)); + +#define PISP_BE_STITCH_STREAMING_LONG 0x8000 +#define PISP_BE_STITCH_EXPOSURE_RATIO_MASK 0x7fff + +/** + * struct pisp_be_stitch_config - PiSP Back End Stitch config + * + * Stitch block configuration + * + * @threshold_lo: Low threshold value + * @threshold_diff_power: Low and high threshold difference + * @pad: Padding bytes + * @exposure_ratio: Multiplier to convert long exposure pixels into + * short exposure pixels + * @motion_threshold_256: Motion threshold above which short exposure + * pixels are used + * @motion_threshold_recip: Reciprocal of motion_threshold_256 value + */ +struct pisp_be_stitch_config { + __u16 threshold_lo; + __u8 threshold_diff_power; + __u8 pad; + + /* top bit indicates whether streaming input is the long exposure */ + __u16 exposure_ratio; + + __u8 motion_threshold_256; + __u8 motion_threshold_recip; +} __attribute__((packed)); + +/** + * struct pisp_be_stitch_output_buffer_config - PiSP Back End Stitch output + * @addr: Stitch input buffer address + */ +struct pisp_be_stitch_output_buffer_config { + /* low 32 bits followed by high 32 bits */ + __u32 addr[2]; +} __attribute__((packed)); + +/** + * struct pisp_be_cdn_config - PiSP Back End CDN config + * + * Colour Denoise configuration + * + * @thresh: Constant for noise estimation + * @iir_strength: Relative strength of the IIR part of the filter + * @g_adjust: Proportion of the change assigned to the G channel + */ +struct pisp_be_cdn_config { + __u16 thresh; + __u8 iir_strength; + __u8 g_adjust; +} __attribute__((packed)); + +#define PISP_BE_LSC_LOG_GRID_SIZE 5 +#define PISP_BE_LSC_GRID_SIZE (1 << PISP_BE_LSC_LOG_GRID_SIZE) +#define PISP_BE_LSC_STEP_PRECISION 18 + +/** + * struct pisp_be_lsc_config - PiSP Back End LSC config + * + * Lens Shading Correction configuration + * + * @grid_step_x: Reciprocal of cell size width + * @grid_step_y: Reciprocal of cell size height + * @lut_packed: Jointly-coded RGB gains for each LSC grid + */ +struct pisp_be_lsc_config { + /* (1<<18) / grid_cell_width */ + __u16 grid_step_x; + /* (1<<18) / grid_cell_height */ + __u16 grid_step_y; + /* RGB gains jointly encoded in 32 bits */ +#define PISP_BE_LSC_LUT_SIZE (PISP_BE_LSC_GRID_SIZE + 1) + __u32 lut_packed[PISP_BE_LSC_LUT_SIZE][PISP_BE_LSC_LUT_SIZE]; +} __attribute__((packed)); + +/** + * struct pisp_be_lsc_extra - PiSP Back End LSC Extra config + * @offset_x: Horizontal offset into the LSC table of this tile + * @offset_y: Vertical offset into the LSC table of this tile + */ +struct pisp_be_lsc_extra { + __u16 offset_x; + __u16 offset_y; +} __attribute__((packed)); + +#define PISP_BE_CAC_LOG_GRID_SIZE 3 +#define PISP_BE_CAC_GRID_SIZE (1 << PISP_BE_CAC_LOG_GRID_SIZE) +#define PISP_BE_CAC_STEP_PRECISION 20 + +/** + * struct pisp_be_cac_config - PiSP Back End CAC config + * + * Chromatic Aberration Correction config + * + * @grid_step_x: Reciprocal of cell size width + * @grid_step_y: Reciprocal of cell size height + * @lut: Pixel shift for the CAC grid + */ +struct pisp_be_cac_config { + /* (1<<20) / grid_cell_width */ + __u16 grid_step_x; + /* (1<<20) / grid_cell_height */ + __u16 grid_step_y; + /* [gridy][gridx][rb][xy] */ +#define PISP_BE_CAC_LUT_SIZE (PISP_BE_CAC_GRID_SIZE + 1) + __s8 lut[PISP_BE_CAC_LUT_SIZE][PISP_BE_CAC_LUT_SIZE][2][2]; +} __attribute__((packed)); + +/** + * struct pisp_be_cac_extra - PiSP Back End CAC extra config + * @offset_x: Horizontal offset into the CAC table of this tile + * @offset_y: Horizontal offset into the CAC table of this tile + */ +struct pisp_be_cac_extra { + __u16 offset_x; + __u16 offset_y; +} __attribute__((packed)); + +#define PISP_BE_DEBIN_NUM_COEFFS 4 + +/** + * struct pisp_be_debin_config - PiSP Back End Debin config + * + * Debinning configuration + * + * @coeffs: Filter coefficients for debinning + * @h_enable: Horizontal debinning enable + * @v_enable: Vertical debinning enable + * @pad: Padding bytes + */ +struct pisp_be_debin_config { + __s8 coeffs[PISP_BE_DEBIN_NUM_COEFFS]; + __s8 h_enable; + __s8 v_enable; + __s8 pad[2]; +} __attribute__((packed)); + +#define PISP_BE_TONEMAP_LUT_SIZE 64 + +/** + * struct pisp_be_tonemap_config - PiSP Back End Tonemap config + * + * Tonemapping configuration + * + * @detail_constant: Constant value for threshold calculation + * @detail_slope: Slope value for threshold calculation + * @iir_strength: Relative strength of the IIR fiter + * @strength: Strength factor + * @lut: Look-up table for tonemap curve + */ +struct pisp_be_tonemap_config { + __u16 detail_constant; + __u16 detail_slope; + __u16 iir_strength; + __u16 strength; + __u32 lut[PISP_BE_TONEMAP_LUT_SIZE]; +} __attribute__((packed)); + +/** + * struct pisp_be_demosaic_config - PiSP Back End Demosaic config + * + * Demosaic configuration + * + * @sharper: Use other Bayer channels to increase sharpness + * @fc_mode: Built-in false colour suppression mode + * @pad: Padding bytes + */ +struct pisp_be_demosaic_config { + __u8 sharper; + __u8 fc_mode; + __u8 pad[2]; +} __attribute__((packed)); + +/** + * struct pisp_be_ccm_config - PiSP Back End CCM config + * + * Colour Correction Matrix configuration + * + * @coeffs: Matrix coefficients + * @pad: Padding bytes + * @offsets: Offsets triplet + */ +struct pisp_be_ccm_config { + __s16 coeffs[9]; + __u8 pad[2]; + __s32 offsets[3]; +} __attribute__((packed)); + +/** + * struct pisp_be_sat_control_config - PiSP Back End SAT config + * + * Saturation Control configuration + * + * @shift_r: Left shift for Red colour channel + * @shift_g: Left shift for Green colour channel + * @shift_b: Left shift for Blue colour channel + * @pad: Padding byte + */ +struct pisp_be_sat_control_config { + __u8 shift_r; + __u8 shift_g; + __u8 shift_b; + __u8 pad; +} __attribute__((packed)); + +/** + * struct pisp_be_false_colour_config - PiSP Back End False Colour config + * + * False Colour configuration + * + * @distance: Distance of neighbouring pixels, either 1 or 2 + * @pad: Padding bytes + */ +struct pisp_be_false_colour_config { + __u8 distance; + __u8 pad[3]; +} __attribute__((packed)); + +#define PISP_BE_SHARPEN_SIZE 5 +#define PISP_BE_SHARPEN_FUNC_NUM_POINTS 9 + +/** + * struct pisp_be_sharpen_config - PiSP Back End Sharpening config + * + * Sharpening configuration + * + * @kernel0: Coefficient for filter 0 + * @pad0: Padding byte + * @kernel1: Coefficient for filter 1 + * @pad1: Padding byte + * @kernel2: Coefficient for filter 2 + * @pad2: Padding byte + * @kernel3: Coefficient for filter 3 + * @pad3: Padding byte + * @kernel4: Coefficient for filter 4 + * @pad4: Padding byte + * @threshold_offset0: Offset for filter 0 response calculation + * @threshold_slope0: Slope multiplier for the filter 0 response calculation + * @scale0: Scale factor for filter 0 response calculation + * @pad5: Padding byte + * @threshold_offset1: Offset for filter 0 response calculation + * @threshold_slope1: Slope multiplier for the filter 0 response calculation + * @scale1: Scale factor for filter 0 response calculation + * @pad6: Padding byte + * @threshold_offset2: Offset for filter 0 response calculation + * @threshold_slope2: Slope multiplier for the filter 0 response calculation + * @scale2: Scale factor for filter 0 response calculation + * @pad7: Padding byte + * @threshold_offset3: Offset for filter 0 response calculation + * @threshold_slope3: Slope multiplier for the filter 0 response calculation + * @scale3: Scale factor for filter 0 response calculation + * @pad8: Padding byte + * @threshold_offset4: Offset for filter 0 response calculation + * @threshold_slope4: Slope multiplier for the filter 0 response calculation + * @scale4: Scale factor for filter 0 response calculation + * @pad9: Padding byte + * @positive_strength: Factor to scale the positive sharpening strength + * @positive_pre_limit: Maximum allowed possible positive sharpening value + * @positive_func: Gain factor applied to positive sharpening response + * @positive_limit: Final gain factor applied to positive sharpening + * @negative_strength: Factor to scale the negative sharpening strength + * @negative_pre_limit: Maximum allowed possible negative sharpening value + * @negative_func: Gain factor applied to negative sharpening response + * @negative_limit: Final gain factor applied to negative sharpening + * @enables: Filter enable mask + * @white: White output pixel filter mask + * @black: Black output pixel filter mask + * @grey: Grey output pixel filter mask + */ +struct pisp_be_sharpen_config { + __s8 kernel0[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE]; + __s8 pad0[3]; + __s8 kernel1[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE]; + __s8 pad1[3]; + __s8 kernel2[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE]; + __s8 pad2[3]; + __s8 kernel3[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE]; + __s8 pad3[3]; + __s8 kernel4[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE]; + __s8 pad4[3]; + __u16 threshold_offset0; + __u16 threshold_slope0; + __u16 scale0; + __u16 pad5; + __u16 threshold_offset1; + __u16 threshold_slope1; + __u16 scale1; + __u16 pad6; + __u16 threshold_offset2; + __u16 threshold_slope2; + __u16 scale2; + __u16 pad7; + __u16 threshold_offset3; + __u16 threshold_slope3; + __u16 scale3; + __u16 pad8; + __u16 threshold_offset4; + __u16 threshold_slope4; + __u16 scale4; + __u16 pad9; + __u16 positive_strength; + __u16 positive_pre_limit; + __u16 positive_func[PISP_BE_SHARPEN_FUNC_NUM_POINTS]; + __u16 positive_limit; + __u16 negative_strength; + __u16 negative_pre_limit; + __u16 negative_func[PISP_BE_SHARPEN_FUNC_NUM_POINTS]; + __u16 negative_limit; + __u8 enables; + __u8 white; + __u8 black; + __u8 grey; +} __attribute__((packed)); + +/** + * struct pisp_be_sh_fc_combine_config - PiSP Back End Sharpening and + * False Colour config + * + * Sharpening and False Colour configuration + * + * @y_factor: Control amount of desaturation of pixels being darkened + * @c1_factor: Control amount of brightening of a pixel for the Cb + * channel + * @c2_factor: Control amount of brightening of a pixel for the Cr + * channel + * @pad: Padding byte + */ +struct pisp_be_sh_fc_combine_config { + __u8 y_factor; + __u8 c1_factor; + __u8 c2_factor; + __u8 pad; +} __attribute__((packed)); + +#define PISP_BE_GAMMA_LUT_SIZE 64 + +/** + * struct pisp_be_gamma_config - PiSP Back End Gamma configuration + * @lut: Gamma curve look-up table + */ +struct pisp_be_gamma_config { + __u32 lut[PISP_BE_GAMMA_LUT_SIZE]; +} __attribute__((packed)); + +/** + * struct pisp_be_crop_config - PiSP Back End Crop config + * + * Crop configuration + * + * @offset_x: Number of pixels cropped from the left of the tile + * @offset_y: Number of pixels cropped from the top of the tile + * @width: Width of the cropped tile output + * @height: Height of the cropped tile output + */ +struct pisp_be_crop_config { + __u16 offset_x, offset_y; + __u16 width, height; +} __attribute__((packed)); + +#define PISP_BE_RESAMPLE_FILTER_SIZE 96 + +/** + * struct pisp_be_resample_config - PiSP Back End Resampling config + * + * Resample configuration + * + * @scale_factor_h: Horizontal scale factor + * @scale_factor_v: Vertical scale factor + * @coef: Resample coefficients + */ +struct pisp_be_resample_config { + __u16 scale_factor_h, scale_factor_v; + __s16 coef[PISP_BE_RESAMPLE_FILTER_SIZE]; +} __attribute__((packed)); + +/** + * struct pisp_be_resample_extra - PiSP Back End Resample config + * + * Resample configuration + * + * @scaled_width: Width in pixels of the scaled output + * @scaled_height: Height in pixels of the scaled output + * @initial_phase_h: Initial horizontal phase + * @initial_phase_v: Initial vertical phase + */ +struct pisp_be_resample_extra { + __u16 scaled_width; + __u16 scaled_height; + __s16 initial_phase_h[3]; + __s16 initial_phase_v[3]; +} __attribute__((packed)); + +/** + * struct pisp_be_downscale_config - PiSP Back End Downscale config + * + * Downscale configuration + * + * @scale_factor_h: Horizontal scale factor + * @scale_factor_v: Vertical scale factor + * @scale_recip_h: Horizontal reciprocal factor + * @scale_recip_v: Vertical reciprocal factor + */ +struct pisp_be_downscale_config { + __u16 scale_factor_h; + __u16 scale_factor_v; + __u16 scale_recip_h; + __u16 scale_recip_v; +} __attribute__((packed)); + +/** + * struct pisp_be_downscale_extra - PiSP Back End Downscale Extra config + * @scaled_width: Scaled image width + * @scaled_height: Scaled image height + */ +struct pisp_be_downscale_extra { + __u16 scaled_width; + __u16 scaled_height; +} __attribute__((packed)); + +/** + * struct pisp_be_hog_config - PiSP Back End HOG config + * + * Histogram of Oriented Gradients configuration + * + * @compute_signed: Set 0 for unsigned gradients, 1 for signed + * @channel_mix: Channels proportions to use + * @stride: Stride in bytes between blocks directly below + */ +struct pisp_be_hog_config { + __u8 compute_signed; + __u8 channel_mix[3]; + __u32 stride; +} __attribute__((packed)); + +struct pisp_be_axi_config { + __u8 r_qos; /* Read QoS */ + __u8 r_cache_prot; /* Read { prot[2:0], cache[3:0] } */ + __u8 w_qos; /* Write QoS */ + __u8 w_cache_prot; /* Write { prot[2:0], cache[3:0] } */ +} __attribute__((packed)); + +/** + * enum pisp_be_transform - PiSP Back End Transform flags + * @PISP_BE_TRANSFORM_NONE: No transform + * @PISP_BE_TRANSFORM_HFLIP: Horizontal flip + * @PISP_BE_TRANSFORM_VFLIP: Vertical flip + * @PISP_BE_TRANSFORM_ROT180: 180 degress rotation + */ +enum pisp_be_transform { + PISP_BE_TRANSFORM_NONE = 0x0, + PISP_BE_TRANSFORM_HFLIP = 0x1, + PISP_BE_TRANSFORM_VFLIP = 0x2, + PISP_BE_TRANSFORM_ROT180 = + (PISP_BE_TRANSFORM_HFLIP | PISP_BE_TRANSFORM_VFLIP) +}; + +struct pisp_be_output_format_config { + struct pisp_image_format_config image; + __u8 transform; + __u8 pad[3]; + __u16 lo; + __u16 hi; + __u16 lo2; + __u16 hi2; +} __attribute__((packed)); + +/** + * struct pisp_be_output_buffer_config - PiSP Back End Output buffer + * @addr: Output buffer address + */ +struct pisp_be_output_buffer_config { + /* low 32 bits followed by high 32 bits (for each of 3 planes) */ + __u32 addr[3][2]; +} __attribute__((packed)); + +/** + * struct pisp_be_hog_buffer_config - PiSP Back End HOG buffer + * @addr: HOG buffer address + */ +struct pisp_be_hog_buffer_config { + /* low 32 bits followed by high 32 bits */ + __u32 addr[2]; +} __attribute__((packed)); + +/** + * struct pisp_be_config - RaspberryPi PiSP Back End Processing configuration + * + * @global: Global PiSP configuration + * @input_format: Input image format + * @decompress: Decompress configuration + * @dpc: Defective Pixel Correction configuration + * @geq: Green Equalisation configuration + * @tdn_input_format: Temporal Denoise input format + * @tdn_decompress: Temporal Denoise decompress configuration + * @tdn: Temporal Denoise configuration + * @tdn_compress: Temporal Denoise compress configuration + * @tdn_output_format: Temporal Denoise output format + * @sdn: Spatial Denoise configuration + * @blc: Black Level Correction configuration + * @stitch_compress: Stitch compress configuration + * @stitch_output_format: Stitch output format + * @stitch_input_format: Stitch input format + * @stitch_decompress: Stitch decompress configuration + * @stitch: Stitch configuration + * @lsc: Lens Shading Correction configuration + * @wbg: White Balance Gain configuration + * @cdn: Colour Denoise configuration + * @cac: Colour Aberration Correction configuration + * @debin: Debinning configuration + * @tonemap: Tonemapping configuration + * @demosaic: Demosaicing configuration + * @ccm: Colour Correction Matrix configuration + * @sat_control: Saturation Control configuration + * @ycbcr: YCbCr colour correction configuration + * @sharpen: Sharpening configuration + * @false_colour: False colour correction + * @sh_fc_combine: Sharpening and False Colour correction + * @ycbcr_inverse: Inverse YCbCr colour correction + * @gamma: Gamma curve configuration + * @csc: Color Space Conversion configuration + * @downscale: Downscale configuration + * @resample: Resampling configuration + * @output_format: Output format configuration + * @hog: HOG configuration + */ +struct pisp_be_config { + struct pisp_be_global_config global; + struct pisp_image_format_config input_format; + struct pisp_decompress_config decompress; + struct pisp_be_dpc_config dpc; + struct pisp_be_geq_config geq; + struct pisp_image_format_config tdn_input_format; + struct pisp_decompress_config tdn_decompress; + struct pisp_be_tdn_config tdn; + struct pisp_compress_config tdn_compress; + struct pisp_image_format_config tdn_output_format; + struct pisp_be_sdn_config sdn; + struct pisp_bla_config blc; + struct pisp_compress_config stitch_compress; + struct pisp_image_format_config stitch_output_format; + struct pisp_image_format_config stitch_input_format; + struct pisp_decompress_config stitch_decompress; + struct pisp_be_stitch_config stitch; + struct pisp_be_lsc_config lsc; + struct pisp_wbg_config wbg; + struct pisp_be_cdn_config cdn; + struct pisp_be_cac_config cac; + struct pisp_be_debin_config debin; + struct pisp_be_tonemap_config tonemap; + struct pisp_be_demosaic_config demosaic; + struct pisp_be_ccm_config ccm; + struct pisp_be_sat_control_config sat_control; + struct pisp_be_ccm_config ycbcr; + struct pisp_be_sharpen_config sharpen; + struct pisp_be_false_colour_config false_colour; + struct pisp_be_sh_fc_combine_config sh_fc_combine; + struct pisp_be_ccm_config ycbcr_inverse; + struct pisp_be_gamma_config gamma; + struct pisp_be_ccm_config csc[PISP_BACK_END_NUM_OUTPUTS]; + struct pisp_be_downscale_config downscale[PISP_BACK_END_NUM_OUTPUTS]; + struct pisp_be_resample_config resample[PISP_BACK_END_NUM_OUTPUTS]; + struct pisp_be_output_format_config + output_format[PISP_BACK_END_NUM_OUTPUTS]; + struct pisp_be_hog_config hog; +} __attribute__((packed)); + +/** + * enum pisp_tile_edge - PiSP Back End Tile position + * @PISP_LEFT_EDGE: Left edge tile + * @PISP_RIGHT_EDGE: Right edge tile + * @PISP_TOP_EDGE: Top edge tile + * @PISP_BOTTOM_EDGE: Bottom edge tile + */ +enum pisp_tile_edge { + PISP_LEFT_EDGE = (1 << 0), + PISP_RIGHT_EDGE = (1 << 1), + PISP_TOP_EDGE = (1 << 2), + PISP_BOTTOM_EDGE = (1 << 3) +}; + +/** + * struct pisp_tile - Raspberry Pi PiSP Back End tile configuration + * + * Tile parameters: each set of tile parameters is a 160-bytes block of data + * which contains the tile processing parameters. + * + * @edge: Edge tile flag + * @pad0: Padding bytes + * @input_addr_offset: Top-left pixel offset, in bytes + * @input_addr_offset2: Top-left pixel offset, in bytes for the second/ + * third image planes + * @input_offset_x: Horizontal offset in pixels of this tile in the + * input image + * @input_offset_y: Vertical offset in pixels of this tile in the + * input image + * @input_width: Width in pixels of this tile + * @input_height: Height in pixels of the this tile + * @tdn_input_addr_offset: TDN input image offset, in bytes + * @tdn_output_addr_offset: TDN output image offset, in bytes + * @stitch_input_addr_offset: Stitch input image offset, in bytes + * @stitch_output_addr_offset: Stitch output image offset, in bytes + * @lsc_grid_offset_x: Horizontal offset in the LSC table for this tile + * @lsc_grid_offset_y: Vertical offset in the LSC table for this tile + * @cac_grid_offset_x: Horizontal offset in the CAC table for this tile + * @cac_grid_offset_y: Horizontal offset in the CAC table for this tile + * @crop_x_start: Number of pixels cropped from the left of the + * tile + * @crop_x_end: Number of pixels cropped from the right of the + * tile + * @crop_y_start: Number of pixels cropped from the top of the + * tile + * @crop_y_end: Number of pixels cropped from the bottom of the + * tile + * @downscale_phase_x: Initial horizontal phase in pixels + * @downscale_phase_y: Initial vertical phase in pixels + * @resample_in_width: Width in pixels of the tile entering the + * Resample block + * @resample_in_height: Height in pixels of the tile entering the + * Resample block + * @resample_phase_x: Initial horizontal phase for the Resample block + * @resample_phase_y: Initial vertical phase for the Resample block + * @output_offset_x: Horizontal offset in pixels where the tile will + * be written into the output image + * @output_offset_y: Vertical offset in pixels where the tile will be + * written into the output image + * @output_width: Width in pixels in the output image of this tile + * @output_height: Height in pixels in the output image of this tile + * @output_addr_offset: Offset in bytes into the output buffer + * @output_addr_offset2: Offset in bytes into the output buffer for the + * second and third plane + * @output_hog_addr_offset: Offset in bytes into the HOG buffer where + * results of this tile are to be written + */ +struct pisp_tile { + __u8 edge; /* enum pisp_tile_edge */ + __u8 pad0[3]; + /* 4 bytes */ + __u32 input_addr_offset; + __u32 input_addr_offset2; + __u16 input_offset_x; + __u16 input_offset_y; + __u16 input_width; + __u16 input_height; + /* 20 bytes */ + __u32 tdn_input_addr_offset; + __u32 tdn_output_addr_offset; + __u32 stitch_input_addr_offset; + __u32 stitch_output_addr_offset; + /* 36 bytes */ + __u32 lsc_grid_offset_x; + __u32 lsc_grid_offset_y; + /* 44 bytes */ + __u32 cac_grid_offset_x; + __u32 cac_grid_offset_y; + /* 52 bytes */ + __u16 crop_x_start[PISP_BACK_END_NUM_OUTPUTS]; + __u16 crop_x_end[PISP_BACK_END_NUM_OUTPUTS]; + __u16 crop_y_start[PISP_BACK_END_NUM_OUTPUTS]; + __u16 crop_y_end[PISP_BACK_END_NUM_OUTPUTS]; + /* 68 bytes */ + /* Ordering is planes then branches */ + __u16 downscale_phase_x[3 * PISP_BACK_END_NUM_OUTPUTS]; + __u16 downscale_phase_y[3 * PISP_BACK_END_NUM_OUTPUTS]; + /* 92 bytes */ + __u16 resample_in_width[PISP_BACK_END_NUM_OUTPUTS]; + __u16 resample_in_height[PISP_BACK_END_NUM_OUTPUTS]; + /* 100 bytes */ + /* Ordering is planes then branches */ + __u16 resample_phase_x[3 * PISP_BACK_END_NUM_OUTPUTS]; + __u16 resample_phase_y[3 * PISP_BACK_END_NUM_OUTPUTS]; + /* 124 bytes */ + __u16 output_offset_x[PISP_BACK_END_NUM_OUTPUTS]; + __u16 output_offset_y[PISP_BACK_END_NUM_OUTPUTS]; + __u16 output_width[PISP_BACK_END_NUM_OUTPUTS]; + __u16 output_height[PISP_BACK_END_NUM_OUTPUTS]; + /* 140 bytes */ + __u32 output_addr_offset[PISP_BACK_END_NUM_OUTPUTS]; + __u32 output_addr_offset2[PISP_BACK_END_NUM_OUTPUTS]; + /* 156 bytes */ + __u32 output_hog_addr_offset; + /* 160 bytes */ +} __attribute__((packed)); + +/** + * struct pisp_be_tiles_config - Raspberry Pi PiSP Back End configuration + * @tiles: Tile descriptors + * @num_tiles: Number of tiles + * @config: PiSP Back End configuration + */ +struct pisp_be_tiles_config { + struct pisp_tile tiles[PISP_BACK_END_NUM_TILES]; + __u32 num_tiles; + struct pisp_be_config config; +} __attribute__((packed)); + +#endif /* _UAPI_PISP_BE_CONFIG_H_ */ diff --git a/include/uapi/linux/media/raspberrypi/pisp_common.h b/include/uapi/linux/media/raspberrypi/pisp_common.h new file mode 100644 index 000000000000..b2522e29c976 --- /dev/null +++ b/include/uapi/linux/media/raspberrypi/pisp_common.h @@ -0,0 +1,199 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * RP1 PiSP common definitions. + * + * Copyright (C) 2021 - Raspberry Pi Ltd. + * + */ +#ifndef _UAPI_PISP_COMMON_H_ +#define _UAPI_PISP_COMMON_H_ + +#include + +struct pisp_image_format_config { + /* size in pixels */ + __u16 width; + __u16 height; + /* must match struct pisp_image_format below */ + __u32 format; + __s32 stride; + /* some planar image formats will need a second stride */ + __s32 stride2; +} __attribute__((packed)); + +enum pisp_bayer_order { + /* + * Note how bayer_order&1 tells you if G is on the even pixels of the + * checkerboard or not, and bayer_order&2 tells you if R is on the even + * rows or is swapped with B. Note that if the top (of the 8) bits is + * set, this denotes a monochrome or greyscale image, and the lower bits + * should all be ignored. + */ + PISP_BAYER_ORDER_RGGB = 0, + PISP_BAYER_ORDER_GBRG = 1, + PISP_BAYER_ORDER_BGGR = 2, + PISP_BAYER_ORDER_GRBG = 3, + PISP_BAYER_ORDER_GREYSCALE = 128 +}; + +enum pisp_image_format { + /* + * Precise values are mostly tbd. Generally these will be portmanteau + * values comprising bit fields and flags. This format must be shared + * throughout the PiSP. + */ + PISP_IMAGE_FORMAT_BPS_8 = 0x00000000, + PISP_IMAGE_FORMAT_BPS_10 = 0x00000001, + PISP_IMAGE_FORMAT_BPS_12 = 0x00000002, + PISP_IMAGE_FORMAT_BPS_16 = 0x00000003, + PISP_IMAGE_FORMAT_BPS_MASK = 0x00000003, + + PISP_IMAGE_FORMAT_PLANARITY_INTERLEAVED = 0x00000000, + PISP_IMAGE_FORMAT_PLANARITY_SEMI_PLANAR = 0x00000010, + PISP_IMAGE_FORMAT_PLANARITY_PLANAR = 0x00000020, + PISP_IMAGE_FORMAT_PLANARITY_MASK = 0x00000030, + + PISP_IMAGE_FORMAT_SAMPLING_444 = 0x00000000, + PISP_IMAGE_FORMAT_SAMPLING_422 = 0x00000100, + PISP_IMAGE_FORMAT_SAMPLING_420 = 0x00000200, + PISP_IMAGE_FORMAT_SAMPLING_MASK = 0x00000300, + + PISP_IMAGE_FORMAT_ORDER_NORMAL = 0x00000000, + PISP_IMAGE_FORMAT_ORDER_SWAPPED = 0x00001000, + + PISP_IMAGE_FORMAT_SHIFT_0 = 0x00000000, + PISP_IMAGE_FORMAT_SHIFT_1 = 0x00010000, + PISP_IMAGE_FORMAT_SHIFT_2 = 0x00020000, + PISP_IMAGE_FORMAT_SHIFT_3 = 0x00030000, + PISP_IMAGE_FORMAT_SHIFT_4 = 0x00040000, + PISP_IMAGE_FORMAT_SHIFT_5 = 0x00050000, + PISP_IMAGE_FORMAT_SHIFT_6 = 0x00060000, + PISP_IMAGE_FORMAT_SHIFT_7 = 0x00070000, + PISP_IMAGE_FORMAT_SHIFT_8 = 0x00080000, + PISP_IMAGE_FORMAT_SHIFT_MASK = 0x000f0000, + + PISP_IMAGE_FORMAT_UNCOMPRESSED = 0x00000000, + PISP_IMAGE_FORMAT_COMPRESSION_MODE_1 = 0x01000000, + PISP_IMAGE_FORMAT_COMPRESSION_MODE_2 = 0x02000000, + PISP_IMAGE_FORMAT_COMPRESSION_MODE_3 = 0x03000000, + PISP_IMAGE_FORMAT_COMPRESSION_MASK = 0x03000000, + + PISP_IMAGE_FORMAT_HOG_SIGNED = 0x04000000, + PISP_IMAGE_FORMAT_HOG_UNSIGNED = 0x08000000, + PISP_IMAGE_FORMAT_INTEGRAL_IMAGE = 0x10000000, + PISP_IMAGE_FORMAT_WALLPAPER_ROLL = 0x20000000, + PISP_IMAGE_FORMAT_THREE_CHANNEL = 0x40000000, + + /* Lastly a few specific instantiations of the above. */ + PISP_IMAGE_FORMAT_SINGLE_16 = PISP_IMAGE_FORMAT_BPS_16, + PISP_IMAGE_FORMAT_THREE_16 = PISP_IMAGE_FORMAT_BPS_16 | + PISP_IMAGE_FORMAT_THREE_CHANNEL +}; + +#define PISP_IMAGE_FORMAT_bps_8(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_8) +#define PISP_IMAGE_FORMAT_bps_10(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_10) +#define PISP_IMAGE_FORMAT_bps_12(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_12) +#define PISP_IMAGE_FORMAT_bps_16(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_16) +#define PISP_IMAGE_FORMAT_bps(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) ? \ + 8 + (2 << (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) - 1)) : 8) +#define PISP_IMAGE_FORMAT_shift(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_SHIFT_MASK) / PISP_IMAGE_FORMAT_SHIFT_1) +#define PISP_IMAGE_FORMAT_three_channel(fmt) \ + ((fmt) & PISP_IMAGE_FORMAT_THREE_CHANNEL) +#define PISP_IMAGE_FORMAT_single_channel(fmt) \ + (!((fmt) & PISP_IMAGE_FORMAT_THREE_CHANNEL)) +#define PISP_IMAGE_FORMAT_compressed(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_COMPRESSION_MASK) != \ + PISP_IMAGE_FORMAT_UNCOMPRESSED) +#define PISP_IMAGE_FORMAT_sampling_444(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == \ + PISP_IMAGE_FORMAT_SAMPLING_444) +#define PISP_IMAGE_FORMAT_sampling_422(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == \ + PISP_IMAGE_FORMAT_SAMPLING_422) +#define PISP_IMAGE_FORMAT_sampling_420(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == \ + PISP_IMAGE_FORMAT_SAMPLING_420) +#define PISP_IMAGE_FORMAT_order_normal(fmt) \ + (!((fmt) & PISP_IMAGE_FORMAT_ORDER_SWAPPED)) +#define PISP_IMAGE_FORMAT_order_swapped(fmt) \ + ((fmt) & PISP_IMAGE_FORMAT_ORDER_SWAPPED) +#define PISP_IMAGE_FORMAT_interleaved(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == \ + PISP_IMAGE_FORMAT_PLANARITY_INTERLEAVED) +#define PISP_IMAGE_FORMAT_semiplanar(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == \ + PISP_IMAGE_FORMAT_PLANARITY_SEMI_PLANAR) +#define PISP_IMAGE_FORMAT_planar(fmt) \ + (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == \ + PISP_IMAGE_FORMAT_PLANARITY_PLANAR) +#define PISP_IMAGE_FORMAT_wallpaper(fmt) \ + ((fmt) & PISP_IMAGE_FORMAT_WALLPAPER_ROLL) +#define PISP_IMAGE_FORMAT_HOG(fmt) \ + ((fmt) & \ + (PISP_IMAGE_FORMAT_HOG_SIGNED | PISP_IMAGE_FORMAT_HOG_UNSIGNED)) + +#define PISP_WALLPAPER_WIDTH 128 /* in bytes */ + +struct pisp_bla_config { + __u16 black_level_r; + __u16 black_level_gr; + __u16 black_level_gb; + __u16 black_level_b; + __u16 output_black_level; + __u8 pad[2]; +} __attribute__((packed)); + +struct pisp_wbg_config { + __u16 gain_r; + __u16 gain_g; + __u16 gain_b; + __u8 pad[2]; +} __attribute__((packed)); + +struct pisp_compress_config { + /* value subtracted from incoming data */ + __u16 offset; + __u8 pad; + /* 1 => Companding; 2 => Delta (recommended); 3 => Combined (for HDR) */ + __u8 mode; +} __attribute__((packed)); + +struct pisp_decompress_config { + /* value added to reconstructed data */ + __u16 offset; + __u8 pad; + /* 1 => Companding; 2 => Delta (recommended); 3 => Combined (for HDR) */ + __u8 mode; +} __attribute__((packed)); + +enum pisp_axi_flags { + /* + * round down bursts to end at a 32-byte boundary, to align following + * bursts + */ + PISP_AXI_FLAG_ALIGN = 128, + /* for FE writer: force WSTRB high, to pad output to 16-byte boundary */ + PISP_AXI_FLAG_PAD = 64, + /* for FE writer: Use Output FIFO level to trigger "panic" */ + PISP_AXI_FLAG_PANIC = 32, +}; + +struct pisp_axi_config { + /* + * burst length minus one, which must be in the range 0:15; OR'd with + * flags + */ + __u8 maxlen_flags; + /* { prot[2:0], cache[3:0] } fields, echoed on AXI bus */ + __u8 cache_prot; + /* QoS field(s) (4x4 bits for FE writer; 4 bits for other masters) */ + __u16 qos; +} __attribute__((packed)); + +#endif /* _UAPI_PISP_COMMON_H_ */ -- cgit v1.2.3 From 8f6c2202222faffa0959929d8cd9090254a60831 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Wed, 26 Jun 2024 20:14:34 +0200 Subject: media: uapi: Add meta pixel format for PiSP BE config Add format description for the PiSP Back End configuration parameter buffer. Signed-off-by: Jacopo Mondi Reviewed-by: Naushir Patuck Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index dd6876380fe3..96fc0456081e 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -843,6 +843,9 @@ struct v4l2_pix_format { #define V4L2_META_FMT_RK_ISP1_PARAMS v4l2_fourcc('R', 'K', '1', 'P') /* Rockchip ISP1 3A Parameters */ #define V4L2_META_FMT_RK_ISP1_STAT_3A v4l2_fourcc('R', 'K', '1', 'S') /* Rockchip ISP1 3A Statistics */ +/* Vendor specific - used for RaspberryPi PiSP */ +#define V4L2_META_FMT_RPI_BE_CFG v4l2_fourcc('R', 'P', 'B', 'C') /* PiSP BE configuration */ + #ifdef __KERNEL__ /* * Line-based metadata formats. Remember to update v4l_fill_fmtdesc() when -- cgit v1.2.3 From d260c1224786c870edbae09ef6ecf5f35361821e Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Wed, 26 Jun 2024 20:14:35 +0200 Subject: media: uapi: Add PiSP Compressed RAW Bayer formats Add Raspberry Pi compressed RAW Bayer formats. The compression algorithm description is provided by Nick Hollinghurst from Raspberry Pi. Signed-off-by: Jacopo Mondi Reviewed-by: Naushir Patuck Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/videodev2.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 96fc0456081e..4e91362da6da 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -816,6 +816,18 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_IPU3_SGRBG10 v4l2_fourcc('i', 'p', '3', 'G') /* IPU3 packed 10-bit GRBG bayer */ #define V4L2_PIX_FMT_IPU3_SRGGB10 v4l2_fourcc('i', 'p', '3', 'r') /* IPU3 packed 10-bit RGGB bayer */ +/* Raspberry Pi PiSP compressed formats. */ +#define V4L2_PIX_FMT_PISP_COMP1_RGGB v4l2_fourcc('P', 'C', '1', 'R') /* PiSP 8-bit mode 1 compressed RGGB bayer */ +#define V4L2_PIX_FMT_PISP_COMP1_GRBG v4l2_fourcc('P', 'C', '1', 'G') /* PiSP 8-bit mode 1 compressed GRBG bayer */ +#define V4L2_PIX_FMT_PISP_COMP1_GBRG v4l2_fourcc('P', 'C', '1', 'g') /* PiSP 8-bit mode 1 compressed GBRG bayer */ +#define V4L2_PIX_FMT_PISP_COMP1_BGGR v4l2_fourcc('P', 'C', '1', 'B') /* PiSP 8-bit mode 1 compressed BGGR bayer */ +#define V4L2_PIX_FMT_PISP_COMP1_MONO v4l2_fourcc('P', 'C', '1', 'M') /* PiSP 8-bit mode 1 compressed monochrome */ +#define V4L2_PIX_FMT_PISP_COMP2_RGGB v4l2_fourcc('P', 'C', '2', 'R') /* PiSP 8-bit mode 2 compressed RGGB bayer */ +#define V4L2_PIX_FMT_PISP_COMP2_GRBG v4l2_fourcc('P', 'C', '2', 'G') /* PiSP 8-bit mode 2 compressed GRBG bayer */ +#define V4L2_PIX_FMT_PISP_COMP2_GBRG v4l2_fourcc('P', 'C', '2', 'g') /* PiSP 8-bit mode 2 compressed GBRG bayer */ +#define V4L2_PIX_FMT_PISP_COMP2_BGGR v4l2_fourcc('P', 'C', '2', 'B') /* PiSP 8-bit mode 2 compressed BGGR bayer */ +#define V4L2_PIX_FMT_PISP_COMP2_MONO v4l2_fourcc('P', 'C', '2', 'M') /* PiSP 8-bit mode 2 compressed monochrome */ + /* SDR formats - used only for Software Defined Radio devices */ #define V4L2_SDR_FMT_CU8 v4l2_fourcc('C', 'U', '0', '8') /* IQ u8 */ #define V4L2_SDR_FMT_CU16LE v4l2_fourcc('C', 'U', '1', '6') /* IQ u16le */ -- cgit v1.2.3 From 7e1f4eb9a60d40dd17a97d9b76818682a024a127 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Apr 2024 12:04:54 +0200 Subject: kallsyms: rework symbol lookup return codes Building with W=1 in some configurations produces a false positive warning for kallsyms: kernel/kallsyms.c: In function '__sprint_symbol.isra': kernel/kallsyms.c:503:17: error: 'strcpy' source argument is the same as destination [-Werror=restrict] 503 | strcpy(buffer, name); | ^~~~~~~~~~~~~~~~~~~~ This originally showed up while building with -O3, but later started happening in other configurations as well, depending on inlining decisions. The underlying issue is that the local 'name' variable is always initialized to the be the same as 'buffer' in the called functions that fill the buffer, which gcc notices while inlining, though it could see that the address check always skips the copy. The calling conventions here are rather unusual, as all of the internal lookup functions (bpf_address_lookup, ftrace_mod_address_lookup, ftrace_func_address_lookup, module_address_lookup and kallsyms_lookup_buildid) already use the provided buffer and either return the address of that buffer to indicate success, or NULL for failure, but the callers are written to also expect an arbitrary other buffer to be returned. Rework the calling conventions to return the length of the filled buffer instead of its address, which is simpler and easier to follow as well as avoiding the warning. Leave only the kallsyms_lookup() calling conventions unchanged, since that is called from 16 different functions and adapting this would be a much bigger change. Link: https://lore.kernel.org/lkml/20200107214042.855757-1-arnd@arndb.de/ Link: https://lore.kernel.org/lkml/20240326130647.7bfb1d92@gandalf.local.home/ Tested-by: Geert Uytterhoeven Reviewed-by: Luis Chamberlain Acked-by: Steven Rostedt (Google) Signed-off-by: Arnd Bergmann --- include/linux/filter.h | 14 +++++++------- include/linux/ftrace.h | 6 +++--- include/linux/module.h | 14 +++++++------- 3 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0f12cf01070e..5669da513cd7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1208,18 +1208,18 @@ static inline bool bpf_jit_kallsyms_enabled(void) return false; } -const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, +int __bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char *sym); bool is_bpf_text_address(unsigned long addr); int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym); struct bpf_prog *bpf_prog_ksym_find(unsigned long addr); -static inline const char * +static inline int bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { - const char *ret = __bpf_address_lookup(addr, size, off, sym); + int ret = __bpf_address_lookup(addr, size, off, sym); if (ret && modname) *modname = NULL; @@ -1263,11 +1263,11 @@ static inline bool bpf_jit_kallsyms_enabled(void) return false; } -static inline const char * +static inline int __bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char *sym) { - return NULL; + return 0; } static inline bool is_bpf_text_address(unsigned long addr) @@ -1286,11 +1286,11 @@ static inline struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) return NULL; } -static inline const char * +static inline int bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { - return NULL; + return 0; } static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 800995c425e0..b792274189a3 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -86,15 +86,15 @@ struct ftrace_hash; #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \ defined(CONFIG_DYNAMIC_FTRACE) -const char * +int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym); #else -static inline const char * +static inline int ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym) { - return NULL; + return 0; } #endif diff --git a/include/linux/module.h b/include/linux/module.h index ffa1c603163c..330ffb59efe5 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -931,11 +931,11 @@ int module_kallsyms_on_each_symbol(const char *modname, * least KSYM_NAME_LEN long: a pointer to namebuf is returned if * found, otherwise NULL. */ -const char *module_address_lookup(unsigned long addr, - unsigned long *symbolsize, - unsigned long *offset, - char **modname, const unsigned char **modbuildid, - char *namebuf); +int module_address_lookup(unsigned long addr, + unsigned long *symbolsize, + unsigned long *offset, + char **modname, const unsigned char **modbuildid, + char *namebuf); int lookup_module_symbol_name(unsigned long addr, char *symname); int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, @@ -964,14 +964,14 @@ static inline int module_kallsyms_on_each_symbol(const char *modname, } /* For kallsyms to ask for address resolution. NULL means not found. */ -static inline const char *module_address_lookup(unsigned long addr, +static inline int module_address_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, const unsigned char **modbuildid, char *namebuf) { - return NULL; + return 0; } static inline int lookup_module_symbol_name(unsigned long addr, char *symname) -- cgit v1.2.3 From 1bc6d4452d5c91beb09e37a98a590808e1997b79 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 30 Apr 2024 13:54:46 +0200 Subject: fs: new helper vfs_empty_path() Make it possible to quickly check whether AT_EMPTY_PATH is valid. Note, after some discussion we decided to also allow NULL to be passed instead of requiring the empty string. Signed-off-by: Christian Brauner --- include/linux/fs.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 942cb11dba96..5ff362277834 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3631,4 +3631,21 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice); +static inline bool vfs_empty_path(int dfd, const char __user *path) +{ + char c; + + if (dfd < 0) + return false; + + /* We now allow NULL to be used for empty path. */ + if (!path) + return true; + + if (unlikely(get_user(c, path))) + return false; + + return !c; +} + #endif /* _LINUX_FS_H */ -- cgit v1.2.3 From f378ec4eec8b7e607dbc0112913fd3d5d84eb1b8 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Thu, 27 Jun 2024 18:11:52 +0200 Subject: vfs: rename parent_ino to d_parent_ino and make it use RCU The routine is used by procfs through dir_emit_dots. The combined RCU and lock fallback implementation is too big for an inline. Given that the routine takes a dentry argument fs/dcache.c seems like the place to put it in. Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20240627161152.802567-1-mjguzik@gmail.com Signed-off-by: Christian Brauner --- include/linux/dcache.h | 2 ++ include/linux/fs.h | 16 +--------------- 2 files changed, 3 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index bf53e3894aae..ea58843942b9 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -278,6 +278,8 @@ static inline unsigned d_count(const struct dentry *dentry) return dentry->d_lockref.count; } +ino_t d_parent_ino(struct dentry *dentry); + /* * helper function for dentry_operations.d_dname() members */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 5ff362277834..2fa06a4d197a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3454,20 +3454,6 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) return 0; } -static inline ino_t parent_ino(struct dentry *dentry) -{ - ino_t res; - - /* - * Don't strictly need d_lock here? If the parent ino could change - * then surely we'd have a deeper race in the caller? - */ - spin_lock(&dentry->d_lock); - res = dentry->d_parent->d_inode->i_ino; - spin_unlock(&dentry->d_lock); - return res; -} - /* Transaction based IO helpers */ /* @@ -3592,7 +3578,7 @@ static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx) static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx) { return ctx->actor(ctx, "..", 2, ctx->pos, - parent_ino(file->f_path.dentry), DT_DIR); + d_parent_ino(file->f_path.dentry), DT_DIR); } static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx) { -- cgit v1.2.3 From 63db4a1f795a19e4e12f036a12a5f61c48b03e5c Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 27 Jun 2024 16:07:35 +0000 Subject: block: Delete blk_queue_flag_test_and_set() Since commit 70200574cc22 ("block: remove QUEUE_FLAG_DISCARD"), blk_queue_flag_test_and_set() has not been used, so delete it. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240627160735.842189-1-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a53e3434e1a2..53c41ef4222c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -609,7 +609,6 @@ struct request_queue { void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); -bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) -- cgit v1.2.3 From dd6d7f8574d7f8b6a0bf1aeef0b285d2706b8c2a Mon Sep 17 00:00:00 2001 From: Akiva Goldberger Date: Thu, 27 Jun 2024 21:23:49 +0300 Subject: RDMA: Pass entire uverbs attr bundle to create cq function Changes the create_cq verb signature by sending the entire uverbs attr bundle as a parameter. This allows drivers to send driver specific attrs through ioctl for the create_cq verb and access them in their driver specific code. Also adds a new enum value for driver specific ioctl attributes for methods already supporting UHW. Link: https://lore.kernel.org/r/ed147343987c0d43fd391c1b2f85e2f425747387.1719512393.git.leon@kernel.org Signed-off-by: Akiva Goldberger Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/rdma/ib_verbs.h | 2 +- include/uapi/rdma/ib_user_ioctl_cmds.h | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 5a193008f99c..825043512b2d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2465,7 +2465,7 @@ struct ib_device_ops { int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); int (*destroy_qp)(struct ib_qp *qp, struct ib_udata *udata); int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr, - struct ib_udata *udata); + struct uverbs_attr_bundle *attrs); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index dafc7ebe545b..ec719053aab9 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -37,9 +37,6 @@ #define UVERBS_ID_NS_MASK 0xF000 #define UVERBS_ID_NS_SHIFT 12 -#define UVERBS_UDATA_DRIVER_DATA_NS 1 -#define UVERBS_UDATA_DRIVER_DATA_FLAG (1UL << UVERBS_ID_NS_SHIFT) - enum uverbs_default_objects { UVERBS_OBJECT_DEVICE, /* No instances of DEVICE are allowed */ UVERBS_OBJECT_PD, @@ -61,8 +58,10 @@ enum uverbs_default_objects { }; enum { - UVERBS_ATTR_UHW_IN = UVERBS_UDATA_DRIVER_DATA_FLAG, + UVERBS_ID_DRIVER_NS = 1UL << UVERBS_ID_NS_SHIFT, + UVERBS_ATTR_UHW_IN = UVERBS_ID_DRIVER_NS, UVERBS_ATTR_UHW_OUT, + UVERBS_ID_DRIVER_NS_WITH_UHW, }; enum uverbs_methods_device { -- cgit v1.2.3 From 589b844f1bf04850d9fabcaa2e943325dc6768b4 Mon Sep 17 00:00:00 2001 From: Akiva Goldberger Date: Thu, 27 Jun 2024 21:23:50 +0300 Subject: RDMA/mlx5: Send UAR page index as ioctl attribute Add UAR page index as a driver ioctl attribute to increase the number of supported indices, previously limited to 16 bits by mlx5_ib_create_cq struct. Link: https://lore.kernel.org/r/0e18b34d7ec3b1ae02d694b0d545aed7413c0ef7.1719512393.git.leon@kernel.org Signed-off-by: Akiva Goldberger Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 595edad03dfe..5b74d6534899 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -270,6 +270,10 @@ enum mlx5_ib_device_query_context_attrs { MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX = (1U << UVERBS_ID_NS_SHIFT), }; +enum mlx5_ib_create_cq_attrs { + MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX = UVERBS_ID_DRIVER_NS_WITH_UHW, +}; + #define MLX5_IB_DW_MATCH_PARAM 0xA0 struct mlx5_ib_match_params { -- cgit v1.2.3 From 12eba993b94c9186e44a01f46e38b3ab3c635f2d Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Fri, 17 May 2024 20:40:01 +0800 Subject: ext4: make ext4_es_insert_delayed_block() insert multi-blocks Rename ext4_es_insert_delayed_block() to ext4_es_insert_delayed_extent() and pass length parameter to make it insert multiple delalloc blocks at a time. For the case of bigalloc, split the allocated parameter to lclu_allocated and end_allocated. lclu_allocated indicates the allocation state of the cluster which is containing the lblk, end_allocated indicates the allocation state of the extent end, clusters in the middle of delay allocated extent must be unallocated. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://patch.msgid.link/20240517124005.347221-7-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index a697f4b77162..6b41ac61310f 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -2478,11 +2478,11 @@ TRACE_EVENT(ext4_es_shrink, __entry->scan_time, __entry->nr_skipped, __entry->retried) ); -TRACE_EVENT(ext4_es_insert_delayed_block, +TRACE_EVENT(ext4_es_insert_delayed_extent, TP_PROTO(struct inode *inode, struct extent_status *es, - bool allocated), + bool lclu_allocated, bool end_allocated), - TP_ARGS(inode, es, allocated), + TP_ARGS(inode, es, lclu_allocated, end_allocated), TP_STRUCT__entry( __field( dev_t, dev ) @@ -2491,7 +2491,8 @@ TRACE_EVENT(ext4_es_insert_delayed_block, __field( ext4_lblk_t, len ) __field( ext4_fsblk_t, pblk ) __field( char, status ) - __field( bool, allocated ) + __field( bool, lclu_allocated ) + __field( bool, end_allocated ) ), TP_fast_assign( @@ -2501,16 +2502,17 @@ TRACE_EVENT(ext4_es_insert_delayed_block, __entry->len = es->es_len; __entry->pblk = ext4_es_show_pblock(es); __entry->status = ext4_es_status(es); - __entry->allocated = allocated; + __entry->lclu_allocated = lclu_allocated; + __entry->end_allocated = end_allocated; ), TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s " - "allocated %d", + "allocated %d %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->lblk, __entry->len, __entry->pblk, show_extent_status(__entry->status), - __entry->allocated) + __entry->lclu_allocated, __entry->end_allocated) ); /* fsmap traces */ -- cgit v1.2.3 From 0d66b23d79c750276f791411d81a524549a64852 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Fri, 17 May 2024 20:40:02 +0800 Subject: ext4: make ext4_da_reserve_space() reserve multi-clusters Add 'nr_resv' parameter to ext4_da_reserve_space(), which indicates the number of clusters wants to reserve, make it reserve multiple clusters at a time. Signed-off-by: Zhang Yi Reviewed-by: Jan Kara Link: https://patch.msgid.link/20240517124005.347221-8-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o --- include/trace/events/ext4.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 6b41ac61310f..cc5e9b7b2b44 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -1246,14 +1246,15 @@ TRACE_EVENT(ext4_da_update_reserve_space, ); TRACE_EVENT(ext4_da_reserve_space, - TP_PROTO(struct inode *inode), + TP_PROTO(struct inode *inode, int nr_resv), - TP_ARGS(inode), + TP_ARGS(inode, nr_resv), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) __field( __u64, i_blocks ) + __field( int, reserve_blocks ) __field( int, reserved_data_blocks ) __field( __u16, mode ) ), @@ -1262,16 +1263,17 @@ TRACE_EVENT(ext4_da_reserve_space, __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->i_blocks = inode->i_blocks; + __entry->reserve_blocks = nr_resv; __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; __entry->mode = inode->i_mode; ), - TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu " + TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu reserve_blocks %d" "reserved_data_blocks %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->i_blocks, - __entry->reserved_data_blocks) + __entry->reserve_blocks, __entry->reserved_data_blocks) ); TRACE_EVENT(ext4_da_release_space, -- cgit v1.2.3 From d3dcb084c70727be4a2f61bd94796e66147cfa35 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Wed, 26 Jun 2024 05:06:17 +0200 Subject: net: phy: phy_device: Fix PHY LED blinking code comment Fix copy-paste error in the code comment. The code refers to LED blinking configuration, not brightness configuration. It was likely copied from comment above this one which does refer to brightness configuration. Fixes: 4e901018432e ("net: phy: phy_device: Call into the PHY driver to set LED blinking") Signed-off-by: Marek Vasut Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20240626030638.512069-1-marex@denx.de Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index e6e83304558e..3be430cf3132 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1122,7 +1122,7 @@ struct phy_driver { u8 index, enum led_brightness value); /** - * @led_blink_set: Set a PHY LED brightness. Index indicates + * @led_blink_set: Set a PHY LED blinking. Index indicates * which of the PHYs led should be configured to blink. Delays * are in milliseconds and if both are zero then a sensible * default should be chosen. The call should adjust the -- cgit v1.2.3 From 502099acb8cbf08acb6b43ff2b8da43d2bf85166 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 27 Jun 2024 18:08:47 +0100 Subject: firewire: core: Fix spelling mistakes in tracepoint messages There are two spelling mistakes in the tracepoint message text. Fix them. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20240627170847.125531-1-colin.i.king@gmail.com Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index aa00c9f33551..bac9d98e88e5 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -854,7 +854,7 @@ DECLARE_EVENT_CLASS(isoc_single_completions_template, memcpy(__get_dynamic_array(header), header, __get_dynamic_array_len(header)); ), TP_printk( - "context=0x%llx card_index=%u timestap=0x%04x cause=%s header=%s", + "context=0x%llx card_index=%u timestamp=0x%04x cause=%s header=%s", __entry->context, __entry->card_index, __entry->timestamp, @@ -892,7 +892,7 @@ TRACE_EVENT(isoc_inbound_multiple_completions, __entry->cause = cause; ), TP_printk( - "context=0x%llx card_index=%u comleted=%u cause=%s", + "context=0x%llx card_index=%u completed=%u cause=%s", __entry->context, __entry->card_index, __entry->completed, -- cgit v1.2.3 From ff2c570ef7eaa9ded58e7a02dd7a68874a897508 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 7 Jun 2024 16:55:35 +0200 Subject: path: add cleanup helper Add a simple cleanup helper so we can cleanup struct path easily. No need for any extra machinery. Avoid DEFINE_FREE() as it causes a local copy of struct path to be used. Just rely on path_put() directly called from a cleanup helper. Link: https://lore.kernel.org/r/20240607-vfs-listmount-reverse-v1-2-7877a2bfa5e5@kernel.org Reviewed-by: Josef Bacik Signed-off-by: Christian Brauner --- include/linux/path.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/path.h b/include/linux/path.h index 475225a03d0d..ca073e70decd 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -24,4 +24,13 @@ static inline void path_put_init(struct path *path) *path = (struct path) { }; } +/* + * Cleanup macro for use with __free(path_put). Avoids dereference and + * copying @path unlike DEFINE_FREE(). path_put() will handle the empty + * path correctly just ensure @path is initialized: + * + * struct path path __free(path_put) = {}; + */ +#define __free_path_put path_put + #endif /* _LINUX_PATH_H */ -- cgit v1.2.3 From d04bccd8c19d601232ed3e3c9e248c0040167d47 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 7 Jun 2024 16:55:37 +0200 Subject: listmount: allow listing in reverse order util-linux is about to implement listmount() and statmount() support. Karel requested the ability to scan the mount table in backwards order because that's what libmount currently does in order to get the latest mount first. We currently don't support this in listmount(). Add a new LISTMOUNT_REVERSE flag to allow listing mounts in reverse order. For example, listing all child mounts of /sys without LISTMOUNT_REVERSE gives: /sys/kernel/security @ mnt_id: 4294968369 /sys/fs/cgroup @ mnt_id: 4294968370 /sys/firmware/efi/efivars @ mnt_id: 4294968371 /sys/fs/bpf @ mnt_id: 4294968372 /sys/kernel/tracing @ mnt_id: 4294968373 /sys/kernel/debug @ mnt_id: 4294968374 /sys/fs/fuse/connections @ mnt_id: 4294968375 /sys/kernel/config @ mnt_id: 4294968376 whereas with LISTMOUNT_REVERSE it gives: /sys/kernel/config @ mnt_id: 4294968376 /sys/fs/fuse/connections @ mnt_id: 4294968375 /sys/kernel/debug @ mnt_id: 4294968374 /sys/kernel/tracing @ mnt_id: 4294968373 /sys/fs/bpf @ mnt_id: 4294968372 /sys/firmware/efi/efivars @ mnt_id: 4294968371 /sys/fs/cgroup @ mnt_id: 4294968370 /sys/kernel/security @ mnt_id: 4294968369 Link: https://lore.kernel.org/r/20240607-vfs-listmount-reverse-v1-4-7877a2bfa5e5@kernel.org Reviewed-by: Josef Bacik Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index ad5478dbad00..88d78de1519f 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -207,5 +207,6 @@ struct mnt_id_req { * Special @mnt_id values that can be passed to listmount */ #define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ #endif /* _UAPI_LINUX_MOUNT_H */ -- cgit v1.2.3 From 09b31295f833031c88419550172703d45c5401e3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 24 Jun 2024 11:49:47 -0400 Subject: fs: export the mount ns id via statmount In order to allow users to iterate through children mount namespaces via listmount we need a way for them to know what the ns id for the mount. Add a new field to statmount called mnt_ns_id which will carry the ns id for the given mount entry. Co-developed-by: Christian Brauner Signed-off-by: Josef Bacik Link: https://lore.kernel.org/r/6dabf437331fb7415d886f7c64b21cb2a50b1c66.1719243756.git.josef@toxicpanda.com Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 88d78de1519f..a07508aee518 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -172,7 +172,8 @@ struct statmount { __u64 propagate_from; /* Propagation from in current namespace */ __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ - __u64 __spare2[50]; + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u64 __spare2[49]; char str[]; /* Variable size part containing strings */ }; @@ -202,6 +203,7 @@ struct mnt_id_req { #define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ /* * Special @mnt_id values that can be passed to listmount -- cgit v1.2.3 From 0a3deb11858ae8a0b3849b5fda45512ad383f0e1 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 24 Jun 2024 11:49:48 -0400 Subject: fs: Allow listmount() in foreign mount namespace Expand struct mnt_id_req to add an optional mnt_ns_id field. When this field is populated, listmount() will be performed on the specified mount namespace, provided the currently application has CAP_SYS_ADMIN in its user namespace and the mount namespace is a child of the current namespace. Co-developed-by: Josef Bacik Signed-off-by: Josef Bacik Link: https://lore.kernel.org/r/49930bdce29a8367a213eb14c1e68e7e49284f86.1719243756.git.josef@toxicpanda.com Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index a07508aee518..ee1559cd6764 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -189,10 +189,12 @@ struct mnt_id_req { __u32 spare; __u64 mnt_id; __u64 param; + __u64 mnt_ns_id; }; /* List of all mnt_id_req versions. */ #define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ /* * @mask bits for statmount(2) -- cgit v1.2.3 From e8e43a1fcc5c07575f37e40f8a2cd78aee46f9a0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 24 Jun 2024 11:49:50 -0400 Subject: fs: add an ioctl to get the mnt ns id from nsfs In order to utilize the listmount() and statmount() extensions that allow us to call them on different namespaces we need a way to get the mnt namespace id from user space. Add an ioctl to nsfs that will allow us to extract the mnt namespace id in order to make these new extensions usable. Signed-off-by: Josef Bacik Link: https://lore.kernel.org/r/180449959d5a756af7306d6bda55f41b9d53e3cb.1719243756.git.josef@toxicpanda.com Signed-off-by: Christian Brauner --- include/uapi/linux/nsfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index a0c8552b64ee..56e8b1639b98 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -15,5 +15,7 @@ #define NS_GET_NSTYPE _IO(NSIO, 0x3) /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) +/* Get the id for a mount namespace */ +#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From 8c62617295d3c4cd03f1a02c3b9bf9d4e6d6e0c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Jun 2024 13:25:27 +0200 Subject: wifi: mac80211: remove DEAUTH_NEED_MGD_TX_PREP This flag is annoying because it puts a lot of logic into mac80211 that could just as well be in the driver (only iwlmvm uses it) and the implementation is also broken for MLO. Remove the flag in favour of calling drv_mgd_prepare_tx() without any conditions even for the deauth-while-assoc case. The drivers that implement it can take the appropriate actions, which for the only user of DEAUTH_NEED_MGD_TX_PREP (iwlmvm) is a bit more tricky than the implementation in mac80211 is anyway, and all others have no need and can just exit if info->was_assoc is set. Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240627132527.94924bcc9c9e.I328a219e45f2e2724cd52e75bb9feee3bf21a463@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9c96e8ae9ef7..bd0f8aefa797 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2767,14 +2767,6 @@ struct ieee80211_txq { * @IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA: Hardware supports buffer STA on * TDLS links. * - * @IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP: The driver requires the - * mgd_prepare_tx() callback to be called before transmission of a - * deauthentication frame in case the association was completed but no - * beacon was heard. This is required in multi-channel scenarios, where the - * virtual interface might not be given air time for the transmission of - * the frame, as it is not synced with the AP/P2P GO yet, and thus the - * deauthentication frame might not be transmitted. - * * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't * support QoS NDP for AP probing - that's most likely a driver bug. * @@ -2874,7 +2866,6 @@ enum ieee80211_hw_flags { IEEE80211_HW_REPORTS_LOW_ACK, IEEE80211_HW_SUPPORTS_TX_FRAG, IEEE80211_HW_SUPPORTS_TDLS_BUFFER_STA, - IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP, IEEE80211_HW_BUFF_MMPDU_TXQ, IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW, @@ -3787,13 +3778,15 @@ enum ieee80211_reconfig_type { * @success: whether the frame exchange was successful, only * used with the mgd_complete_tx() method, and then only * valid for auth and (re)assoc. + * @was_assoc: set if this call is due to deauth/disassoc + * while just having been associated * @link_id: the link id on which the frame will be TX'ed. * Only used with the mgd_prepare_tx() method. */ struct ieee80211_prep_tx_info { u16 duration; u16 subtype; - u8 success:1; + u8 success:1, was_assoc:1; int link_id; }; @@ -4242,12 +4235,9 @@ struct ieee80211_prep_tx_info { * yet it need not necessarily be given airtime, in particular since any * transmission to a P2P GO needs to be synchronized against the GO's * powersave state. mac80211 will call this function before transmitting a - * management frame prior to having successfully associated to allow the - * driver to give it channel time for the transmission, to get a response - * and to be able to synchronize with the GO. - * For drivers that set %IEEE80211_HW_DEAUTH_NEED_MGD_TX_PREP, mac80211 - * would also call this function before transmitting a deauthentication - * frame in case that no beacon was heard from the AP/P2P GO. + * management frame prior to transmitting that frame to allow the driver + * to give it channel time for the transmission, to get a response and be + * able to synchronize with the GO. * The callback will be called before each transmission and upon return * mac80211 will transmit the frame right away. * Additional information is passed in the &struct ieee80211_prep_tx_info -- cgit v1.2.3 From 816c6bec09ed5b90a58a1e12d5a606c5b6e23f47 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Jun 2024 10:42:56 +0200 Subject: wifi: mac80211: fix BSS_CHANGED_UNSOL_BCAST_PROBE_RESP Fix the definition of BSS_CHANGED_UNSOL_BCAST_PROBE_RESP so that not all higher bits get set, 1<<31 is a signed variable, so when we do u64 changed = BSS_CHANGED_UNSOL_BCAST_PROBE_RESP; we get sign expansion, so the value is 0xffff'ffff'8000'0000 and that's clearly not desired. Use BIT_ULL() to make it unsigned as well as the right type for the change flags. Fixes: 178e9d6adc43 ("wifi: mac80211: fix unsolicited broadcast probe config") Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20240627104257.06174d291db2.Iba0d642916eb78a61f8ab2cc5ca9280783d9c1db@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cafc664ee531..45ad37adbe32 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -395,7 +395,7 @@ enum ieee80211_bss_change { BSS_CHANGED_HE_OBSS_PD = 1<<28, BSS_CHANGED_HE_BSS_COLOR = 1<<29, BSS_CHANGED_FILS_DISCOVERY = 1<<30, - BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, + BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = BIT_ULL(31), BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), BSS_CHANGED_MLD_TTLM = BIT_ULL(34), -- cgit v1.2.3 From c6269149cbf7053272d918101981869438ff7c1e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Jun 2024 16:11:39 +0200 Subject: file: add take_fd() cleanup helper Add a helper that returns the file descriptor and ensures that the old variable contains a negative value. This makes it easy to rely on CLASS(get_unused_fd). Link: https://lore.kernel.org/r/20240627-work-pidfs-v1-1-7e9ab6cc3bb1@kernel.org Reviewed-by: Jeff Layton Reviewed-by: Josef Bacik Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner --- include/linux/cleanup.h | 13 ++++++++----- include/linux/file.h | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index c2d09bc4f976..80c4181e194a 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -63,17 +63,20 @@ #define __free(_name) __cleanup(__free_##_name) -#define __get_and_null_ptr(p) \ - ({ __auto_type __ptr = &(p); \ - __auto_type __val = *__ptr; \ - *__ptr = NULL; __val; }) +#define __get_and_null(p, nullvalue) \ + ({ \ + __auto_type __ptr = &(p); \ + __auto_type __val = *__ptr; \ + *__ptr = nullvalue; \ + __val; \ + }) static inline __must_check const volatile void * __must_check_fn(const volatile void *val) { return val; } #define no_free_ptr(p) \ - ((typeof(p)) __must_check_fn(__get_and_null_ptr(p))) + ((typeof(p)) __must_check_fn(__get_and_null(p, NULL))) #define return_ptr(p) return no_free_ptr(p) diff --git a/include/linux/file.h b/include/linux/file.h index 45d0f4800abd..237931f20739 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -97,6 +97,26 @@ extern void put_unused_fd(unsigned int fd); DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), get_unused_fd_flags(flags), unsigned flags) +/* + * take_fd() will take care to set @fd to -EBADF ensuring that + * CLASS(get_unused_fd) won't call put_unused_fd(). This makes it + * easier to rely on CLASS(get_unused_fd): + * + * struct file *f; + * + * CLASS(get_unused_fd, fd)(O_CLOEXEC); + * if (fd < 0) + * return fd; + * + * f = dentry_open(&path, O_RDONLY, current_cred()); + * if (IS_ERR(f)) + * return PTR_ERR(fd); + * + * fd_install(fd, f); + * return take_fd(fd); + */ +#define take_fd(fd) __get_and_null(fd, -EBADF) + extern void fd_install(unsigned int fd, struct file *file); int receive_fd(struct file *file, int __user *ufd, unsigned int o_flags); -- cgit v1.2.3 From d057c108155ab8d02b603c7ee5da7df615c2f32f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Jun 2024 16:11:40 +0200 Subject: nsproxy: add a cleanup helper for nsproxy Add a simple cleanup helper for nsproxy. Link: https://lore.kernel.org/r/20240627-work-pidfs-v1-2-7e9ab6cc3bb1@kernel.org Reviewed-by: Jeff Layton Reviewed-by: Josef Bacik Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner --- include/linux/nsproxy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 5601d14e2886..e6bec522b139 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -112,4 +112,6 @@ static inline void get_nsproxy(struct nsproxy *ns) refcount_inc(&ns->count); } +DEFINE_FREE(put_nsproxy, struct nsproxy *, if (_T) put_nsproxy(_T)) + #endif -- cgit v1.2.3 From 85e4daaeb70bc68ec920e0c268eb428113d31b21 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 28 Jun 2024 10:05:19 +0200 Subject: nsproxy: add helper to go from arbitrary namespace to ns_common They all contains struct ns_common ns and if there ever is one where that isn't the case we'll catch it here at build time. Signed-off-by: Christian Brauner --- include/linux/nsproxy.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index e6bec522b139..dab6a1734a22 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -42,6 +42,17 @@ struct nsproxy { }; extern struct nsproxy init_nsproxy; +#define to_ns_common(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: &(__ns->ns), \ + struct ipc_namespace *: &(__ns->ns), \ + struct net *: &(__ns->ns), \ + struct pid_namespace *: &(__ns->ns), \ + struct mnt_namespace *: &(__ns->ns), \ + struct time_namespace *: &(__ns->ns), \ + struct user_namespace *: &(__ns->ns), \ + struct uts_namespace *: &(__ns->ns)) + /* * A structure to encompass all bits needed to install * a partial or complete new set of namespaces. -- cgit v1.2.3 From 5b08bd408534bfb3a7cf5778da5b27d4e4fffe12 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 27 Jun 2024 16:11:42 +0200 Subject: pidfs: allow retrieval of namespace file descriptors For users that hold a reference to a pidfd procfs might not even be available nor is it desirable to parse through procfs just for the sake of getting namespace file descriptors for a process. Make it possible to directly retrieve namespace file descriptors from a pidfd. Pidfds already can be used with setns() to change a set of namespaces atomically. Link: https://lore.kernel.org/r/20240627-work-pidfs-v1-4-7e9ab6cc3bb1@kernel.org Reviewed-by: Jeff Layton Reviewed-by: Josef Bacik Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Christian Brauner --- include/uapi/linux/pidfd.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 72ec000a97cd..565fc0629fff 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -5,6 +5,7 @@ #include #include +#include /* Flags for pidfd_open(). */ #define PIDFD_NONBLOCK O_NONBLOCK @@ -15,4 +16,17 @@ #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) +#define PIDFS_IOCTL_MAGIC 0xFF + +#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) +#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) +#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) +#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) +#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) +#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) +#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) +#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) +#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) +#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) + #endif /* _UAPI_LINUX_PIDFD_H */ -- cgit v1.2.3 From 69540b7987ef05d1dff36981d4cc7c31e9716b36 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 27 Jun 2024 17:08:48 +0300 Subject: ethtool: Add ethtool operation to write to a transceiver module EEPROM Ethtool can already retrieve information from a transceiver module EEPROM by invoking the ethtool_ops::get_module_eeprom_by_page operation. Add a corresponding operation that allows ethtool to write to a transceiver module EEPROM. The new write operation is purely an in-kernel API and is not exposed to user space. The purpose of this operation is not to enable arbitrary read / write access, but to allow the kernel to write to specific addresses as part of transceiver module firmware flashing. In the future, more functionality can be implemented on top of these read / write operations. Adjust the comments of the 'ethtool_module_eeprom' structure as it is no longer used only for read access. Signed-off-by: Ido Schimmel Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/ethtool.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 959196af7f5a..c7f6f2bc9cac 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -506,17 +506,16 @@ struct ethtool_ts_stats { #define ETH_MODULE_MAX_I2C_ADDRESS 0x7f /** - * struct ethtool_module_eeprom - EEPROM dump from specified page - * @offset: Offset within the specified EEPROM page to begin read, in bytes. - * @length: Number of bytes to read. - * @page: Page number to read from. - * @bank: Page bank number to read from, if applicable by EEPROM spec. + * struct ethtool_module_eeprom - plug-in module EEPROM read / write parameters + * @offset: When @offset is 0-127, it is used as an address to the Lower Memory + * (@page must be 0). Otherwise, it is used as an address to the + * Upper Memory. + * @length: Number of bytes to read / write. + * @page: Page number. + * @bank: Bank number, if supported by EEPROM spec. * @i2c_address: I2C address of a page. Value less than 0x7f expected. Most * EEPROMs use 0x50 or 0x51. * @data: Pointer to buffer with EEPROM data of @length size. - * - * This can be used to manage pages during EEPROM dump in ethtool and pass - * required information to the driver. */ struct ethtool_module_eeprom { u32 offset; @@ -824,6 +823,8 @@ struct ethtool_rxfh_param { * @get_module_eeprom_by_page: Get a region of plug-in module EEPROM data from * specified page. Returns a negative error code or the amount of bytes * read. + * @set_module_eeprom_by_page: Write to a region of plug-in module EEPROM, + * from kernel space only. Returns a negative error code or zero. * @get_eth_phy_stats: Query some of the IEEE 802.3 PHY statistics. * @get_eth_mac_stats: Query some of the IEEE 802.3 MAC statistics. * @get_eth_ctrl_stats: Query some of the IEEE 802.3 MAC Ctrl statistics. @@ -958,6 +959,9 @@ struct ethtool_ops { int (*get_module_eeprom_by_page)(struct net_device *dev, const struct ethtool_module_eeprom *page, struct netlink_ext_ack *extack); + int (*set_module_eeprom_by_page)(struct net_device *dev, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack); void (*get_eth_phy_stats)(struct net_device *dev, struct ethtool_eth_phy_stats *phy_stats); void (*get_eth_mac_stats)(struct net_device *dev, -- cgit v1.2.3 From 46fb3ba95b93d1887e6dfa02a535e0526062de95 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Jun 2024 17:08:50 +0300 Subject: ethtool: Add an interface for flashing transceiver modules' firmware CMIS compliant modules such as QSFP-DD might be running a firmware that can be updated in a vendor-neutral way by exchanging messages between the host and the module as described in section 7.3.1 of revision 5.2 of the CMIS standard. Add a pair of new ethtool messages that allow: * User space to trigger firmware update of transceiver modules * The kernel to notify user space about the progress of the process The user interface is designed to be asynchronous in order to avoid RTNL being held for too long and to allow several modules to be updated simultaneously. The interface is designed with CMIS compliant modules in mind, but kept generic enough to accommodate future use cases, if these arise. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 18 ++++++++++++++++++ include/uapi/linux/ethtool_netlink.h | 19 +++++++++++++++++++ 2 files changed, 37 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 8733a3117902..e011384c915c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -877,6 +877,24 @@ enum ethtool_mm_verify_status { ETHTOOL_MM_VERIFY_STATUS_DISABLED, }; +/** + * enum ethtool_module_fw_flash_status - plug-in module firmware flashing status + * @ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED: The firmware flashing process has + * started. + * @ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS: The firmware flashing process + * is in progress. + * @ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED: The firmware flashing process was + * completed successfully. + * @ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR: The firmware flashing process was + * stopped due to an error. + */ +enum ethtool_module_fw_flash_status { + ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED = 1, + ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS, + ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED, + ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR, +}; + /** * struct ethtool_gstrings - string set for data tagging * @cmd: Command number = %ETHTOOL_GSTRINGS diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index d15856c7e001..840dabdc9d88 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -57,6 +57,7 @@ enum { ETHTOOL_MSG_PLCA_GET_STATUS, ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, + ETHTOOL_MSG_MODULE_FW_FLASH_ACT, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -109,6 +110,7 @@ enum { ETHTOOL_MSG_PLCA_NTF, ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, + ETHTOOL_MSG_MODULE_FW_FLASH_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -1018,6 +1020,23 @@ enum { ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; +/* MODULE_FW_FLASH */ + +enum { + ETHTOOL_A_MODULE_FW_FLASH_UNSPEC, + ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */ + ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */ + ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */ + ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */ + ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */ + ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */ + + /* add new constants above here */ + __ETHTOOL_A_MODULE_FW_FLASH_CNT, + ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 -- cgit v1.2.3 From 31e0aa99dc02b2b038a270b0670fc8201b69ec8a Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Jun 2024 17:08:52 +0300 Subject: ethtool: Veto some operations during firmware flashing process Some operations cannot be performed during the firmware flashing process. For example: - Port must be down during the whole flashing process to avoid packet loss while committing reset for example. - Writing to EEPROM interrupts the flashing process, so operations like ethtool dump, module reset, get and set power mode should be vetoed. - Split port firmware flashing should be vetoed. In order to veto those scenarios, add a flag in 'struct net_device' that indicates when a firmware flash is taking place on the module and use it to prevent interruptions during the process. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cc18acd3c58b..1e3401093c13 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1990,6 +1990,8 @@ enum netdev_reg_state { * * @threaded: napi threaded mode is enabled * + * @module_fw_flash_in_progress: Module firmware flashing is in progress. + * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. @@ -2374,7 +2376,7 @@ struct net_device { bool proto_down; bool threaded; unsigned wol_enabled:1; - + unsigned module_fw_flash_in_progress:1; struct list_head net_notifier_list; #if IS_ENABLED(CONFIG_MACSEC) -- cgit v1.2.3 From e4f91936993ce4d0954688ec3cb80b3471b9eda5 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 27 Jun 2024 17:08:53 +0300 Subject: net: sfp: Add more extended compliance codes SFF-8024 is used to define various constants re-used in several SFF SFP-related specifications. Add SFF-8024 extended compliance code definitions for CMIS compliant modules and use them in the next patch to determine the firmware flashing work. Signed-off-by: Danielle Ratson Reviewed-by: Petr Machata Reviewed-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/sfp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/sfp.h b/include/linux/sfp.h index a45da7eef9a2..b14be59550e3 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -284,6 +284,12 @@ enum { SFF8024_ID_QSFP_8438 = 0x0c, SFF8024_ID_QSFP_8436_8636 = 0x0d, SFF8024_ID_QSFP28_8636 = 0x11, + SFF8024_ID_QSFP_DD = 0x18, + SFF8024_ID_OSFP = 0x19, + SFF8024_ID_DSFP = 0x1B, + SFF8024_ID_QSFP_PLUS_CMIS = 0x1E, + SFF8024_ID_SFP_DD_CMIS = 0x1F, + SFF8024_ID_SFP_PLUS_CMIS = 0x20, SFF8024_ENCODING_UNSPEC = 0x00, SFF8024_ENCODING_8B10B = 0x01, -- cgit v1.2.3 From 048a403648fcef8bd9f4f1a290c57b626ad16296 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 27 Jun 2024 21:02:34 +0300 Subject: net/mlx5: IFC updates for changing max EQs Expose new capability to support changing the number of EQs available to other functions. Fixes: 93197c7c509d ("mlx5/core: Support max_io_eqs for a function") Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Reviewed-by: William Tu Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5df52e15f7d6..d45bfb7cf81d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2029,7 +2029,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 pcc_ifa2[0x1]; u8 reserved_at_3f1[0xf]; - u8 reserved_at_400[0x400]; + u8 reserved_at_400[0x40]; + + u8 reserved_at_440[0x8]; + u8 max_num_eqs_24b[0x18]; + u8 reserved_at_460[0x3a0]; }; enum mlx5_ifc_flow_destination_type { -- cgit v1.2.3 From f9af549d1fd31487bbbc666b5b158cfc940ccc17 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 24 Jun 2024 15:40:52 -0400 Subject: fs: export mount options via statmount() statmount() can export arbitrary strings, so utilize the __spare1 slot for a mnt_opts string pointer, and then support asking for and setting the mount options during statmount(). This calls into the helper for showing mount options, which already uses a seq_file, so fits in nicely with our existing mechanism for exporting strings via statmount(). Signed-off-by: Josef Bacik Link: https://lore.kernel.org/r/3aa6bf8bd5d0a21df9ebd63813af8ab532c18276.1719257716.git.josef@toxicpanda.com Reviewed-by: Jeff Layton [brauner: only call sb->s_op->show_options()] Signed-off-by: Christian Brauner --- include/uapi/linux/mount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index ee1559cd6764..225bc366ffcb 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 __spare1; + __u32 mnt_opts; /* [str] Mount options of the mount */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -206,6 +206,7 @@ struct mnt_id_req { #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ #define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ /* * Special @mnt_id values that can be passed to listmount -- cgit v1.2.3 From 205fdba5d0ffe1ad8de61763d74323e88b640d41 Mon Sep 17 00:00:00 2001 From: James Ogletree Date: Thu, 20 Jun 2024 16:17:41 +0000 Subject: firmware: cs_dsp: Add write sequence interface A write sequence is a sequence of register addresses and values executed by some Cirrus DSPs following certain power state transitions. Add support for Cirrus drivers to update or add to a write sequence present in firmware. Reviewed-by: Charles Keepax Signed-off-by: James Ogletree Reviewed-by: Jeff LaBundy Link: https://lore.kernel.org/r/20240620161745.2312359-2-jogletre@opensource.cirrus.com Signed-off-by: Lee Jones --- include/linux/firmware/cirrus/cs_dsp.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index 82687e07a7c2..c28a6b9c3b2d 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -42,6 +42,16 @@ #define CS_DSP_ACKED_CTL_MIN_VALUE 0 #define CS_DSP_ACKED_CTL_MAX_VALUE 0xFFFFFF +/* + * Write sequence operation codes + */ +#define CS_DSP_WSEQ_FULL 0x00 +#define CS_DSP_WSEQ_ADDR8 0x02 +#define CS_DSP_WSEQ_L16 0x04 +#define CS_DSP_WSEQ_H16 0x05 +#define CS_DSP_WSEQ_UNLOCK 0xFD +#define CS_DSP_WSEQ_END 0xFF + /** * struct cs_dsp_region - Describes a logical memory region in DSP address space * @type: Memory region type @@ -258,6 +268,23 @@ struct cs_dsp_alg_region *cs_dsp_find_alg_region(struct cs_dsp *dsp, const char *cs_dsp_mem_region_name(unsigned int type); +/** + * struct cs_dsp_wseq - Describes a write sequence + * @ctl: Write sequence cs_dsp control + * @ops: Operations contained within + */ +struct cs_dsp_wseq { + struct cs_dsp_coeff_ctl *ctl; + struct list_head ops; +}; + +int cs_dsp_wseq_init(struct cs_dsp *dsp, struct cs_dsp_wseq *wseqs, unsigned int num_wseqs); +int cs_dsp_wseq_write(struct cs_dsp *dsp, struct cs_dsp_wseq *wseq, u32 addr, u32 data, + u8 op_code, bool update); +int cs_dsp_wseq_multi_write(struct cs_dsp *dsp, struct cs_dsp_wseq *wseq, + const struct reg_sequence *reg_seq, int num_regs, + u8 op_code, bool update); + /** * struct cs_dsp_chunk - Describes a buffer holding data formatted for the DSP * @data: Pointer to underlying buffer memory -- cgit v1.2.3 From cb626376cbd00cd69329371519a8e9568baef715 Mon Sep 17 00:00:00 2001 From: James Ogletree Date: Thu, 20 Jun 2024 16:17:43 +0000 Subject: mfd: cs40l50: Add support for CS40L50 core driver Introduce support for Cirrus Logic Device CS40L50: a haptic driver with waveform memory, integrated DSP, and closed-loop algorithms. The MFD component registers and initializes the device. Signed-off-by: James Ogletree Reviewed-by: Jeff LaBundy Link: https://lore.kernel.org/r/20240620161745.2312359-4-jogletre@opensource.cirrus.com Signed-off-by: Lee Jones --- include/linux/mfd/cs40l50.h | 137 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 include/linux/mfd/cs40l50.h (limited to 'include') diff --git a/include/linux/mfd/cs40l50.h b/include/linux/mfd/cs40l50.h new file mode 100644 index 000000000000..e5dc49860944 --- /dev/null +++ b/include/linux/mfd/cs40l50.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * CS40L50 Advanced Haptic Driver with waveform memory, + * integrated DSP, and closed-loop algorithms + * + * Copyright 2024 Cirrus Logic, Inc. + * + * Author: James Ogletree + */ + +#ifndef __MFD_CS40L50_H__ +#define __MFD_CS40L50_H__ + +#include +#include +#include +#include + +/* Power Supply Configuration */ +#define CS40L50_BLOCK_ENABLES2 0x201C +#define CS40L50_ERR_RLS 0x2034 +#define CS40L50_BST_LPMODE_SEL 0x3810 +#define CS40L50_DCM_LOW_POWER 0x1 +#define CS40L50_OVERTEMP_WARN 0x4000010 + +/* Interrupts */ +#define CS40L50_IRQ1_INT_1 0xE010 +#define CS40L50_IRQ1_BASE CS40L50_IRQ1_INT_1 +#define CS40L50_IRQ1_INT_2 0xE014 +#define CS40L50_IRQ1_INT_8 0xE02C +#define CS40L50_IRQ1_INT_9 0xE030 +#define CS40L50_IRQ1_INT_10 0xE034 +#define CS40L50_IRQ1_INT_18 0xE054 +#define CS40L50_IRQ1_MASK_1 0xE090 +#define CS40L50_IRQ1_MASK_2 0xE094 +#define CS40L50_IRQ1_MASK_20 0xE0DC +#define CS40L50_IRQ1_INT_1_OFFSET (CS40L50_IRQ1_INT_1 - CS40L50_IRQ1_BASE) +#define CS40L50_IRQ1_INT_2_OFFSET (CS40L50_IRQ1_INT_2 - CS40L50_IRQ1_BASE) +#define CS40L50_IRQ1_INT_8_OFFSET (CS40L50_IRQ1_INT_8 - CS40L50_IRQ1_BASE) +#define CS40L50_IRQ1_INT_9_OFFSET (CS40L50_IRQ1_INT_9 - CS40L50_IRQ1_BASE) +#define CS40L50_IRQ1_INT_10_OFFSET (CS40L50_IRQ1_INT_10 - CS40L50_IRQ1_BASE) +#define CS40L50_IRQ1_INT_18_OFFSET (CS40L50_IRQ1_INT_18 - CS40L50_IRQ1_BASE) +#define CS40L50_IRQ_MASK_2_OVERRIDE 0xFFDF7FFF +#define CS40L50_IRQ_MASK_20_OVERRIDE 0x15C01000 +#define CS40L50_AMP_SHORT_MASK BIT(31) +#define CS40L50_DSP_QUEUE_MASK BIT(21) +#define CS40L50_TEMP_ERR_MASK BIT(31) +#define CS40L50_BST_UVP_MASK BIT(6) +#define CS40L50_BST_SHORT_MASK BIT(7) +#define CS40L50_BST_ILIMIT_MASK BIT(18) +#define CS40L50_UVLO_VDDBATT_MASK BIT(16) +#define CS40L50_GLOBAL_ERROR_MASK BIT(15) + +enum cs40l50_irq_list { + CS40L50_DSP_QUEUE_IRQ, + CS40L50_GLOBAL_ERROR_IRQ, + CS40L50_UVLO_VDDBATT_IRQ, + CS40L50_BST_ILIMIT_IRQ, + CS40L50_BST_SHORT_IRQ, + CS40L50_BST_UVP_IRQ, + CS40L50_TEMP_ERR_IRQ, + CS40L50_AMP_SHORT_IRQ, +}; + +/* DSP */ +#define CS40L50_XMEM_PACKED_0 0x2000000 +#define CS40L50_XMEM_UNPACKED24_0 0x2800000 +#define CS40L50_SYS_INFO_ID 0x25E0000 +#define CS40L50_DSP_QUEUE_WT 0x28042C8 +#define CS40L50_DSP_QUEUE_RD 0x28042CC +#define CS40L50_NUM_WAVES 0x2805C18 +#define CS40L50_CORE_BASE 0x2B80000 +#define CS40L50_YMEM_PACKED_0 0x2C00000 +#define CS40L50_YMEM_UNPACKED24_0 0x3400000 +#define CS40L50_PMEM_0 0x3800000 +#define CS40L50_DSP_POLL_US 1000 +#define CS40L50_DSP_TIMEOUT_COUNT 100 +#define CS40L50_RESET_PULSE_US 2200 +#define CS40L50_CP_READY_US 3100 +#define CS40L50_AUTOSUSPEND_MS 2000 +#define CS40L50_PM_ALGO 0x9F206 +#define CS40L50_GLOBAL_ERR_RLS_SET BIT(11) +#define CS40L50_GLOBAL_ERR_RLS_CLEAR 0 + +enum cs40l50_wseqs { + CS40L50_PWR_ON, + CS40L50_STANDBY, + CS40L50_ACTIVE, + CS40L50_NUM_WSEQS, +}; + +/* DSP Queue */ +#define CS40L50_DSP_QUEUE_BASE 0x11004 +#define CS40L50_DSP_QUEUE_END 0x1101C +#define CS40L50_DSP_QUEUE 0x11020 +#define CS40L50_PREVENT_HIBER 0x2000003 +#define CS40L50_ALLOW_HIBER 0x2000004 +#define CS40L50_SHUTDOWN 0x2000005 +#define CS40L50_SYSTEM_RESET 0x2000007 +#define CS40L50_START_I2S 0x3000002 +#define CS40L50_OWT_PUSH 0x3000008 +#define CS40L50_STOP_PLAYBACK 0x5000000 +#define CS40L50_OWT_DELETE 0xD000000 + +/* Firmware files */ +#define CS40L50_FW "cs40l50.wmfw" +#define CS40L50_WT "cs40l50.bin" + +/* Device */ +#define CS40L50_DEVID 0x0 +#define CS40L50_REVID 0x4 +#define CS40L50_DEVID_A 0x40A50 +#define CS40L50_REVID_B0 0xB0 + +struct cs40l50 { + struct device *dev; + struct regmap *regmap; + struct mutex lock; + struct cs_dsp dsp; + struct gpio_desc *reset_gpio; + struct regmap_irq_chip_data *irq_data; + const struct firmware *fw; + const struct firmware *bin; + struct cs_dsp_wseq wseqs[CS40L50_NUM_WSEQS]; + int irq; + u32 devid; + u32 revid; +}; + +int cs40l50_dsp_write(struct device *dev, struct regmap *regmap, u32 val); +int cs40l50_probe(struct cs40l50 *cs40l50); +int cs40l50_remove(struct cs40l50 *cs40l50); + +extern const struct regmap_config cs40l50_regmap; +extern const struct dev_pm_ops cs40l50_pm_ops; + +#endif /* __MFD_CS40L50_H__ */ -- cgit v1.2.3 From 860f8e3beac0b800bbe20f23c5f3440b1c470b8f Mon Sep 17 00:00:00 2001 From: Karel Balej Date: Fri, 31 May 2024 19:34:57 +0200 Subject: mfd: Add driver for Marvell 88PM886 PMIC Marvell 88PM886 is a PMIC which provides various functions such as onkey, battery, charger and regulators. It is found for instance in the samsung,coreprimevelte smartphone with which this was tested. Implement basic support to allow for the use of regulators and onkey. Signed-off-by: Karel Balej Link: https://lore.kernel.org/r/20240531175109.15599-3-balejk@matfyz.cz Signed-off-by: Lee Jones --- include/linux/mfd/88pm886.h | 69 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 include/linux/mfd/88pm886.h (limited to 'include') diff --git a/include/linux/mfd/88pm886.h b/include/linux/mfd/88pm886.h new file mode 100644 index 000000000000..133aa302e492 --- /dev/null +++ b/include/linux/mfd/88pm886.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __MFD_88PM886_H +#define __MFD_88PM886_H + +#include +#include + +#define PM886_A1_CHIP_ID 0xa1 + +#define PM886_IRQ_ONKEY 0 + +#define PM886_PAGE_OFFSET_REGULATORS 1 + +#define PM886_REG_ID 0x00 + +#define PM886_REG_STATUS1 0x01 +#define PM886_ONKEY_STS1 BIT(0) + +#define PM886_REG_INT_STATUS1 0x05 + +#define PM886_REG_INT_ENA_1 0x0a +#define PM886_INT_ENA1_ONKEY BIT(0) + +#define PM886_REG_MISC_CONFIG1 0x14 +#define PM886_SW_PDOWN BIT(5) + +#define PM886_REG_MISC_CONFIG2 0x15 +#define PM886_INT_INV BIT(0) +#define PM886_INT_CLEAR BIT(1) +#define PM886_INT_RC 0x00 +#define PM886_INT_WC BIT(1) +#define PM886_INT_MASK_MODE BIT(2) + +#define PM886_REG_RTC_SPARE6 0xef + +#define PM886_REG_BUCK_EN 0x08 +#define PM886_REG_LDO_EN1 0x09 +#define PM886_REG_LDO_EN2 0x0a +#define PM886_REG_LDO1_VOUT 0x20 +#define PM886_REG_LDO2_VOUT 0x26 +#define PM886_REG_LDO3_VOUT 0x2c +#define PM886_REG_LDO4_VOUT 0x32 +#define PM886_REG_LDO5_VOUT 0x38 +#define PM886_REG_LDO6_VOUT 0x3e +#define PM886_REG_LDO7_VOUT 0x44 +#define PM886_REG_LDO8_VOUT 0x4a +#define PM886_REG_LDO9_VOUT 0x50 +#define PM886_REG_LDO10_VOUT 0x56 +#define PM886_REG_LDO11_VOUT 0x5c +#define PM886_REG_LDO12_VOUT 0x62 +#define PM886_REG_LDO13_VOUT 0x68 +#define PM886_REG_LDO14_VOUT 0x6e +#define PM886_REG_LDO15_VOUT 0x74 +#define PM886_REG_LDO16_VOUT 0x7a +#define PM886_REG_BUCK1_VOUT 0xa5 +#define PM886_REG_BUCK2_VOUT 0xb3 +#define PM886_REG_BUCK3_VOUT 0xc1 +#define PM886_REG_BUCK4_VOUT 0xcf +#define PM886_REG_BUCK5_VOUT 0xdd + +#define PM886_LDO_VSEL_MASK 0x0f +#define PM886_BUCK_VSEL_MASK 0x7f + +struct pm886_chip { + struct i2c_client *client; + unsigned int chip_id; + struct regmap *regmap; +}; +#endif /* __MFD_88PM886_H */ -- cgit v1.2.3 From d19b46340b3c0ea66bef0f6c58876cc085813ba8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2024 06:59:38 +0200 Subject: block: remove bio_integrity_process Move the bvec interation into the generate/verify helpers to avoid a bit of argument passing churn. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240626045950.189758-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-integrity.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index c58634924782..804f856ed3e5 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -14,15 +14,6 @@ enum blk_integrity_flags { BLK_INTEGRITY_STACKED = 1 << 4, }; -struct blk_integrity_iter { - void *prot_buf; - void *data_buf; - sector_t seed; - unsigned int data_size; - unsigned short interval; - const char *disk_name; -}; - const char *blk_integrity_profile_name(struct blk_integrity *bi); bool queue_limits_stack_integrity(struct queue_limits *t, struct queue_limits *b); -- cgit v1.2.3 From aa6ff4eb7c10d9a6532db3ea9e78124bf14e70ae Mon Sep 17 00:00:00 2001 From: Dongliang Cui Date: Fri, 14 Jun 2024 15:49:36 +0800 Subject: block: Add ioprio to block_rq tracepoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes we need to track the processing order of requests with ioprio set. So the ioprio of request can be useful information. Example: block_rq_insert: 8,0 RA 16384 () 6500840 + 32 be,0,6 [binder:815_3] block_rq_issue: 8,0 RA 16384 () 6500840 + 32 be,0,6 [binder:815_3] block_rq_complete: 8,0 RA () 6500840 + 32 be,0,6 [0] Signed-off-by: Dongliang Cui Reviewed-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20240614074936.113659-1-dongliang.cui@unisoc.com Signed-off-by: Jens Axboe --- include/trace/events/block.h | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 0e128ad51460..1527d5d45e01 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -9,9 +9,17 @@ #include #include #include +#include #define RWBS_LEN 8 +#define IOPRIO_CLASS_STRINGS \ + { IOPRIO_CLASS_NONE, "none" }, \ + { IOPRIO_CLASS_RT, "rt" }, \ + { IOPRIO_CLASS_BE, "be" }, \ + { IOPRIO_CLASS_IDLE, "idle" }, \ + { IOPRIO_CLASS_INVALID, "invalid"} + #ifdef CONFIG_BUFFER_HEAD DECLARE_EVENT_CLASS(block_buffer, @@ -82,6 +90,7 @@ TRACE_EVENT(block_rq_requeue, __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) + __field( unsigned short, ioprio ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), @@ -90,16 +99,20 @@ TRACE_EVENT(block_rq_requeue, __entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0; __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); + __entry->ioprio = rq->ioprio; blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; ), - TP_printk("%d,%d %s (%s) %llu + %u [%d]", + TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, 0) + (unsigned long long)__entry->sector, __entry->nr_sector, + __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio), + IOPRIO_CLASS_STRINGS), + IOPRIO_PRIO_HINT(__entry->ioprio), + IOPRIO_PRIO_LEVEL(__entry->ioprio), 0) ); DECLARE_EVENT_CLASS(block_rq_completion, @@ -113,6 +126,7 @@ DECLARE_EVENT_CLASS(block_rq_completion, __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( int , error ) + __field( unsigned short, ioprio ) __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, 1 ) ), @@ -122,16 +136,20 @@ DECLARE_EVENT_CLASS(block_rq_completion, __entry->sector = blk_rq_pos(rq); __entry->nr_sector = nr_bytes >> 9; __entry->error = blk_status_to_errno(error); + __entry->ioprio = rq->ioprio; blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; ), - TP_printk("%d,%d %s (%s) %llu + %u [%d]", + TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->error) + (unsigned long long)__entry->sector, __entry->nr_sector, + __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio), + IOPRIO_CLASS_STRINGS), + IOPRIO_PRIO_HINT(__entry->ioprio), + IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->error) ); /** @@ -180,6 +198,7 @@ DECLARE_EVENT_CLASS(block_rq, __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) + __field( unsigned short, ioprio ) __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, 1 ) @@ -190,17 +209,21 @@ DECLARE_EVENT_CLASS(block_rq, __entry->sector = blk_rq_trace_sector(rq); __entry->nr_sector = blk_rq_trace_nr_sectors(rq); __entry->bytes = blk_rq_bytes(rq); + __entry->ioprio = rq->ioprio; blk_fill_rwbs(__entry->rwbs, rq->cmd_flags); __get_str(cmd)[0] = '\0'; memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ), - TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", + TP_printk("%d,%d %s %u (%s) %llu + %u %s,%u,%u [%s]", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, __entry->bytes, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + (unsigned long long)__entry->sector, __entry->nr_sector, + __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio), + IOPRIO_CLASS_STRINGS), + IOPRIO_PRIO_HINT(__entry->ioprio), + IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->comm) ); /** -- cgit v1.2.3 From c1385c1f0ba3b80bd12f26c440612175088c664c Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:28 +0100 Subject: ACPI: processor: Simplify initial onlining to use same path for cold and hotplug Separate code paths, combined with a flag set in acpi_processor.c to indicate a struct acpi_processor was for a hotplugged CPU ensured that per CPU data was only set up the first time that a CPU was initialized. This appears to be unnecessary as the paths can be combined by letting the online logic also handle any CPUs online at the time of driver load. Motivation for this change, beyond simplification, is that ARM64 virtual CPU HP uses the same code paths for hotplug and cold path in acpi_processor.c so had no easy way to set the flag for hotplug only. Removing this necessity will enable ARM64 vCPU HP to reuse the existing code paths. Acked-by: Rafael J. Wysocki Reviewed-by: Hanjun Guo Tested-by: Miguel Luis Reviewed-by: Gavin Shan Reviewed-by: Miguel Luis Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-2-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- include/acpi/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 3f34ebb27525..e6f6074eadbf 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -217,7 +217,7 @@ struct acpi_processor_flags { u8 has_lpi:1; u8 power_setup_done:1; u8 bm_rld_set:1; - u8 need_hotplug_init:1; + u8 previously_online:1; }; struct acpi_processor { -- cgit v1.2.3 From 36b921637e900c77f5f9bd5b45db522124068a96 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:34 +0100 Subject: ACPI: processor: Add acpi_get_processor_handle() helper If CONFIG_ACPI_PROCESSOR provide a helper to retrieve the acpi_handle for a given CPU allowing access to methods in DSDT. Tested-by: Miguel Luis Reviewed-by: Gavin Shan Acked-by: Rafael J. Wysocki Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-8-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- include/linux/acpi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 28c3fb2bef0d..fd45bfab66b8 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -304,6 +304,8 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int acpi_unmap_cpu(int cpu); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ +acpi_handle acpi_get_processor_handle(int cpu); + #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif @@ -1076,6 +1078,11 @@ static inline bool acpi_sleep_state_supported(u8 sleep_state) return false; } +static inline acpi_handle acpi_get_processor_handle(int cpu) +{ + return NULL; +} + #endif /* !CONFIG_ACPI */ extern void arch_post_acpi_subsys_init(void); -- cgit v1.2.3 From 3b9d0a78aeda194994892f7c42f6ca5feb45eadd Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 29 May 2024 14:34:37 +0100 Subject: ACPI: Add post_eject to struct acpi_scan_handler for cpu hotplug struct acpi_scan_handler has a detach callback that is used to remove a driver when a bus is changed. When interacting with an eject-request, the detach callback is called before _EJ0. This means the ACPI processor driver can't use _STA to determine if a CPU has been made not-present, or some of the other _STA bits have been changed. acpi_processor_remove() needs to know the value of _STA after _EJ0 has been called. Add a post_eject callback to struct acpi_scan_handler. This is called after acpi_scan_hot_remove() has successfully called _EJ0. Because acpi_scan_check_and_detach() also clears the handler pointer, it needs to be told if the caller will go on to call acpi_bus_post_eject(), so that acpi_device_clear_enumerated() and clearing the handler pointer can be deferred. An extra flag is added to flags field introduced in the previous patch to achieve this. Signed-off-by: James Morse Reviewed-by: Gavin Shan Tested-by: Miguel Luis Tested-by: Vishnu Pajjuri Tested-by: Jianyong Wu Acked-by: Rafael J. Wysocki Reviewed-by: Hanjun Guo Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-11-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- include/acpi/acpi_bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 1a4dfd7a1c4a..05ca49478537 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -134,6 +134,7 @@ struct acpi_scan_handler { bool (*match)(const char *idstr, const struct acpi_device_id **matchid); int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id); void (*detach)(struct acpi_device *dev); + void (*post_eject)(struct acpi_device *dev); void (*bind)(struct device *phys_dev); void (*unbind)(struct device *phys_dev); struct acpi_hotplug_profile hotplug; -- cgit v1.2.3 From d633da5d3ab1a0eb26a2213d65da1e189e82f8ab Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 29 May 2024 14:34:41 +0100 Subject: irqchip/gic-v3: Add support for ACPI's disabled but 'online capable' CPUs To support virtual CPU hotplug, ACPI has added an 'online capable' bit to the MADT GICC entries. This indicates a disabled CPU entry may not be possible to online via PSCI until firmware has set enabled bit in _STA. This means that a "usable" GIC redistributor is one that is marked as either enabled, or online capable. The meaning of the acpi_gicc_is_usable() would become less clear than just checking the pair of flags at call sites. As such, drop that helper function. The test in gic_acpi_match_gicc() remains as testing just the enabled bit so the count of enabled distributors is correct. What about the redistributor in the GICC entry? ACPI doesn't want to say. Assume the worst: When a redistributor is described in the GICC entry, but the entry is marked as disabled at boot, assume the redistributor is inaccessible. The GICv3 driver doesn't support late online of redistributors, so this means the corresponding CPU can't be brought online either. Rather than modifying cpu masks that may already have been used, register a new cpuhp callback to fail this case. This must run earlier than the main gic_starting_cpu() so that this case can be rejected before the section of cpuhp that runs on the CPU that is coming up as that is not allowed to fail. This solution keeps the handling of this broken firmware corner case local to the GIC driver. As precise ordering of this callback doesn't need to be controlled as long as it is in that initial prepare phase, use CPUHP_BP_PREPARE_DYN. Systems that want CPU hotplug in a VM can ensure their redistributors are always-on, and describe them that way with a GICR entry in the MADT. Suggested-by: Marc Zyngier Signed-off-by: James Morse Signed-off-by: Russell King (Oracle) Tested-by: Miguel Luis Co-developed-by: Jonathan Cameron Signed-off-by: Jonathan Cameron Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240529133446.28446-15-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- include/linux/acpi.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fd45bfab66b8..9f8c9d29b035 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -237,11 +237,6 @@ acpi_table_parse_cedt(enum acpi_cedt_type id, int acpi_parse_mcfg (struct acpi_table_header *header); void acpi_table_print_madt_entry (struct acpi_subtable_header *madt); -static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc) -{ - return gicc->flags & ACPI_MADT_ENABLED; -} - #if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else -- cgit v1.2.3 From 4e1a7df4548003fc081360b0f4edce3f7a991bfc Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 29 May 2024 14:34:46 +0100 Subject: cpumask: Add enabled cpumask for present CPUs that can be brought online The 'offline' file in sysfs shows all offline CPUs, including those that aren't present. User-space is expected to remove not-present CPUs from this list to learn which CPUs could be brought online. CPUs can be present but not-enabled. These CPUs can't be brought online until the firmware policy changes, which comes with an ACPI notification that will register the CPUs. With only the offline and present files, user-space is unable to determine which CPUs it can try to bring online. Add a new CPU mask that shows this based on all the registered CPUs. Signed-off-by: James Morse Tested-by: Miguel Luis Tested-by: Vishnu Pajjuri Tested-by: Jianyong Wu Acked-by: Thomas Gleixner Signed-off-by: Russell King (Oracle) Reviewed-by: Gavin Shan Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-20-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas --- include/linux/cpumask.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 23686bed441d..954d4adc8f81 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -93,6 +93,7 @@ static inline void set_nr_cpu_ids(unsigned int nr) * * cpu_possible_mask- has bit 'cpu' set iff cpu is populatable * cpu_present_mask - has bit 'cpu' set iff cpu is populated + * cpu_enabled_mask - has bit 'cpu' set iff cpu can be brought online * cpu_online_mask - has bit 'cpu' set iff cpu available to scheduler * cpu_active_mask - has bit 'cpu' set iff cpu available to migration * @@ -125,11 +126,13 @@ static inline void set_nr_cpu_ids(unsigned int nr) extern struct cpumask __cpu_possible_mask; extern struct cpumask __cpu_online_mask; +extern struct cpumask __cpu_enabled_mask; extern struct cpumask __cpu_present_mask; extern struct cpumask __cpu_active_mask; extern struct cpumask __cpu_dying_mask; #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask) #define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask) +#define cpu_enabled_mask ((const struct cpumask *)&__cpu_enabled_mask) #define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask) #define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask) #define cpu_dying_mask ((const struct cpumask *)&__cpu_dying_mask) @@ -1075,6 +1078,7 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); #else #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask) #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask) +#define for_each_enabled_cpu(cpu) for_each_cpu((cpu), cpu_enabled_mask) #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask) #endif @@ -1092,6 +1096,15 @@ set_cpu_possible(unsigned int cpu, bool possible) cpumask_clear_cpu(cpu, &__cpu_possible_mask); } +static inline void +set_cpu_enabled(unsigned int cpu, bool can_be_onlined) +{ + if (can_be_onlined) + cpumask_set_cpu(cpu, &__cpu_enabled_mask); + else + cpumask_clear_cpu(cpu, &__cpu_enabled_mask); +} + static inline void set_cpu_present(unsigned int cpu, bool present) { @@ -1173,6 +1186,7 @@ static __always_inline unsigned int num_online_cpus(void) return raw_atomic_read(&__num_online_cpus); } #define num_possible_cpus() cpumask_weight(cpu_possible_mask) +#define num_enabled_cpus() cpumask_weight(cpu_enabled_mask) #define num_present_cpus() cpumask_weight(cpu_present_mask) #define num_active_cpus() cpumask_weight(cpu_active_mask) @@ -1181,6 +1195,11 @@ static inline bool cpu_online(unsigned int cpu) return cpumask_test_cpu(cpu, cpu_online_mask); } +static inline bool cpu_enabled(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_enabled_mask); +} + static inline bool cpu_possible(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_possible_mask); @@ -1205,6 +1224,7 @@ static inline bool cpu_dying(unsigned int cpu) #define num_online_cpus() 1U #define num_possible_cpus() 1U +#define num_enabled_cpus() 1U #define num_present_cpus() 1U #define num_active_cpus() 1U @@ -1218,6 +1238,11 @@ static inline bool cpu_possible(unsigned int cpu) return cpu == 0; } +static inline bool cpu_enabled(unsigned int cpu) +{ + return cpu == 0; +} + static inline bool cpu_present(unsigned int cpu) { return cpu == 0; -- cgit v1.2.3 From 791ed23f735b0fb1b984772edddf1571195780d8 Mon Sep 17 00:00:00 2001 From: Vladimir Lypak Date: Fri, 28 Jun 2024 16:01:44 +0200 Subject: dt-bindings: interconnect: qcom: Add Qualcomm MSM8953 NoC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the device-tree bindings for interconnect providers used on MSM8953 platform. Signed-off-by: Vladimir Lypak Reviewed-by: Krzysztof Kozlowski Signed-off-by: Barnabás Czémán Link: https://lore.kernel.org/r/20240628-msm8953-interconnect-v3-1-a70d582182dc@mainlining.org Signed-off-by: Georgi Djakov --- include/dt-bindings/interconnect/qcom,msm8953.h | 93 +++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,msm8953.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,msm8953.h b/include/dt-bindings/interconnect/qcom,msm8953.h new file mode 100644 index 000000000000..12564c434af7 --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,msm8953.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Qualcomm MSM8953 interconnect IDs + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_MSM8953_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_MSM8953_H + +/* BIMC fabric */ +#define MAS_APPS_PROC 0 +#define MAS_OXILI 1 +#define MAS_SNOC_BIMC_0 2 +#define MAS_SNOC_BIMC_2 3 +#define MAS_SNOC_BIMC_1 4 +#define MAS_TCU_0 5 +#define SLV_EBI 6 +#define SLV_BIMC_SNOC 7 + +/* PCNOC fabric */ +#define MAS_SPDM 0 +#define MAS_BLSP_1 1 +#define MAS_BLSP_2 2 +#define MAS_USB3 3 +#define MAS_CRYPTO 4 +#define MAS_SDCC_1 5 +#define MAS_SDCC_2 6 +#define MAS_SNOC_PCNOC 7 +#define PCNOC_M_0 8 +#define PCNOC_M_1 9 +#define PCNOC_INT_1 10 +#define PCNOC_INT_2 11 +#define PCNOC_S_0 12 +#define PCNOC_S_1 13 +#define PCNOC_S_2 14 +#define PCNOC_S_3 15 +#define PCNOC_S_4 16 +#define PCNOC_S_6 17 +#define PCNOC_S_7 18 +#define PCNOC_S_8 19 +#define PCNOC_S_9 20 +#define SLV_SPDM 21 +#define SLV_PDM 22 +#define SLV_TCSR 23 +#define SLV_SNOC_CFG 24 +#define SLV_TLMM 25 +#define SLV_MESSAGE_RAM 26 +#define SLV_BLSP_1 27 +#define SLV_BLSP_2 28 +#define SLV_PRNG 29 +#define SLV_CAMERA_SS_CFG 30 +#define SLV_DISP_SS_CFG 31 +#define SLV_VENUS_CFG 32 +#define SLV_GPU_CFG 33 +#define SLV_SDCC_1 34 +#define SLV_SDCC_2 35 +#define SLV_CRYPTO_0_CFG 36 +#define SLV_PMIC_ARB 37 +#define SLV_USB3 38 +#define SLV_IPA_CFG 39 +#define SLV_TCU 40 +#define SLV_PCNOC_SNOC 41 + +/* SNOC fabric */ +#define MAS_QDSS_BAM 0 +#define MAS_BIMC_SNOC 1 +#define MAS_PCNOC_SNOC 2 +#define MAS_IPA 3 +#define MAS_QDSS_ETR 4 +#define QDSS_INT 5 +#define SNOC_INT_0 6 +#define SNOC_INT_1 7 +#define SNOC_INT_2 8 +#define SLV_KPSS_AHB 9 +#define SLV_WCSS 10 +#define SLV_SNOC_BIMC_1 11 +#define SLV_IMEM 12 +#define SLV_SNOC_PCNOC 13 +#define SLV_QDSS_STM 14 +#define SLV_CATS_1 15 +#define SLV_LPASS 16 + +/* SNOC-MM fabric */ +#define MAS_JPEG 0 +#define MAS_MDP 1 +#define MAS_VENUS 2 +#define MAS_VFE0 3 +#define MAS_VFE1 4 +#define MAS_CPP 5 +#define SLV_SNOC_BIMC_0 6 +#define SLV_SNOC_BIMC_2 7 +#define SLV_CATS_0 8 + +#endif /* __DT_BINDINGS_INTERCONNECT_QCOM_MSM8953_H */ -- cgit v1.2.3 From ed2a2ef16a6b9197a0e452308bf6acee6e01f709 Mon Sep 17 00:00:00 2001 From: Sven Peter Date: Wed, 15 May 2024 18:02:58 +0000 Subject: Bluetooth: Add quirk to ignore reserved PHY bits in LE Extended Adv Report Some Broadcom controllers found on Apple Silicon machines abuse the reserved bits inside the PHY fields of LE Extended Advertising Report events for additional flags. Add a quirk to drop these and correctly extract the Primary/Secondary_PHY field. The following excerpt from a btmon trace shows a report received with "Reserved" for "Primary PHY" on a 4388 controller: > HCI Event: LE Meta Event (0x3e) plen 26 LE Extended Advertising Report (0x0d) Num reports: 1 Entry 0 Event type: 0x2515 Props: 0x0015 Connectable Directed Use legacy advertising PDUs Data status: Complete Reserved (0x2500) Legacy PDU Type: Reserved (0x2515) Address type: Random (0x01) Address: 00:00:00:00:00:00 (Static) Primary PHY: Reserved Secondary PHY: No packets SID: no ADI field (0xff) TX power: 127 dBm RSSI: -60 dBm (0xc4) Periodic advertising interval: 0.00 msec (0x0000) Direct address type: Public (0x00) Direct address: 00:00:00:00:00:00 (Apple, Inc.) Data length: 0x00 Cc: stable@vger.kernel.org Fixes: 2e7ed5f5e69b ("Bluetooth: hci_sync: Use advertised PHYs on hci_le_ext_create_conn_sync") Reported-by: Janne Grunau Closes: https://lore.kernel.org/all/Zjz0atzRhFykROM9@robin Tested-by: Janne Grunau Signed-off-by: Sven Peter Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index fe932ca3bc8c..e372a88e8c3f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -324,6 +324,17 @@ enum { * claim to support it. */ HCI_QUIRK_BROKEN_READ_ENC_KEY_SIZE, + + /* + * When this quirk is set, the reserved bits of Primary/Secondary_PHY + * inside the LE Extended Advertising Report events are discarded. + * This is required for some Apple/Broadcom controllers which + * abuse these reserved bits for unrelated flags. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_FIXUP_LE_EXT_ADV_REPORT_PHY, }; /* HCI device flags */ -- cgit v1.2.3 From f1a8f402f13f94263cf349216c257b2985100927 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 24 Jun 2024 09:42:09 -0400 Subject: Bluetooth: L2CAP: Fix deadlock This fixes the following deadlock introduced by 39a92a55be13 ("bluetooth/l2cap: sync sock recv cb and release") ============================================ WARNING: possible recursive locking detected 6.10.0-rc3-g4029dba6b6f1 #6823 Not tainted -------------------------------------------- kworker/u5:0/35 is trying to acquire lock: ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at: l2cap_sock_recv_cb+0x44/0x1e0 but task is already holding lock: ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at: l2cap_get_chan_by_scid+0xaf/0xd0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&chan->lock#2/1); lock(&chan->lock#2/1); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by kworker/u5:0/35: #0: ffff888002b8a940 ((wq_completion)hci0#2){+.+.}-{0:0}, at: process_one_work+0x750/0x930 #1: ffff888002c67dd0 ((work_completion)(&hdev->rx_work)){+.+.}-{0:0}, at: process_one_work+0x44e/0x930 #2: ffff888002ec2510 (&chan->lock#2/1){+.+.}-{3:3}, at: l2cap_get_chan_by_scid+0xaf/0xd0 To fix the original problem this introduces l2cap_chan_lock at l2cap_conless_channel to ensure that l2cap_sock_recv_cb is called with chan->lock held. Fixes: 89e856e124f9 ("bluetooth/l2cap: sync sock recv cb and release") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 6a9d063e9f47..534c3386e714 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -38,6 +38,8 @@ int __hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout, struct sock *sk); +int hci_cmd_sync_status(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout); void hci_cmd_sync_init(struct hci_dev *hdev); void hci_cmd_sync_clear(struct hci_dev *hdev); -- cgit v1.2.3 From 43c0226c9ba56ffebdef9a858dbcf6eb0d376291 Mon Sep 17 00:00:00 2001 From: Dhruva Gole Date: Thu, 27 Jun 2024 11:31:17 +0530 Subject: cpufreq: make cpufreq_boost_enabled() return bool Since this function is supposed to return boost_enabled which is anyway a bool type make sure that it's return value is also marked as bool. This helps maintain better consistency in data types being used. Signed-off-by: Dhruva Gole Reviewed-by: Mario Limonciello Link: https://patch.msgid.link/20240627060117.1809477-1-d-gole@ti.com Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 20f7e98ee8af..6f57de7de433 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -785,7 +785,7 @@ ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf); #ifdef CONFIG_CPU_FREQ int cpufreq_boost_trigger_state(int state); -int cpufreq_boost_enabled(void); +bool cpufreq_boost_enabled(void); int cpufreq_enable_boost_support(void); bool policy_has_boost_freq(struct cpufreq_policy *policy); @@ -1164,9 +1164,9 @@ static inline int cpufreq_boost_trigger_state(int state) { return 0; } -static inline int cpufreq_boost_enabled(void) +static inline bool cpufreq_boost_enabled(void) { - return 0; + return false; } static inline int cpufreq_enable_boost_support(void) -- cgit v1.2.3 From 0214b27fc949a7bcaf57e71a7f9f534d6481d08b Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Mon, 24 Jun 2024 13:46:00 -0400 Subject: iio: Add iio_read_channel_label to inkern API It can be convenient for other in-kernel drivers to reuse IIO channel labels. Export the iio_read_channel_label function to allow this. The signature is different depending on where we are calling it from, so the meat is moved to do_iio_read_channel_label. Signed-off-by: Sean Anderson Acked-by: Jonathan Cameron Link: https://patch.msgid.link/20240624174601.1527244-2-sean.anderson@linux.dev Signed-off-by: Jonathan Cameron --- include/linux/iio/consumer.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index e9910b41d48e..333d1d8ccb37 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -441,4 +441,14 @@ ssize_t iio_read_channel_ext_info(struct iio_channel *chan, ssize_t iio_write_channel_ext_info(struct iio_channel *chan, const char *attr, const char *buf, size_t len); +/** + * iio_read_channel_label() - read label for a given channel + * @chan: The channel being queried. + * @buf: Where to store the attribute value. Assumed to hold + * at least PAGE_SIZE bytes. + * + * Returns the number of bytes written to buf, or an error code. + */ +ssize_t iio_read_channel_label(struct iio_channel *chan, char *buf); + #endif -- cgit v1.2.3 From 5476394aa9f27d670dd2bac426fdb6ac12b12cb3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Jun 2024 13:14:01 +0200 Subject: block: simplify queue_logical_block_size queue_logical_block_size is never called with a 0 queue, and the logical_block_size field in queue_limits is always initialized for a live queue. Signed-off-by: Christoph Hellwig Reviewed-by: John Garry Link: https://lore.kernel.org/r/20240627111407.476276-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 53c41ef4222c..4d0d4b83bc74 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1227,12 +1227,7 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev) static inline unsigned queue_logical_block_size(const struct request_queue *q) { - int retval = 512; - - if (q && q->limits.logical_block_size) - retval = q->limits.logical_block_size; - - return retval; + return q->limits.logical_block_size; } static inline unsigned int bdev_logical_block_size(struct block_device *bdev) -- cgit v1.2.3 From 3ebbd9f6de7ec6d538639ebb657246f629ace81e Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:46 +0100 Subject: net: move ethtool-related netdev state into its own struct net_dev->ethtool is a pointer to new struct ethtool_netdev_state, which currently contains only the wol_enabled field. Suggested-by: Jakub Kicinski Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/293a562278371de7534ed1eb17531838ca090633.1719502239.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 8 ++++++++ include/linux/netdevice.h | 8 +++++--- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c7f6f2bc9cac..374639e661d1 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1004,6 +1004,14 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd, u32 *dev_speed, u8 *dev_duplex); +/** + * struct ethtool_netdev_state - per-netdevice state for ethtool features + * @wol_enabled: Wake-on-LAN is enabled + */ +struct ethtool_netdev_state { + unsigned wol_enabled:1; +}; + struct phy_device; struct phy_tdr_config; struct phy_plca_cfg; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1e3401093c13..3c719f0d5f5a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -80,6 +80,7 @@ struct xdp_buff; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; +struct ethtool_netdev_state; typedef u32 xdp_features_t; @@ -1986,8 +1987,6 @@ enum netdev_reg_state { * switch driver and used to set the phys state of the * switch port. * - * @wol_enabled: Wake-on-LAN is enabled - * * @threaded: napi threaded mode is enabled * * @module_fw_flash_in_progress: Module firmware flashing is in progress. @@ -2001,6 +2000,7 @@ enum netdev_reg_state { * @udp_tunnel_nic_info: static structure describing the UDP tunnel * offload capabilities of the device * @udp_tunnel_nic: UDP tunnel offload state + * @ethtool: ethtool related state * @xdp_state: stores info on attached XDP BPF programs * * @nested_level: Used as a parameter of spin_lock_nested() of @@ -2375,7 +2375,7 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; bool proto_down; bool threaded; - unsigned wol_enabled:1; + unsigned module_fw_flash_in_progress:1; struct list_head net_notifier_list; @@ -2386,6 +2386,8 @@ struct net_device { const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; + struct ethtool_netdev_state *ethtool; + /* protected by rtnl_lock */ struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE]; -- cgit v1.2.3 From 6ad2962f8adfd53fca52dce7f830783e95d99ce7 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:47 +0100 Subject: net: ethtool: attach an XArray of custom RSS contexts to a netdevice Each context stores the RXFH settings (indir, key, and hfunc) as well as optionally some driver private data. Delete any still-existing contexts at netdev unregister time. Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/cbd1c402cec38f2e03124f2ab65b4ae4e08bd90d.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 374639e661d1..c741d6403364 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -159,6 +159,46 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) return index % n_rx_rings; } +/** + * struct ethtool_rxfh_context - a custom RSS context configuration + * @indir_size: Number of u32 entries in indirection table + * @key_size: Size of hash key, in bytes + * @priv_size: Size of driver private data, in bytes + * @hfunc: RSS hash function identifier. One of the %ETH_RSS_HASH_* + * @input_xfrm: Defines how the input data is transformed. Valid values are one + * of %RXH_XFRM_*. + * @indir_configured: indir has been specified (at create time or subsequently) + * @key_configured: hkey has been specified (at create time or subsequently) + */ +struct ethtool_rxfh_context { + u32 indir_size; + u32 key_size; + u16 priv_size; + u8 hfunc; + u8 input_xfrm; + u8 indir_configured:1; + u8 key_configured:1; + /* private: driver private data, indirection table, and hash key are + * stored sequentially in @data area. Use below helpers to access. + */ + u8 data[] __aligned(sizeof(void *)); +}; + +static inline void *ethtool_rxfh_context_priv(struct ethtool_rxfh_context *ctx) +{ + return ctx->data; +} + +static inline u32 *ethtool_rxfh_context_indir(struct ethtool_rxfh_context *ctx) +{ + return (u32 *)(ctx->data + ALIGN(ctx->priv_size, sizeof(u32))); +} + +static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) +{ + return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size); +} + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) @@ -1006,9 +1046,11 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, /** * struct ethtool_netdev_state - per-netdevice state for ethtool features + * @rss_ctx: XArray of custom RSS contexts * @wol_enabled: Wake-on-LAN is enabled */ struct ethtool_netdev_state { + struct xarray rss_ctx; unsigned wol_enabled:1; }; -- cgit v1.2.3 From eac9122f0c41b832065e01977c34946ec8e76c24 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:48 +0100 Subject: net: ethtool: record custom RSS contexts in the XArray Since drivers are still choosing the context IDs, we have to force the XArray to use the ID they've chosen rather than picking one ourselves, and handle the case where they give us an ID that's already in use. Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/801f5faa4cec87c65b2c6e27fb220c944bce593a.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c741d6403364..43a2a143034f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -199,6 +199,17 @@ static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size); } +static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size, + u16 priv_size) +{ + size_t indir_bytes = array_size(indir_size, sizeof(u32)); + size_t flex_len; + + flex_len = size_add(size_add(indir_bytes, key_size), + ALIGN(priv_size, sizeof(u32))); + return struct_size_t(struct ethtool_rxfh_context, data, flex_len); +} + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) @@ -710,6 +721,8 @@ struct ethtool_rxfh_param { * contexts. * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor * RSS. + * @rxfh_priv_size: size of the driver private data area the core should + * allocate for an RSS context (in &struct ethtool_rxfh_context). * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -895,6 +908,7 @@ struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; + u16 rxfh_priv_size; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); -- cgit v1.2.3 From 847a8ab186767be6ee95643f9739fa9d0f839589 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:49 +0100 Subject: net: ethtool: let the core choose RSS context IDs Add a new API to create/modify/remove RSS contexts, that passes in the newly-chosen context ID (not as a pointer) rather than leaving the driver to choose it on create. Also pass in the ctx, allowing drivers to easily use its private data area to store their hardware-specific state. Keep the existing .set_rxfh API for now as a fallback, but deprecate it for custom contexts (rss_context != 0). Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/45f1fe61df2163c091ec394c9f52000c8b16cc3b.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 43a2a143034f..4292a25b2427 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -723,6 +723,10 @@ struct ethtool_rxfh_param { * RSS. * @rxfh_priv_size: size of the driver private data area the core should * allocate for an RSS context (in &struct ethtool_rxfh_context). + * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this + * is zero then the core may choose any (nonzero) ID, otherwise the core + * will only use IDs strictly less than this value, as the @rss_context + * argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -819,6 +823,32 @@ struct ethtool_rxfh_param { * will remain unchanged. * Returns a negative error code or zero. An error code must be returned * if at least one unsupported change was requested. + * @create_rxfh_context: Create a new RSS context with the specified RX flow + * hash indirection table, hash key, and hash function. + * The &struct ethtool_rxfh_context for this context is passed in @ctx; + * note that the indir table, hkey and hfunc are not yet populated as + * of this call. The driver does not need to update these; the core + * will do so if this op succeeds. + * However, if @rxfh.indir is set to %NULL, the driver must update the + * indir table in @ctx with the (default or inherited) table actually in + * use; similarly, if @rxfh.key is %NULL, @rxfh.hfunc is + * %ETH_RSS_HASH_NO_CHANGE, or @rxfh.input_xfrm is %RXH_XFRM_NO_CHANGE, + * the driver should update the corresponding information in @ctx. + * If the driver provides this method, it must also provide + * @modify_rxfh_context and @remove_rxfh_context. + * Returns a negative error code or zero. + * @modify_rxfh_context: Reconfigure the specified RSS context. Allows setting + * the contents of the RX flow hash indirection table, hash key, and/or + * hash function associated with the given context. + * Parameters which are set to %NULL or zero will remain unchanged. + * The &struct ethtool_rxfh_context for this context is passed in @ctx; + * note that it will still contain the *old* settings. The driver does + * not need to update these; the core will do so if this op succeeds. + * Returns a negative error code or zero. An error code must be returned + * if at least one unsupported change was requested. + * @remove_rxfh_context: Remove the specified RSS context. + * The &struct ethtool_rxfh_context for this context is passed in @ctx. + * Returns a negative error code or zero. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or * zero. @@ -909,6 +939,7 @@ struct ethtool_ops { u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; u16 rxfh_priv_size; + u32 rxfh_max_context_id; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); @@ -971,6 +1002,15 @@ struct ethtool_ops { int (*get_rxfh)(struct net_device *, struct ethtool_rxfh_param *); int (*set_rxfh)(struct net_device *, struct ethtool_rxfh_param *, struct netlink_ext_ack *extack); + int (*create_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh); + int (*modify_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh); + int (*remove_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + u32 rss_context); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); -- cgit v1.2.3 From 30a32cdf6b130356805b3193a6208de25cbb2015 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:50 +0100 Subject: net: ethtool: add an extack parameter to new rxfh_context APIs Currently passed as NULL, but will allow drivers to report back errors when ethnl support for these ops is added. Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/6e0012347d175fdd1280363d7bfa76a2f2777e17.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 4292a25b2427..9cdbc8e3ed5c 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1004,13 +1004,16 @@ struct ethtool_ops { struct netlink_ext_ack *extack); int (*create_rxfh_context)(struct net_device *, struct ethtool_rxfh_context *ctx, - const struct ethtool_rxfh_param *rxfh); + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); int (*modify_rxfh_context)(struct net_device *, struct ethtool_rxfh_context *ctx, - const struct ethtool_rxfh_param *rxfh); + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); int (*remove_rxfh_context)(struct net_device *, struct ethtool_rxfh_context *ctx, - u32 rss_context); + u32 rss_context, + struct netlink_ext_ack *extack); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); -- cgit v1.2.3 From 87925151191b64d9623e63ccf11e517eacc99d7d Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 27 Jun 2024 16:33:51 +0100 Subject: net: ethtool: add a mutex protecting RSS contexts While this is not needed to serialise the ethtool entry points (which are all under RTNL), drivers may have cause to asynchronously access dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to do this safely without needing to take the RTNL. Signed-off-by: Edward Cree Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/7f9c15eb7525bf87af62c275dde3a8570ee8bf0a.1719502240.git.ecree.xilinx@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9cdbc8e3ed5c..f74bb0cf8ed1 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1104,10 +1104,13 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, /** * struct ethtool_netdev_state - per-netdevice state for ethtool features * @rss_ctx: XArray of custom RSS contexts + * @rss_lock: Protects entries in @rss_ctx. May be taken from + * within RTNL. * @wol_enabled: Wake-on-LAN is enabled */ struct ethtool_netdev_state { struct xarray rss_ctx; + struct mutex rss_lock; unsigned wol_enabled:1; }; -- cgit v1.2.3 From b8c7dd15ceb87e5f37ec1ed7b56c279d98f3eb53 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 11 Jun 2024 17:12:22 -0700 Subject: kernel-wide: fix spelling mistakes like "assocative" -> "associative" There were several instances of the string "assocat" in the kernel, which should have been spelled "associat", with the various endings of -ive, -ed, -ion, and sometimes beginnging with dis-. Add to the spelling dictionary the corrections so that future instances will be caught by checkpatch, and fix the instances found. Originally noticed by accident with a 'git grep socat'. Link: https://lkml.kernel.org/r/20240612001247.356867-1-jesse.brandeburg@intel.com Signed-off-by: Jesse Brandeburg Cc: Randy Dunlap Signed-off-by: Andrew Morton --- include/linux/nvme-fc-driver.h | 2 +- include/linux/soc/apple/rtkit.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 4109f1bd6128..1177dde77104 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -620,7 +620,7 @@ enum { * * Structure used between LLDD and nvmet-fc layer to represent the exchange * context for a FC-NVME FCP I/O operation (e.g. a nvme sqe, the sqe-related - * memory transfers, and its assocated cqe transfer). + * memory transfers, and its associated cqe transfer). * * The structure is allocated by the LLDD whenever a FCP CMD IU is received * from the FC link. The address of the structure is passed to the nvmet-fc diff --git a/include/linux/soc/apple/rtkit.h b/include/linux/soc/apple/rtkit.h index 8c9ca857ccf6..c06d17599ae7 100644 --- a/include/linux/soc/apple/rtkit.h +++ b/include/linux/soc/apple/rtkit.h @@ -69,7 +69,7 @@ struct apple_rtkit; * Initializes the internal state required to handle RTKit. This * should usually be called within _probe. * - * @dev: Pointer to the device node this coprocessor is assocated with + * @dev: Pointer to the device node this coprocessor is associated with * @cookie: opaque cookie passed to all functions defined in rtkit_ops * @mbox_name: mailbox name used to communicate with the co-processor * @mbox_idx: mailbox index to be used if mbox_name is NULL @@ -83,7 +83,7 @@ struct apple_rtkit *devm_apple_rtkit_init(struct device *dev, void *cookie, * Non-devm version of devm_apple_rtkit_init. Must be freed with * apple_rtkit_free. * - * @dev: Pointer to the device node this coprocessor is assocated with + * @dev: Pointer to the device node this coprocessor is associated with * @cookie: opaque cookie passed to all functions defined in rtkit_ops * @mbox_name: mailbox name used to communicate with the co-processor * @mbox_idx: mailbox index to be used if mbox_name is NULL -- cgit v1.2.3 From 7c812814e8c34a41bff6fe49987760ffaf8702af Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 24 Jun 2024 18:39:49 +0300 Subject: compiler.h: simplify data_race() macro -Wdeclaration-after-statement used since forever required statement expressions to inject __kcsan_disable_current(), __kcsan_enable_current() to mark data race. Now that it is gone, make macro expansion simpler. __unqual_scalar_typeof() is wordy macro by itself. "expr" is expanded twice. Link: https://lkml.kernel.org/r/fb62163f-ba21-4661-be5b-bb5124abc87d@p183 Signed-off-by: Alexey Dobriyan Reviewed-by: Marco Elver Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/compiler.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 8c252e073bd8..2ea6120f3f7a 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -200,10 +200,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define data_race(expr) \ ({ \ - __unqual_scalar_typeof(({ expr; })) __v = ({ \ - __kcsan_disable_current(); \ - expr; \ - }); \ + __kcsan_disable_current(); \ + __auto_type __v = (expr); \ __kcsan_enable_current(); \ __v; \ }) -- cgit v1.2.3 From 32c1935280f11ef03efdb069aa46c0bc631eab7d Mon Sep 17 00:00:00 2001 From: Nils Rothaug Date: Sun, 23 Jun 2024 12:46:43 +0200 Subject: media: tuner-simple: Add support for Tena TNF931D-DFDR1 Tuner ranges were determined by USB capturing the vendor driver of a MyGica UTV3 video capture card. Signed-off-by: Nils Rothaug Signed-off-by: Sean Young Signed-off-by: Hans Verkuil --- include/media/tuner.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/media/tuner.h b/include/media/tuner.h index a7796e0a3659..c5fd6faabfd6 100644 --- a/include/media/tuner.h +++ b/include/media/tuner.h @@ -133,6 +133,7 @@ #define TUNER_SONY_BTF_PK467Z 90 /* NTSC_JP */ #define TUNER_SONY_BTF_PB463Z 91 /* NTSC */ #define TUNER_SI2157 92 +#define TUNER_TENA_TNF_931D_DFDR1 93 /* tv card specific */ #define TDA9887_PRESENT (1<<0) -- cgit v1.2.3 From e31604d5922ef76a58bd3f9ae719486887eb266e Mon Sep 17 00:00:00 2001 From: Nils Rothaug Date: Sun, 23 Jun 2024 12:46:44 +0200 Subject: media: rc: add keymap for MyGica UTV3 remote Add keymap for the simple IR (RC-5) remote that comes with the MyGica UTV3 Analog USB2.0 TV Box video capture card. Signed-off-by: Nils Rothaug Signed-off-by: Sean Young Signed-off-by: Hans Verkuil --- include/media/rc-map.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/media/rc-map.h b/include/media/rc-map.h index 4676545ffd8f..4867eb2f931e 100644 --- a/include/media/rc-map.h +++ b/include/media/rc-map.h @@ -290,6 +290,7 @@ struct rc_map *rc_map_get(const char *name); #define RC_MAP_MSI_DIGIVOX_III "rc-msi-digivox-iii" #define RC_MAP_MSI_TVANYWHERE "rc-msi-tvanywhere" #define RC_MAP_MSI_TVANYWHERE_PLUS "rc-msi-tvanywhere-plus" +#define RC_MAP_MYGICA_UTV3 "rc-mygica-utv3" #define RC_MAP_NEBULA "rc-nebula" #define RC_MAP_NEC_TERRATEC_CINERGY_XS "rc-nec-terratec-cinergy-xs" #define RC_MAP_NORWOOD "rc-norwood" -- cgit v1.2.3 From 3e26d9f08fbe0b73e951a5e810fdb7a332b7e37f Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 20 Jun 2024 14:27:22 +0200 Subject: iio: core: Add new DMABUF interface infrastructure Add the necessary infrastructure to the IIO core to support a new optional DMABUF based interface. With this new interface, DMABUF objects (externally created) can be attached to a IIO buffer, and subsequently used for data transfer. A userspace application can then use this interface to share DMABUF objects between several interfaces, allowing it to transfer data in a zero-copy fashion, for instance between IIO and the USB stack. The userspace application can also memory-map the DMABUF objects, and access the sample data directly. The advantage of doing this vs. the read() interface is that it avoids an extra copy of the data between the kernel and userspace. This is particularly userful for high-speed devices which produce several megabytes or even gigabytes of data per second. As part of the interface, 3 new IOCTLs have been added: IIO_BUFFER_DMABUF_ATTACH_IOCTL(int fd): Attach the DMABUF object identified by the given file descriptor to the buffer. IIO_BUFFER_DMABUF_DETACH_IOCTL(int fd): Detach the DMABUF object identified by the given file descriptor from the buffer. Note that closing the IIO buffer's file descriptor will automatically detach all previously attached DMABUF objects. IIO_BUFFER_DMABUF_ENQUEUE_IOCTL(struct iio_dmabuf *): Request a data transfer to/from the given DMABUF object. Its file descriptor, as well as the transfer size and flags are provided in the "iio_dmabuf" structure. These three IOCTLs have to be performed on the IIO buffer's file descriptor, obtained using the IIO_BUFFER_GET_FD_IOCTL() ioctl. Signed-off-by: Paul Cercueil Co-developed-by: Nuno Sa Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240620122726.41232-4-paul@crapouillou.net Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer_impl.h | 33 +++++++++++++++++++++++++++++++++ include/uapi/linux/iio/buffer.h | 22 ++++++++++++++++++++++ 2 files changed, 55 insertions(+) (limited to 'include') diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h index 89c3fd7c29ca..e72552e026f3 100644 --- a/include/linux/iio/buffer_impl.h +++ b/include/linux/iio/buffer_impl.h @@ -9,8 +9,12 @@ #include #include +struct dma_buf_attachment; +struct dma_fence; struct iio_dev; +struct iio_dma_buffer_block; struct iio_buffer; +struct sg_table; /** * INDIO_BUFFER_FLAG_FIXED_WATERMARK - Watermark level of the buffer can not be @@ -39,6 +43,16 @@ struct iio_buffer; * device stops sampling. Calles are balanced with @enable. * @release: called when the last reference to the buffer is dropped, * should free all resources allocated by the buffer. + * @attach_dmabuf: called from userspace via ioctl to attach one external + * DMABUF. + * @detach_dmabuf: called from userspace via ioctl to detach one previously + * attached DMABUF. + * @enqueue_dmabuf: called from userspace via ioctl to queue this DMABUF + * object to this buffer. Requires a valid DMABUF fd, that + * was previouly attached to this buffer. + * @lock_queue: called when the core needs to lock the buffer queue; + * it is used when enqueueing DMABUF objects. + * @unlock_queue: used to unlock a previously locked buffer queue * @modes: Supported operating modes by this buffer type * @flags: A bitmask combination of INDIO_BUFFER_FLAG_* * @@ -68,6 +82,17 @@ struct iio_buffer_access_funcs { void (*release)(struct iio_buffer *buffer); + struct iio_dma_buffer_block * (*attach_dmabuf)(struct iio_buffer *buffer, + struct dma_buf_attachment *attach); + void (*detach_dmabuf)(struct iio_buffer *buffer, + struct iio_dma_buffer_block *block); + int (*enqueue_dmabuf)(struct iio_buffer *buffer, + struct iio_dma_buffer_block *block, + struct dma_fence *fence, struct sg_table *sgt, + size_t size, bool cyclic); + void (*lock_queue)(struct iio_buffer *buffer); + void (*unlock_queue)(struct iio_buffer *buffer); + unsigned int modes; unsigned int flags; }; @@ -136,6 +161,12 @@ struct iio_buffer { /* @ref: Reference count of the buffer. */ struct kref ref; + + /* @dmabufs: List of DMABUF attachments */ + struct list_head dmabufs; /* P: dmabufs_mutex */ + + /* @dmabufs_mutex: Protects dmabufs */ + struct mutex dmabufs_mutex; }; /** @@ -159,6 +190,8 @@ void iio_buffer_init(struct iio_buffer *buffer); struct iio_buffer *iio_buffer_get(struct iio_buffer *buffer); void iio_buffer_put(struct iio_buffer *buffer); +void iio_buffer_signal_dmabuf_done(struct dma_fence *fence, int ret); + #else /* CONFIG_IIO_BUFFER */ static inline void iio_buffer_get(struct iio_buffer *buffer) {} diff --git a/include/uapi/linux/iio/buffer.h b/include/uapi/linux/iio/buffer.h index 13939032b3f6..c666aa95e532 100644 --- a/include/uapi/linux/iio/buffer.h +++ b/include/uapi/linux/iio/buffer.h @@ -5,6 +5,28 @@ #ifndef _UAPI_IIO_BUFFER_H_ #define _UAPI_IIO_BUFFER_H_ +#include + +/* Flags for iio_dmabuf.flags */ +#define IIO_BUFFER_DMABUF_CYCLIC (1 << 0) +#define IIO_BUFFER_DMABUF_SUPPORTED_FLAGS 0x00000001 + +/** + * struct iio_dmabuf - Descriptor for a single IIO DMABUF object + * @fd: file descriptor of the DMABUF object + * @flags: one or more IIO_BUFFER_DMABUF_* flags + * @bytes_used: number of bytes used in this DMABUF for the data transfer. + * Should generally be set to the DMABUF's size. + */ +struct iio_dmabuf { + __u32 fd; + __u32 flags; + __u64 bytes_used; +}; + #define IIO_BUFFER_GET_FD_IOCTL _IOWR('i', 0x91, int) +#define IIO_BUFFER_DMABUF_ATTACH_IOCTL _IOW('i', 0x92, int) +#define IIO_BUFFER_DMABUF_DETACH_IOCTL _IOW('i', 0x93, int) +#define IIO_BUFFER_DMABUF_ENQUEUE_IOCTL _IOW('i', 0x94, struct iio_dmabuf) #endif /* _UAPI_IIO_BUFFER_H_ */ -- cgit v1.2.3 From d85318900c1c06a251ad3d86fba6bbab116a95d5 Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Thu, 20 Jun 2024 14:27:23 +0200 Subject: iio: buffer-dma: Enable support for DMABUFs Implement iio_dma_buffer_attach_dmabuf(), iio_dma_buffer_detach_dmabuf() and iio_dma_buffer_transfer_dmabuf(), which can then be used by the IIO DMA buffer implementations. Signed-off-by: Paul Cercueil Co-developed-by: Nuno Sa Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240620122726.41232-5-paul@crapouillou.net Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer-dma.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h index 6e27e47077d5..5eb66a399002 100644 --- a/include/linux/iio/buffer-dma.h +++ b/include/linux/iio/buffer-dma.h @@ -7,6 +7,7 @@ #ifndef __INDUSTRIALIO_DMA_BUFFER_H__ #define __INDUSTRIALIO_DMA_BUFFER_H__ +#include #include #include #include @@ -16,6 +17,9 @@ struct iio_dma_buffer_queue; struct iio_dma_buffer_ops; struct device; +struct dma_buf_attachment; +struct dma_fence; +struct sg_table; /** * enum iio_block_state - State of a struct iio_dma_buffer_block @@ -41,6 +45,10 @@ enum iio_block_state { * @queue: Parent DMA buffer queue * @kref: kref used to manage the lifetime of block * @state: Current state of the block + * @cyclic: True if this is a cyclic buffer + * @fileio: True if this buffer is used for fileio mode + * @sg_table: DMA table for the transfer when transferring a DMABUF + * @fence: DMA fence to be signaled when a DMABUF transfer is complete */ struct iio_dma_buffer_block { /* May only be accessed by the owner of the block */ @@ -63,6 +71,12 @@ struct iio_dma_buffer_block { * queue->list_lock if the block is not owned by the core. */ enum iio_block_state state; + + bool cyclic; + bool fileio; + + struct sg_table *sg_table; + struct dma_fence *fence; }; /** @@ -72,6 +86,7 @@ struct iio_dma_buffer_block { * @pos: Read offset in the active block * @block_size: Size of each block * @next_dequeue: index of next block that will be dequeued + * @enabled: Whether the buffer is operating in fileio mode */ struct iio_dma_buffer_queue_fileio { struct iio_dma_buffer_block *blocks[2]; @@ -80,6 +95,7 @@ struct iio_dma_buffer_queue_fileio { size_t block_size; unsigned int next_dequeue; + bool enabled; }; /** @@ -95,6 +111,7 @@ struct iio_dma_buffer_queue_fileio { * the DMA controller * @incoming: List of buffers on the incoming queue * @active: Whether the buffer is currently active + * @num_dmabufs: Total number of DMABUFs attached to this queue * @fileio: FileIO state */ struct iio_dma_buffer_queue { @@ -107,6 +124,7 @@ struct iio_dma_buffer_queue { struct list_head incoming; bool active; + atomic_t num_dmabufs; struct iio_dma_buffer_queue_fileio fileio; }; @@ -144,4 +162,17 @@ int iio_dma_buffer_init(struct iio_dma_buffer_queue *queue, void iio_dma_buffer_exit(struct iio_dma_buffer_queue *queue); void iio_dma_buffer_release(struct iio_dma_buffer_queue *queue); +struct iio_dma_buffer_block * +iio_dma_buffer_attach_dmabuf(struct iio_buffer *buffer, + struct dma_buf_attachment *attach); +void iio_dma_buffer_detach_dmabuf(struct iio_buffer *buffer, + struct iio_dma_buffer_block *block); +int iio_dma_buffer_enqueue_dmabuf(struct iio_buffer *buffer, + struct iio_dma_buffer_block *block, + struct dma_fence *fence, + struct sg_table *sgt, + size_t size, bool cyclic); +void iio_dma_buffer_lock_queue(struct iio_buffer *buffer); +void iio_dma_buffer_unlock_queue(struct iio_buffer *buffer); + #endif -- cgit v1.2.3 From dbbe7eaf0e4795bf003ac06872aaf52b6b6b1310 Mon Sep 17 00:00:00 2001 From: Nuno Sa Date: Thu, 6 Jun 2024 09:22:37 +0200 Subject: dev_printk: add new dev_err_probe() helpers This is similar to dev_err_probe() but for cases where an ERR_PTR() or ERR_CAST() is to be returned simplifying patterns like: dev_err_probe(dev, ret, ...); return ERR_PTR(ret) or dev_err_probe(dev, PTR_ERR(ptr), ...); return ERR_CAST(ptr) Signed-off-by: Nuno Sa Link: https://patch.msgid.link/20240606-dev-add_dev_errp_probe-v3-1-51bb229edd79@analog.com Signed-off-by: Jonathan Cameron --- include/linux/dev_printk.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h index ae80a303c216..ca32b5bb28eb 100644 --- a/include/linux/dev_printk.h +++ b/include/linux/dev_printk.h @@ -277,4 +277,12 @@ do { \ __printf(3, 4) int dev_err_probe(const struct device *dev, int err, const char *fmt, ...); +/* Simple helper for dev_err_probe() when ERR_PTR() is to be returned. */ +#define dev_err_ptr_probe(dev, ___err, fmt, ...) \ + ERR_PTR(dev_err_probe(dev, ___err, fmt, ##__VA_ARGS__)) + +/* Simple helper for dev_err_probe() when ERR_CAST() is to be returned. */ +#define dev_err_cast_probe(dev, ___err_ptr, fmt, ...) \ + ERR_PTR(dev_err_probe(dev, PTR_ERR(___err_ptr), fmt, ##__VA_ARGS__)) + #endif /* _DEVICE_PRINTK_H_ */ -- cgit v1.2.3 From f6549f538fe0b2c389e1a7037f4e21039e25137a Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Sat, 29 Jun 2024 14:42:12 +0200 Subject: ata,scsi: libata-core: Do not leak memory for ata_port struct members libsas is currently not freeing all the struct ata_port struct members, e.g. ncq_sense_buf for a driver supporting Command Duration Limits (CDL). Add a function, ata_port_free(), that is used to free a ata_port, including its struct members. It makes sense to keep the code related to freeing a ata_port in its own function, which will also free all the struct members of struct ata_port. Fixes: 18bd7718b5c4 ("scsi: ata: libata: Handle completion of CDL commands using policy 0xD") Reviewed-by: John Garry Link: https://lore.kernel.org/r/20240629124210.181537-8-cassel@kernel.org Signed-off-by: Niklas Cassel --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 13fb41d25da6..7d3bd7c9664a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1249,6 +1249,7 @@ extern int ata_slave_link_init(struct ata_port *ap); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); extern void ata_port_probe(struct ata_port *ap); +extern void ata_port_free(struct ata_port *ap); extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); extern void ata_sas_tport_delete(struct ata_port *ap); int ata_sas_device_configure(struct scsi_device *sdev, struct queue_limits *lim, -- cgit v1.2.3 From 61842868de13aa7fd7391c626e889f4d6f1450bf Mon Sep 17 00:00:00 2001 From: Jose Ignacio Tornos Martinez Date: Fri, 10 May 2024 10:57:22 +0200 Subject: module: create weak dependecies It has been seen that for some network mac drivers (i.e. lan78xx) the related module for the phy is loaded dynamically depending on the current hardware. In this case, the associated phy is read using mdio bus and then the associated phy module is loaded during runtime (kernel function phy_request_driver_module). However, no software dependency is defined, so the user tools will no be able to get this dependency. For example, if dracut is used and the hardware is present, lan78xx will be included but no phy module will be added, and in the next restart the device will not work from boot because no related phy will be found during initramfs stage. In order to solve this, we could define a normal 'pre' software dependency in lan78xx module with all the possible phy modules (there may be some), but proceeding in that way, all the possible phy modules would be loaded while only one is necessary. The idea is to create a new type of dependency, that we are going to call 'weak' to be used only by the user tools that need to detect this situation. In that way, for example, dracut could check the 'weak' dependency of the modules involved in order to install these dependencies in initramfs too. That is, for the commented lan78xx module, defining the 'weak' dependency with the possible phy modules list, only the necessary phy would be loaded on demand keeping the same behavior, but all the possible phy modules would be available from initramfs. The 'weak' dependency support has been included in kmod: https://github.com/kmod-project/kmod/commit/05828b4a6e9327a63ef94df544a042b5e9ce4fe7 But, take into account that this can only be used if depmod is new enough. If it isn't, depmod will have the same behavior as always (keeping backward compatibility) and the information for the 'weak' dependency will not be provided. Signed-off-by: Jose Ignacio Tornos Martinez Reviewed-by: Lucas De Marchi Signed-off-by: Luis Chamberlain --- include/linux/module.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index ffa1c603163c..ae19bf7e3131 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -173,6 +173,12 @@ extern void cleanup_module(void); */ #define MODULE_SOFTDEP(_softdep) MODULE_INFO(softdep, _softdep) +/* + * Weak module dependencies. See man modprobe.d for details. + * Example: MODULE_WEAKDEP("module-foo") + */ +#define MODULE_WEAKDEP(_weakdep) MODULE_INFO(weakdep, _weakdep) + /* * MODULE_FILE is used for generating modules.builtin * So, make it no-op when this is being built as a module -- cgit v1.2.3 From 5b8feca8dee443055049c8ccfdd97f64a0e1d2b4 Mon Sep 17 00:00:00 2001 From: Daisuke Nojiri Date: Thu, 27 Jun 2024 16:53:08 -0700 Subject: dt-bindings: input: cros-ec-keyboard: Add keyboard matrix v3.0 Add support for keyboard matrix version 3.0, which reduces keyboard ghosting. Signed-off-by: Daisuke Nojiri Acked-by: Krzysztof Kozlowski Reviewed-by: Dmitry Torokhov Link: https://lore.kernel.org/r/9ae4d96cc2ce8c9de8755b9beffb78c641100fe7.1719531519.git.dnojiri@chromium.org Signed-off-by: Tzung-Bi Shih --- include/dt-bindings/input/cros-ec-keyboard.h | 104 +++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/input/cros-ec-keyboard.h b/include/dt-bindings/input/cros-ec-keyboard.h index f0ae03634a96..afc12f6aa642 100644 --- a/include/dt-bindings/input/cros-ec-keyboard.h +++ b/include/dt-bindings/input/cros-ec-keyboard.h @@ -100,4 +100,108 @@ MATRIX_KEY(0x07, 0x0b, KEY_UP) \ MATRIX_KEY(0x07, 0x0c, KEY_LEFT) +/* No numpad */ +#define CROS_TOP_ROW_KEYMAP_V30 \ + MATRIX_KEY(0x00, 0x01, KEY_F11) /* T11 */ \ + MATRIX_KEY(0x00, 0x02, KEY_F1) /* T1 */ \ + MATRIX_KEY(0x00, 0x04, KEY_F10) /* T10 */ \ + MATRIX_KEY(0x00, 0x0b, KEY_F14) /* T14 */ \ + MATRIX_KEY(0x00, 0x0c, KEY_F15) /* T15 */ \ + MATRIX_KEY(0x01, 0x02, KEY_F4) /* T4 */ \ + MATRIX_KEY(0x01, 0x04, KEY_F7) /* T7 */ \ + MATRIX_KEY(0x01, 0x05, KEY_F12) /* T12 */ \ + MATRIX_KEY(0x01, 0x09, KEY_F9) /* T9 */ \ + MATRIX_KEY(0x02, 0x02, KEY_F3) /* T3 */ \ + MATRIX_KEY(0x02, 0x04, KEY_F6) /* T6 */ \ + MATRIX_KEY(0x02, 0x0b, KEY_F8) /* T8 */ \ + MATRIX_KEY(0x03, 0x02, KEY_F2) /* T2 */ \ + MATRIX_KEY(0x03, 0x05, KEY_F13) /* T13 */ \ + MATRIX_KEY(0x04, 0x04, KEY_F5) /* T5 */ + +#define CROS_MAIN_KEYMAP_V30 /* Keycode */ \ + MATRIX_KEY(0x00, 0x03, KEY_B) /* 50 */ \ + MATRIX_KEY(0x00, 0x05, KEY_N) /* 51 */ \ + MATRIX_KEY(0x00, 0x06, KEY_RO) /* 56 (JIS) */ \ + MATRIX_KEY(0x00, 0x08, KEY_EQUAL) /* 13 */ \ + MATRIX_KEY(0x00, 0x09, KEY_HOME) /* 80 (Numpad) */ \ + MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT) /* 62 */ \ + MATRIX_KEY(0x00, 0x10, KEY_FN) /* 127 */ \ + \ + MATRIX_KEY(0x01, 0x01, KEY_ESC) /* 110 */ \ + MATRIX_KEY(0x01, 0x03, KEY_G) /* 35 */ \ + MATRIX_KEY(0x01, 0x06, KEY_H) /* 36 */ \ + MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE) /* 41 */ \ + MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE) /* 15 */ \ + MATRIX_KEY(0x01, 0x0c, KEY_HENKAN) /* 65 (JIS) */ \ + MATRIX_KEY(0x01, 0x0e, KEY_LEFTCTRL) /* 58 */ \ + \ + MATRIX_KEY(0x02, 0x01, KEY_TAB) /* 16 */ \ + MATRIX_KEY(0x02, 0x03, KEY_T) /* 21 */ \ + MATRIX_KEY(0x02, 0x05, KEY_RIGHTBRACE) /* 28 */ \ + MATRIX_KEY(0x02, 0x06, KEY_Y) /* 22 */ \ + MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE) /* 27 */ \ + MATRIX_KEY(0x02, 0x09, KEY_DELETE) /* 76 (Numpad) */ \ + MATRIX_KEY(0x02, 0x0c, KEY_PAGEUP) /* 85 (Numpad) */ \ + MATRIX_KEY(0x02, 0x011, KEY_YEN) /* 14 (JIS) */ \ + \ + MATRIX_KEY(0x03, 0x00, KEY_LEFTMETA) /* Launcher */ \ + MATRIX_KEY(0x03, 0x01, KEY_GRAVE) /* 1 */ \ + MATRIX_KEY(0x03, 0x03, KEY_5) /* 6 */ \ + MATRIX_KEY(0x03, 0x04, KEY_S) /* 32 */ \ + MATRIX_KEY(0x03, 0x06, KEY_MINUS) /* 12 */ \ + MATRIX_KEY(0x03, 0x08, KEY_6) /* 7 */ \ + MATRIX_KEY(0x03, 0x09, KEY_SLEEP) /* Lock */ \ + MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH) /* 29 */ \ + MATRIX_KEY(0x03, 0x0c, KEY_MUHENKAN) /* 63 (JIS) */ \ + MATRIX_KEY(0x03, 0x0e, KEY_RIGHTCTRL) /* 64 */ \ + \ + MATRIX_KEY(0x04, 0x01, KEY_A) /* 31 */ \ + MATRIX_KEY(0x04, 0x02, KEY_D) /* 33 */ \ + MATRIX_KEY(0x04, 0x03, KEY_F) /* 34 */ \ + MATRIX_KEY(0x04, 0x05, KEY_K) /* 38 */ \ + MATRIX_KEY(0x04, 0x06, KEY_J) /* 37 */ \ + MATRIX_KEY(0x04, 0x08, KEY_SEMICOLON) /* 40 */ \ + MATRIX_KEY(0x04, 0x09, KEY_L) /* 39 */ \ + MATRIX_KEY(0x04, 0x0b, KEY_ENTER) /* 43 */ \ + MATRIX_KEY(0x04, 0x0c, KEY_END) /* 81 (Numpad) */ \ + \ + MATRIX_KEY(0x05, 0x01, KEY_1) /* 2 */ \ + MATRIX_KEY(0x05, 0x02, KEY_COMMA) /* 53 */ \ + MATRIX_KEY(0x05, 0x03, KEY_DOT) /* 54 */ \ + MATRIX_KEY(0x05, 0x04, KEY_SLASH) /* 55 */ \ + MATRIX_KEY(0x05, 0x05, KEY_C) /* 48 */ \ + MATRIX_KEY(0x05, 0x06, KEY_SPACE) /* 61 */ \ + MATRIX_KEY(0x05, 0x07, KEY_LEFTSHIFT) /* 44 */ \ + MATRIX_KEY(0x05, 0x08, KEY_X) /* 47 */ \ + MATRIX_KEY(0x05, 0x09, KEY_V) /* 49 */ \ + MATRIX_KEY(0x05, 0x0b, KEY_M) /* 52 */ \ + MATRIX_KEY(0x05, 0x0c, KEY_PAGEDOWN) /* 86 (Numpad) */ \ + \ + MATRIX_KEY(0x06, 0x01, KEY_Z) /* 46 */ \ + MATRIX_KEY(0x06, 0x02, KEY_3) /* 4 */ \ + MATRIX_KEY(0x06, 0x03, KEY_4) /* 5 */ \ + MATRIX_KEY(0x06, 0x04, KEY_2) /* 3 */ \ + MATRIX_KEY(0x06, 0x05, KEY_8) /* 9 */ \ + MATRIX_KEY(0x06, 0x06, KEY_0) /* 11 */ \ + MATRIX_KEY(0x06, 0x08, KEY_7) /* 8 */ \ + MATRIX_KEY(0x06, 0x09, KEY_9) /* 10 */ \ + MATRIX_KEY(0x06, 0x0b, KEY_DOWN) /* 84 */ \ + MATRIX_KEY(0x06, 0x0c, KEY_RIGHT) /* 89 */ \ + MATRIX_KEY(0x06, 0x0d, KEY_LEFTALT) /* 60 */ \ + MATRIX_KEY(0x06, 0x0f, KEY_ASSISTANT) /* 128 */ \ + MATRIX_KEY(0x06, 0x11, KEY_BACKSLASH) /* 42 (JIS, ISO) */ \ + \ + MATRIX_KEY(0x07, 0x01, KEY_U) /* 23 */ \ + MATRIX_KEY(0x07, 0x02, KEY_I) /* 24 */ \ + MATRIX_KEY(0x07, 0x03, KEY_O) /* 25 */ \ + MATRIX_KEY(0x07, 0x04, KEY_P) /* 26 */ \ + MATRIX_KEY(0x07, 0x05, KEY_Q) /* 17 */ \ + MATRIX_KEY(0x07, 0x06, KEY_W) /* 18 */ \ + MATRIX_KEY(0x07, 0x07, KEY_RIGHTSHIFT) /* 57 */ \ + MATRIX_KEY(0x07, 0x08, KEY_E) /* 19 */ \ + MATRIX_KEY(0x07, 0x09, KEY_R) /* 20 */ \ + MATRIX_KEY(0x07, 0x0b, KEY_UP) /* 83 */ \ + MATRIX_KEY(0x07, 0x0c, KEY_LEFT) /* 79 */ \ + MATRIX_KEY(0x07, 0x11, KEY_102ND) /* 45 (ISO) */ + #endif /* _CROS_EC_KEYBOARD_H */ -- cgit v1.2.3 From 7c8110057b1bb9ab9f47ac0efc554d5c3a53b693 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Jun 2024 14:35:50 -0700 Subject: tcp_metrics: add UAPI to the header guard tcp_metrics' header lacks the customary _UAPI in the header guard. This makes YNL build rules work less seamlessly. We can easily fix that on YNL side, but this could also be problematic if we ever needed to create a kernel-only tcp_metrics.h. Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- include/uapi/linux/tcp_metrics.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h index 7cb4a172feed..c48841076998 100644 --- a/include/uapi/linux/tcp_metrics.h +++ b/include/uapi/linux/tcp_metrics.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* tcp_metrics.h - TCP Metrics Interface */ -#ifndef _LINUX_TCP_METRICS_H -#define _LINUX_TCP_METRICS_H +#ifndef _UAPI_LINUX_TCP_METRICS_H +#define _UAPI_LINUX_TCP_METRICS_H #include @@ -58,4 +58,4 @@ enum { #define TCP_METRICS_CMD_MAX (__TCP_METRICS_CMD_MAX - 1) -#endif /* _LINUX_TCP_METRICS_H */ +#endif /* _UAPI_LINUX_TCP_METRICS_H */ -- cgit v1.2.3 From 85674625e0bc25be4dbaa165a556ef6037328379 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 27 Jun 2024 14:35:51 -0700 Subject: tcp_metrics: add netlink protocol spec in YAML Add a protocol spec for tcp_metrics, so that it's accessible via YNL. Useful at the very least for testing fixes. In this episode of "10,000 ways to complicate netlink" the metric nest has defines which are off by 1. iproute2 does: struct rtattr *m[TCP_METRIC_MAX + 1 + 1]; parse_rtattr_nested(m, TCP_METRIC_MAX + 1, a); for (i = 0; i < TCP_METRIC_MAX + 1; i++) { // ... attr = m[i + 1]; This is too weird to support in YNL, add a new set of defines with _correct_ values to the official kernel header. Signed-off-by: Jakub Kicinski Reviewed-by: Donald Hunter Signed-off-by: David S. Miller --- include/uapi/linux/tcp_metrics.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h index c48841076998..927c735a5b0e 100644 --- a/include/uapi/linux/tcp_metrics.h +++ b/include/uapi/linux/tcp_metrics.h @@ -27,6 +27,22 @@ enum tcp_metric_index { #define TCP_METRIC_MAX (__TCP_METRIC_MAX - 1) +/* Re-define enum tcp_metric_index, again, using the values carried + * as netlink attribute types. + */ +enum { + TCP_METRICS_A_METRICS_RTT = 1, + TCP_METRICS_A_METRICS_RTTVAR, + TCP_METRICS_A_METRICS_SSTHRESH, + TCP_METRICS_A_METRICS_CWND, + TCP_METRICS_A_METRICS_REODERING, + TCP_METRICS_A_METRICS_RTT_US, + TCP_METRICS_A_METRICS_RTTVAR_US, + + __TCP_METRICS_A_METRICS_MAX +}; +#define TCP_METRICS_A_METRICS_MAX (__TCP_METRICS_A_METRICS_MAX - 1) + enum { TCP_METRICS_ATTR_UNSPEC, TCP_METRICS_ATTR_ADDR_IPV4, /* u32 */ -- cgit v1.2.3 From 65528cfb21fdb68de8ae6dccae19af180d93e143 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 16 Jun 2024 19:08:34 +0300 Subject: net/mlx5: mlx5_ifc update for multi-plane support Add new fields to support mlx5 multi-plane feature. Actual support will be added in following patches. Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/36a74a1b1d2b7b59c99cda4abad1794ddde30230.1718553901.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 09d9d87d62c6..61738990e399 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -793,7 +793,7 @@ struct mlx5_ifc_ads_bits { u8 reserved_at_2[0xe]; u8 pkey_index[0x10]; - u8 reserved_at_20[0x8]; + u8 plane_index[0x8]; u8 grh[0x1]; u8 mlid[0x7]; u8 rlid[0x10]; @@ -1990,7 +1990,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_c0[0x8]; u8 migration_multi_load[0x1]; u8 migration_tracking_state[0x1]; - u8 reserved_at_ca[0x6]; + u8 multiplane_qp_ud[0x1]; + u8 reserved_at_cb[0x5]; u8 migration_in_chunks[0x1]; u8 reserved_at_d1[0xf]; @@ -4172,7 +4173,8 @@ struct mlx5_ifc_hca_vport_context_bits { u8 has_smi[0x1]; u8 has_raw[0x1]; u8 grh_required[0x1]; - u8 reserved_at_104[0xc]; + u8 reserved_at_104[0x4]; + u8 num_port_plane[0x8]; u8 port_physical_state[0x4]; u8 vport_state_policy[0x4]; u8 port_state[0x4]; @@ -7692,7 +7694,7 @@ struct mlx5_ifc_mad_ifc_in_bits { u8 op_mod[0x10]; u8 remote_lid[0x10]; - u8 reserved_at_50[0x8]; + u8 plane_index[0x8]; u8 port[0x8]; u8 reserved_at_60[0x20]; @@ -9621,7 +9623,9 @@ struct mlx5_ifc_ptys_reg_bits { u8 an_disable_cap[0x1]; u8 reserved_at_3[0x5]; u8 local_port[0x8]; - u8 reserved_at_10[0xd]; + u8 reserved_at_10[0x8]; + u8 plane_ind[0x4]; + u8 reserved_at_1c[0x1]; u8 proto_mask[0x3]; u8 an_status[0x4]; -- cgit v1.2.3 From 2a5db20fa532198639671713c6213f96ff285b85 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 16 Jun 2024 19:08:35 +0300 Subject: RDMA/mlx5: Add support to multi-plane device and port When multi-plane is supported, a logical port, which is aggregation of multiple physical plane ports, is exposed for data transmission. Compared with a normal mlx5 IB port, this logical port supports all functionalities except Subnet Management. Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/7e37c06c9cb243be9ac79930cd17053903785b95.1718553901.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0d31f77396fc..a96438ded15f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -917,6 +917,7 @@ struct mlx5_hca_vport_context { u16 qkey_violation_counter; u16 pkey_violation_counter; bool grh_required; + u8 num_plane; }; #define STRUCT_FIELD(header, field) \ -- cgit v1.2.3 From 5d7e328e20b3d2bd3e1e8bea7a868ab8892aeed1 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 1 Jul 2024 11:44:42 +0100 Subject: ASoC: cs35l56: Revert support for dual-ownership of ASP registers This patch reverts a series of commits that allowed for the ASP registers to be owned by either the driver or the firmware. Nothing currently depends on the functionality that is being reverted, so it is safe to remove. The commits being reverted are (last 3 are bugfixes to the first 2): commit 72a77d7631c6 ("ASoC: cs35l56: Fix to ensure ASP1 registers match cache") commit 07f7d6e7a124 ("ASoC: cs35l56: Fix for initializing ASP1 mixer registers") commit 4703b014f28b ("ASoC: cs35l56: fix reversed if statement in cs35l56_dspwait_asp1tx_put()") commit c14f09f010cc ("ASoC: cs35l56: Fix deadlock in ASP1 mixer register initialization") commit dfd2ffb37399 ("ASoC: cs35l56: Prevent overwriting firmware ASP config") These reverts have been squashed into a single commit because there would be no reason to revert only some of them (which would just reintroduce bugs). The changes introduced by the commits were well-intentioned but somewhat misguided. ACPI does not provide any information about how audio hardware is linked together, so that information has to be hardcoded into drivers. On Windows the firmware is customized to statically setup appropriate configuration of the audio links, and the intent of the commits was to re-use this information if the Linux host drivers aren't taking control of the ASP. This would avoid having to hardcode the ASP config into the machine driver on some systems. However, this added complexity and race conditions into the driver. It also complicates implementation of new code. The only case where the ASP is used but the host is not taking ownership is when CS35L56 is used in SoundWire mode with the ASP as a reference audio interconnect. But even in that case it's not necessarily required even if the firmware initialized it. Typically it is used to avoid the host SDCA drivers having to be capable of aggregating capture paths from multiple SoundWire peripherals. But the SOF SoundWire support is capable of doing that aggregation. Reverting all these commits significantly simplifies the driver. Let's just use the normal Linux mechanisms of the machine driver and ALSA controls to set things up instead of trying to use the firmware to do use-case setup. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20240701104444.172556-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 1a3c6f66f620..2e19722989f7 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -267,13 +267,18 @@ struct cs35l56_base { bool fw_patched; bool secured; bool can_hibernate; - bool fw_owns_asp1; bool cal_data_valid; s8 cal_index; struct cirrus_amp_cal_data cal_data; struct gpio_desc *reset_gpio; }; +/* Temporary to avoid a build break with the HDA driver */ +static inline int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base) +{ + return 0; +} + extern struct regmap_config cs35l56_regmap_i2c; extern struct regmap_config cs35l56_regmap_spi; extern struct regmap_config cs35l56_regmap_sdw; @@ -284,8 +289,6 @@ extern const char * const cs35l56_tx_input_texts[CS35L56_NUM_INPUT_SRC]; extern const unsigned int cs35l56_tx_input_values[CS35L56_NUM_INPUT_SRC]; int cs35l56_set_patch(struct cs35l56_base *cs35l56_base); -int cs35l56_init_asp1_regs_for_driver_control(struct cs35l56_base *cs35l56_base); -int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_base *cs35l56_base); int cs35l56_mbox_send(struct cs35l56_base *cs35l56_base, unsigned int command); int cs35l56_firmware_shutdown(struct cs35l56_base *cs35l56_base); int cs35l56_wait_for_firmware_boot(struct cs35l56_base *cs35l56_base); -- cgit v1.2.3 From e2996141d6db0d8b353e1c221a37c8e1be109d4a Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 1 Jul 2024 11:44:43 +0100 Subject: ASoC: cs35l56: Remove support for A1 silicon No product was ever released with A1 silicon so there is no need for the driver to include support for it. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20240701104444.172556-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 2e19722989f7..642ef690ebc2 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -80,9 +80,7 @@ #define CS35L56_DSP1_AHBM_WINDOW_DEBUG_1 0x25E2044 #define CS35L56_DSP1_XMEM_UNPACKED24_0 0x2800000 #define CS35L56_DSP1_FW_VER 0x2800010 -#define CS35L56_DSP1_HALO_STATE_A1 0x2801E58 #define CS35L56_DSP1_HALO_STATE 0x28021E0 -#define CS35L56_DSP1_PM_CUR_STATE_A1 0x2804000 #define CS35L56_DSP1_PM_CUR_STATE 0x2804308 #define CS35L56_DSP1_XMEM_UNPACKED24_8191 0x2807FFC #define CS35L56_DSP1_CORE_BASE 0x2B80000 -- cgit v1.2.3 From bca51197620a257e2954be99b16f05115c3b2630 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 16 Jun 2024 19:08:36 +0300 Subject: RDMA/core: Support IB sub device with type "SMI" This patch adds 2 APIs, as well as driver operations to support adding and deleting an IB sub device, which provides part of functionalities of it's parent. A sub device has a type; for a sub device with type "SMI", it provides the smi capability through umad for its parent, meaning uverb is not supported. A sub device cannot live without a parent. So when a parent is released, all it's sub devices are released as well. Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/44253f7508b21eb2caefea3980c2bc072869116c.1718553901.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 43 ++++++++++++++++++++++++++++++++++++++++ include/uapi/rdma/rdma_netlink.h | 5 +++++ 2 files changed, 48 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 825043512b2d..d4128958908f 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2663,6 +2663,18 @@ struct ib_device_ops { */ int (*get_numa_node)(struct ib_device *dev); + /** + * add_sub_dev - Add a sub IB device + */ + struct ib_device *(*add_sub_dev)(struct ib_device *parent, + enum rdma_nl_dev_type type, + const char *name); + + /** + * del_sub_dev - Delete a sub IB device + */ + void (*del_sub_dev)(struct ib_device *sub_dev); + DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_counters); DECLARE_RDMA_OBJ_SIZE(ib_cq); @@ -2773,6 +2785,15 @@ struct ib_device { char iw_ifname[IFNAMSIZ]; u32 iw_driver_flags; u32 lag_flags; + + /* A parent device has a list of sub-devices */ + struct mutex subdev_lock; + struct list_head subdev_list_head; + + /* A sub device has a type and a parent */ + enum rdma_nl_dev_type type; + struct ib_device *parent; + struct list_head subdev_list; }; static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size, @@ -4822,4 +4843,26 @@ static inline u16 rdma_get_udp_sport(u32 fl, u32 lqpn, u32 rqpn) const struct ib_port_immutable* ib_port_immutable_read(struct ib_device *dev, unsigned int port); + +/** ib_add_sub_device - Add a sub IB device on an existing one + * + * @parent: The IB device that needs to add a sub device + * @type: The type of the new sub device + * @name: The name of the new sub device + * + * + * Return 0 on success, an error code otherwise + */ +int ib_add_sub_device(struct ib_device *parent, + enum rdma_nl_dev_type type, + const char *name); + + +/** ib_del_sub_device_and_put - Delect an IB sub device while holding a 'get' + * + * @sub: The sub device that is going to be deleted + * + * Return 0 on success, an error code otherwise + */ +int ib_del_sub_device_and_put(struct ib_device *sub); #endif /* IB_VERBS_H */ diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index a214fc259f28..d15ee16be722 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -602,4 +602,9 @@ enum rdma_nl_counter_mask { RDMA_COUNTER_MASK_QP_TYPE = 1, RDMA_COUNTER_MASK_PID = 1 << 1, }; + +/* Supported rdma device types. */ +enum rdma_nl_dev_type { + RDMA_DEVICE_TYPE_SMI = 1, +}; #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From 36e97bbc2dca61751c5b4b7f248cd850a7654a19 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 16 Jun 2024 19:08:37 +0300 Subject: RDMA: Set type of rdma_ah to IB for a SMI sub device An address handle created on a SMI port has type IB, as a SMI port it's used for SMI management through umad. Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/195be77aae0cce93522269f22f1303d2ccbef605.1718553901.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d4128958908f..e09d4f09b602 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4662,6 +4662,8 @@ static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev, return RDMA_AH_ATTR_TYPE_OPA; return RDMA_AH_ATTR_TYPE_IB; } + if (dev->type == RDMA_DEVICE_TYPE_SMI) + return RDMA_AH_ATTR_TYPE_IB; return RDMA_AH_ATTR_TYPE_UNDEFINED; } -- cgit v1.2.3 From 060c642b2ab8b40b39f9db99c1d14c7d19ba507f Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 16 Jun 2024 19:08:40 +0300 Subject: RDMA/nldev: Add support to add/delete a sub IB device through netlink Add new netlink commands and attributes to support adding and deleting a sub IB device with admin privilege. Examples: $ rdma dev add smi1 type SMI parent ibp8s0f1 $ rdma dev del smi1 Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/77cbf1b36359642be8a8d8c5c2f4e585b544282f.1718553901.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index d15ee16be722..bd52fb325e22 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -301,6 +301,10 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_RES_SRQ_GET_RAW, + RDMA_NLDEV_CMD_NEWDEV, + + RDMA_NLDEV_CMD_DELDEV, + RDMA_NLDEV_NUM_OPS }; @@ -564,6 +568,8 @@ enum rdma_nldev_attr { */ RDMA_NLDEV_ATTR_RES_SUBTYPE, /* string */ + RDMA_NLDEV_ATTR_DEV_TYPE, /* u8 */ + /* * Always the end */ -- cgit v1.2.3 From 294424839b5ec2ecd17f4c8409796846b2b8dd31 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 16 Jun 2024 19:08:41 +0300 Subject: RDMA/nldev: Add support to dump device type and parent device if exists If a device has a specific type or a parent device, dump them as well. Example: $ rdma dev show smi1 3: smi1: node_type ca fw 20.38.1002 node_guid 9803:9b03:009f:d5ef sys_image_guid 9803:9b03:009f:d5ee type smi parent ibp8s0f1 Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/4c022e3e34b5de1254a3b367d502a362cdd0c53a.1718553901.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/uapi/rdma/rdma_netlink.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index bd52fb325e22..4b69242d7848 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -570,6 +570,8 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_DEV_TYPE, /* u8 */ + RDMA_NLDEV_ATTR_PARENT_NAME, /* string */ + /* * Always the end */ -- cgit v1.2.3 From 3b43399b297c52cfe4dfe0194b6ad89d52bf8c35 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 16 Jun 2024 19:08:42 +0300 Subject: RDMA/mlx5: Add plane index support when querying PTYS registers Support the new "plane_ind" field when querying port PTYS registers. This is needed when querying the rate of a plane port. Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/1f703c36306aa46917fcd88eadbb23b3e380d526.1718553901.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/port.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 26092c78a985..e68d42b8ce65 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -155,10 +155,11 @@ struct mlx5_port_eth_proto { int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, - int ptys_size, int proto_mask, u8 local_port); + int ptys_size, int proto_mask, + u8 local_port, u8 plane_index); int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper, - u16 *proto_oper, u8 local_port); + u16 *proto_oper, u8 local_port, u8 plane_index); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); -- cgit v1.2.3 From c6b6677d85d47f5562020aa082d9b5f90738fdcd Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 16 Jun 2024 19:08:43 +0300 Subject: net/mlx5: mlx5_ifc update for accessing ppcnt register of plane ports This patch adds new fields to support multi-plane and the extend port counters group. Actual support will be added in the next patch. Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/70221cdd79aad0e21cbf385d9567e3ebffbc5137.1718553901.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 47 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 61738990e399..5fea7b747607 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2651,6 +2651,46 @@ struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits { u8 port_xmit_wait[0x20]; }; +struct mlx5_ifc_ib_ext_port_cntrs_grp_data_layout_bits { + u8 reserved_at_0[0x300]; + + u8 port_xmit_data_high[0x20]; + + u8 port_xmit_data_low[0x20]; + + u8 port_rcv_data_high[0x20]; + + u8 port_rcv_data_low[0x20]; + + u8 port_xmit_pkts_high[0x20]; + + u8 port_xmit_pkts_low[0x20]; + + u8 port_rcv_pkts_high[0x20]; + + u8 port_rcv_pkts_low[0x20]; + + u8 reserved_at_400[0x80]; + + u8 port_unicast_xmit_pkts_high[0x20]; + + u8 port_unicast_xmit_pkts_low[0x20]; + + u8 port_multicast_xmit_pkts_high[0x20]; + + u8 port_multicast_xmit_pkts_low[0x20]; + + u8 port_unicast_rcv_pkts_high[0x20]; + + u8 port_unicast_rcv_pkts_low[0x20]; + + u8 port_multicast_rcv_pkts_high[0x20]; + + u8 port_multicast_rcv_pkts_low[0x20]; + + u8 reserved_at_580[0x240]; +}; + struct mlx5_ifc_eth_per_tc_prio_grp_data_layout_bits { u8 transmit_queue_high[0x20]; @@ -4543,6 +4583,7 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits { struct mlx5_ifc_eth_per_tc_prio_grp_data_layout_bits eth_per_tc_prio_grp_data_layout; struct mlx5_ifc_eth_per_tc_congest_prio_grp_data_layout_bits eth_per_tc_congest_prio_grp_data_layout; struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout; + struct mlx5_ifc_ib_ext_port_cntrs_grp_data_layout_bits ib_ext_port_cntrs_grp_data_layout; struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs; struct mlx5_ifc_phys_layer_statistical_cntrs_bits phys_layer_statistical_cntrs; u8 reserved_at_0[0x7c0]; @@ -9851,8 +9892,10 @@ struct mlx5_ifc_ppcnt_reg_bits { u8 grp[0x6]; u8 clr[0x1]; - u8 reserved_at_21[0x1c]; - u8 prio_tc[0x3]; + u8 reserved_at_21[0x13]; + u8 plane_ind[0x4]; + u8 reserved_at_38[0x3]; + u8 prio_tc[0x5]; union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set; }; -- cgit v1.2.3 From 7a2210a57d423cbdb9c5f2e8b12c65d7472ff147 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Sun, 16 Jun 2024 19:08:44 +0300 Subject: RDMA/mlx5: Support per-plane port IB counters by querying PPCNT register Supports per-plane port counters by querying PPCNT register with the "extended port counters" group, as the query_vport_counter command doesn't support plane ports. Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/06ffb582d67159b7def4654c8272d3d6e8bd2f2f.1718553901.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index d7bb31d9a446..68bd1b4737ea 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1466,6 +1466,7 @@ enum { MLX5_PER_TRAFFIC_CLASS_CONGESTION_GROUP = 0x13, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP = 0x16, MLX5_INFINIBAND_PORT_COUNTERS_GROUP = 0x20, + MLX5_INFINIBAND_EXTENDED_PORT_COUNTERS_GROUP = 0x21, }; enum { -- cgit v1.2.3 From 762ced1630a97a457ad2fd5f5a36849009808431 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 1 Jul 2024 14:39:50 +0200 Subject: HID: bpf: fix gcc warning and unify __u64 into u64 I've got multiple reports of: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast]. Let's use the same trick than kernel/bpf/helpers.c to shut up that warning. Even if we were on an architecture with addresses on more than 64 bits, this isn't much of an issue as the address is not used as a pointer, but as an hash and the caller is not supposed to go back to the kernel address ever. And while we change those, make sure we use u64 instead of __u64 for consistency Reported-by: Stephen Rothwell Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202406280633.OPB5uIFj-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202406282304.UydSVncq-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202406282242.Fk738zzy-lkp@intel.com/ Reported-by: Mirsad Todorovac Fixes: 67eccf151d76 ("HID: add source argument to HID low level functions") Link: https://patch.msgid.link/20240701-fix-cki-v2-2-20564e2e1393@kernel.org Signed-off-by: Benjamin Tissoires --- include/linux/hid_bpf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 93546ee7677a..3f6584014311 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -68,9 +68,9 @@ struct hid_ops { unsigned char reportnum, __u8 *buf, size_t len, enum hid_report_type rtype, enum hid_class_request reqtype, - __u64 source, bool from_bpf); + u64 source, bool from_bpf); int (*hid_hw_output_report)(struct hid_device *hdev, __u8 *buf, size_t len, - __u64 source, bool from_bpf); + u64 source, bool from_bpf); int (*hid_input_report)(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 size, int interrupt, u64 source, bool from_bpf, bool lock_already_taken); @@ -115,7 +115,7 @@ struct hid_bpf_ops { * Context: Interrupt context. */ int (*hid_device_event)(struct hid_bpf_ctx *ctx, enum hid_report_type report_type, - __u64 source); + u64 source); /** * @hid_rdesc_fixup: called when the probe function parses the report descriptor -- cgit v1.2.3 From 260ffc9676b635c2ededc39285bfa41f83536ee1 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 1 Jul 2024 14:39:51 +0200 Subject: HID: bpf: doc fixes for hid_hw_request() hooks We had the following errors while doing make htmldocs: Documentation/hid/hid-bpf:185: include/linux/hid_bpf.h:144: ERROR: Unexpected indentation. Documentation/hid/hid-bpf:185: include/linux/hid_bpf.h:145: WARNING: Block quote ends without a blank line; unexpected unindent. Documentation/hid/hid-bpf:185: include/linux/hid_bpf.h:147: ERROR: Unexpected indentation. Reported-by: Stephen Rothwell Fixes: 8bd0488b5ea5 ("HID: bpf: add HID-BPF hooks for hid_hw_raw_requests") Link: https://patch.msgid.link/20240701-fix-cki-v2-3-20564e2e1393@kernel.org Signed-off-by: Benjamin Tissoires --- include/linux/hid_bpf.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 3f6584014311..c30c31b79419 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -139,12 +139,15 @@ struct hid_bpf_ops { * * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx * ``reportnum``: the report number, as in hid_hw_raw_request() + * * ``rtype``: the report type (``HID_INPUT_REPORT``, ``HID_FEATURE_REPORT``, - * ``HID_OUTPUT_REPORT``) + * ``HID_OUTPUT_REPORT``) + * * ``reqtype``: the request + * * ``source``: a u64 referring to a uniq but identifiable source. If %0, the - * kernel itself emitted that call. For hidraw, ``source`` is set - * to the associated ``struct file *``. + * kernel itself emitted that call. For hidraw, ``source`` is set + * to the associated ``struct file *``. * * Return: %0 to keep processing the request by hid-core; any other value * stops hid-core from processing that event. A positive value should be @@ -153,7 +156,7 @@ struct hid_bpf_ops { */ int (*hid_hw_request)(struct hid_bpf_ctx *ctx, unsigned char reportnum, enum hid_report_type rtype, enum hid_class_request reqtype, - __u64 source); + u64 source); /** * @hid_hw_output_report: called whenever a hid_hw_output_report() call is emitted -- cgit v1.2.3 From c79de517a226b86419a5baa867e65e3f8118829f Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Mon, 1 Jul 2024 14:39:52 +0200 Subject: HID: bpf: doc fixes for hid_hw_request() hooks We had the following errors while doing make htmldocs: Documentation/hid/hid-bpf:185: include/linux/hid_bpf.h:167: ERROR: Unexpected indentation. Also ensure consistency with the rest of the __u64 vs u64. Reported-by: Stephen Rothwell Fixes: 9286675a2aed ("HID: bpf: add HID-BPF hooks for hid_hw_output_report") Link: https://patch.msgid.link/20240701-fix-cki-v2-4-20564e2e1393@kernel.org Signed-off-by: Benjamin Tissoires --- include/linux/hid_bpf.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index c30c31b79419..9ca96fc90449 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -138,6 +138,7 @@ struct hid_bpf_ops { * It has the following arguments: * * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx + * * ``reportnum``: the report number, as in hid_hw_raw_request() * * ``rtype``: the report type (``HID_INPUT_REPORT``, ``HID_FEATURE_REPORT``, @@ -165,16 +166,17 @@ struct hid_bpf_ops { * It has the following arguments: * * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx + * * ``source``: a u64 referring to a uniq but identifiable source. If %0, the - * kernel itself emitted that call. For hidraw, ``source`` is set - * to the associated ``struct file *``. + * kernel itself emitted that call. For hidraw, ``source`` is set + * to the associated ``struct file *``. * * Return: %0 to keep processing the request by hid-core; any other value * stops hid-core from processing that event. A positive value should be * returned with the number of bytes written to the device; a negative error * code interrupts the processing of this call. */ - int (*hid_hw_output_report)(struct hid_bpf_ctx *ctx, __u64 source); + int (*hid_hw_output_report)(struct hid_bpf_ctx *ctx, u64 source); /* private: do not show up in the docs */ @@ -203,9 +205,9 @@ int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, u32 size, enum hid_report_type rtype, enum hid_class_request reqtype, - __u64 source, bool from_bpf); + u64 source, bool from_bpf); int dispatch_hid_bpf_output_report(struct hid_device *hdev, __u8 *buf, u32 size, - __u64 source, bool from_bpf); + u64 source, bool from_bpf); int hid_bpf_connect_device(struct hid_device *hdev); void hid_bpf_disconnect_device(struct hid_device *hdev); void hid_bpf_destroy_device(struct hid_device *hid); @@ -221,7 +223,7 @@ static inline int dispatch_hid_bpf_raw_requests(struct hid_device *hdev, enum hid_class_request reqtype, u64 source, bool from_bpf) { return 0; } static inline int dispatch_hid_bpf_output_report(struct hid_device *hdev, __u8 *buf, u32 size, - __u64 source, bool from_bpf) { return 0; } + u64 source, bool from_bpf) { return 0; } static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; } static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} -- cgit v1.2.3 From 63e2f40c9e3187641afacde4153f54b3ee4dbc8c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 29 Jun 2024 21:48:41 +0200 Subject: syscalls: fix sys_fanotify_mark prototype My earlier fix missed an incorrect function prototype that shows up on native 32-bit builds: In file included from fs/notify/fanotify/fanotify_user.c:14: include/linux/syscalls.h:248:25: error: conflicting types for 'sys_fanotify_mark'; have 'long int(int, unsigned int, u32, u32, int, const char *)' {aka 'long int(int, unsigned int, unsigned int, unsigned int, int, const char *)'} 1924 | SYSCALL32_DEFINE6(fanotify_mark, | ^~~~~~~~~~~~~~~~~ include/linux/syscalls.h:862:17: note: previous declaration of 'sys_fanotify_mark' with type 'long int(int, unsigned int, u64, int, const char *)' {aka 'long int(int, unsigned int, long long unsigned int, int, const char *)'} On x86 and powerpc, the prototype is also wrong but hidden in an #ifdef, so it never caused problems. Add another alternative declaration that matches the conditional function definition. Fixes: 403f17a33073 ("parisc: use generic sys_fanotify_mark implementation") Cc: stable@vger.kernel.org Reported-by: Guenter Roeck Reported-by: Geert Uytterhoeven Reported-by: kernel test robot Signed-off-by: Arnd Bergmann --- include/linux/syscalls.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 63424af87bba..fff820c3e93e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -859,9 +859,15 @@ asmlinkage long sys_prlimit64(pid_t pid, unsigned int resource, const struct rlimit64 __user *new_rlim, struct rlimit64 __user *old_rlim); asmlinkage long sys_fanotify_init(unsigned int flags, unsigned int event_f_flags); +#if defined(CONFIG_ARCH_SPLIT_ARG64) +asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, + unsigned int mask_1, unsigned int mask_2, + int dfd, const char __user * pathname); +#else asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, u64 mask, int fd, const char __user *pathname); +#endif asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, struct file_handle __user *handle, int __user *mnt_id, int flag); -- cgit v1.2.3 From 2681bbaa39cc2b5711494cc7e0166538f24e9c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 30 Jun 2024 22:54:08 +0200 Subject: ACPI: battery: add devm_battery_hook_register() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a utility function for device-managed registration of battery hooks. The function makes it easier to manage the lifecycle of a hook. Acked-by: Rafael J. Wysocki Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20240630-cros_ec-charge-control-v5-1-8f649d018c52@weissschuh.net Signed-off-by: Tzung-Bi Shih --- include/acpi/battery.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/acpi/battery.h b/include/acpi/battery.h index 611a2561a014..c93f16dfb944 100644 --- a/include/acpi/battery.h +++ b/include/acpi/battery.h @@ -2,6 +2,7 @@ #ifndef __ACPI_BATTERY_H #define __ACPI_BATTERY_H +#include #include #define ACPI_BATTERY_CLASS "battery" @@ -19,5 +20,6 @@ struct acpi_battery_hook { void battery_hook_register(struct acpi_battery_hook *hook); void battery_hook_unregister(struct acpi_battery_hook *hook); +int devm_battery_hook_register(struct device *dev, struct acpi_battery_hook *hook); #endif -- cgit v1.2.3 From c05cb5bdf413a2a5051701a397082380758e37ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 30 Jun 2024 22:54:09 +0200 Subject: platform/chrome: Update binary interface for EC-based charge control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The charge-control command v2/v3 is more featureful than v1, it additionally supports charge thresholds. The definitions were imported from ChromeOS EC commit 32870d602317 ("squirtle: modify motionsense rotation matrix") Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20240630-cros_ec-charge-control-v5-2-8f649d018c52@weissschuh.net Signed-off-by: Tzung-Bi Shih --- include/linux/platform_data/cros_ec_commands.h | 49 ++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index ec598057d1da..e574b790be6f 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -3843,16 +3843,61 @@ struct ec_params_i2c_write { * discharge the battery. */ #define EC_CMD_CHARGE_CONTROL 0x0096 -#define EC_VER_CHARGE_CONTROL 1 +#define EC_VER_CHARGE_CONTROL 3 enum ec_charge_control_mode { CHARGE_CONTROL_NORMAL = 0, CHARGE_CONTROL_IDLE, CHARGE_CONTROL_DISCHARGE, + /* Add no more entry below. */ + CHARGE_CONTROL_COUNT, +}; + +#define EC_CHARGE_MODE_TEXT \ + { \ + [CHARGE_CONTROL_NORMAL] = "NORMAL", \ + [CHARGE_CONTROL_IDLE] = "IDLE", \ + [CHARGE_CONTROL_DISCHARGE] = "DISCHARGE", \ + } + +enum ec_charge_control_cmd { + EC_CHARGE_CONTROL_CMD_SET = 0, + EC_CHARGE_CONTROL_CMD_GET, +}; + +enum ec_charge_control_flag { + EC_CHARGE_CONTROL_FLAG_NO_IDLE = BIT(0), }; struct ec_params_charge_control { - uint32_t mode; /* enum charge_control_mode */ + uint32_t mode; /* enum charge_control_mode */ + + /* Below are the fields added in V2. */ + uint8_t cmd; /* enum ec_charge_control_cmd. */ + uint8_t flags; /* enum ec_charge_control_flag (v3+) */ + /* + * Lower and upper thresholds for battery sustainer. This struct isn't + * named to avoid tainting foreign projects' name spaces. + * + * If charge mode is explicitly set (e.g. DISCHARGE), battery sustainer + * will be disabled. To disable battery sustainer, set mode=NORMAL, + * lower=-1, upper=-1. + */ + struct { + int8_t lower; /* Display SoC in percentage. */ + int8_t upper; /* Display SoC in percentage. */ + } sustain_soc; +} __ec_align4; + +/* Added in v2 */ +struct ec_response_charge_control { + uint32_t mode; /* enum charge_control_mode */ + struct { /* Battery sustainer thresholds */ + int8_t lower; + int8_t upper; + } sustain_soc; + uint8_t flags; /* enum ec_charge_control_flag (v3+) */ + uint8_t reserved; } __ec_align4; /*****************************************************************************/ -- cgit v1.2.3 From 69a13742b7c64ed89894caf7091539e164b3e97c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 30 Jun 2024 22:54:10 +0200 Subject: platform/chrome: cros_ec_proto: Introduce cros_ec_get_cmd_versions() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Retrieving the supported versions of a command is a fairly common operation. Provide a helper for it. If the command is not supported at all the EC returns -EINVAL/EC_RES_INVALID_PARAMS. This error is translated into an empty version mask as that is easier to handle for callers and they don't need to know about the error details. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20240630-cros_ec-charge-control-v5-3-8f649d018c52@weissschuh.net Signed-off-by: Tzung-Bi Shih --- include/linux/platform_data/cros_ec_proto.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 6e9225bdf903..b34ed0cc1f8d 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -263,6 +263,8 @@ int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command int cros_ec_cmd_readmem(struct cros_ec_device *ec_dev, u8 offset, u8 size, void *dest); +int cros_ec_get_cmd_versions(struct cros_ec_device *ec_dev, u16 cmd); + /** * cros_ec_get_time_ns() - Return time in ns. * -- cgit v1.2.3 From 992f1a3d4e88498de04b0b13b94705d8540f3d81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Mon, 1 Jul 2024 13:30:04 +0200 Subject: platform: cznic: Add preliminary support for Turris Omnia MCU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the basic skeleton for a new platform driver for the microcontroller found on the Turris Omnia board. Signed-off-by: Marek Behún Reviewed-by: Andy Shevchenko Acked-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20240701113010.16447-3-kabel@kernel.org Signed-off-by: Arnd Bergmann --- include/linux/turris-omnia-mcu-interface.h | 249 +++++++++++++++++++++++++++++ 1 file changed, 249 insertions(+) create mode 100644 include/linux/turris-omnia-mcu-interface.h (limited to 'include') diff --git a/include/linux/turris-omnia-mcu-interface.h b/include/linux/turris-omnia-mcu-interface.h new file mode 100644 index 000000000000..2da8cbeb158a --- /dev/null +++ b/include/linux/turris-omnia-mcu-interface.h @@ -0,0 +1,249 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * CZ.NIC's Turris Omnia MCU I2C interface commands definitions + * + * 2024 by Marek Behún + */ + +#ifndef __TURRIS_OMNIA_MCU_INTERFACE_H +#define __TURRIS_OMNIA_MCU_INTERFACE_H + +#include +#include + +enum omnia_commands_e { + OMNIA_CMD_GET_STATUS_WORD = 0x01, /* slave sends status word back */ + OMNIA_CMD_GENERAL_CONTROL = 0x02, + OMNIA_CMD_LED_MODE = 0x03, /* default/user */ + OMNIA_CMD_LED_STATE = 0x04, /* LED on/off */ + OMNIA_CMD_LED_COLOR = 0x05, /* LED number + RED + GREEN + BLUE */ + OMNIA_CMD_USER_VOLTAGE = 0x06, + OMNIA_CMD_SET_BRIGHTNESS = 0x07, + OMNIA_CMD_GET_BRIGHTNESS = 0x08, + OMNIA_CMD_GET_RESET = 0x09, + OMNIA_CMD_GET_FW_VERSION_APP = 0x0A, /* 20B git hash number */ + OMNIA_CMD_SET_WATCHDOG_STATE = 0x0B, /* 0 - disable + * 1 - enable / ping + * after boot watchdog is started + * with 2 minutes timeout + */ + + /* OMNIA_CMD_WATCHDOG_STATUS = 0x0C, not implemented anymore */ + + OMNIA_CMD_GET_WATCHDOG_STATE = 0x0D, + OMNIA_CMD_GET_FW_VERSION_BOOT = 0x0E, /* 20B Git hash number */ + OMNIA_CMD_GET_FW_CHECKSUM = 0x0F, /* 4B length, 4B checksum */ + + /* available if FEATURES_SUPPORTED bit set in status word */ + OMNIA_CMD_GET_FEATURES = 0x10, + + /* available if EXT_CMD bit set in features */ + OMNIA_CMD_GET_EXT_STATUS_DWORD = 0x11, + OMNIA_CMD_EXT_CONTROL = 0x12, + OMNIA_CMD_GET_EXT_CONTROL_STATUS = 0x13, + + /* available if NEW_INT_API bit set in features */ + OMNIA_CMD_GET_INT_AND_CLEAR = 0x14, + OMNIA_CMD_GET_INT_MASK = 0x15, + OMNIA_CMD_SET_INT_MASK = 0x16, + + /* available if FLASHING bit set in features */ + OMNIA_CMD_FLASH = 0x19, + + /* available if WDT_PING bit set in features */ + OMNIA_CMD_SET_WDT_TIMEOUT = 0x20, + OMNIA_CMD_GET_WDT_TIMELEFT = 0x21, + + /* available if POWEROFF_WAKEUP bit set in features */ + OMNIA_CMD_SET_WAKEUP = 0x22, + OMNIA_CMD_GET_UPTIME_AND_WAKEUP = 0x23, + OMNIA_CMD_POWER_OFF = 0x24, + + /* available if USB_OVC_PROT_SETTING bit set in features */ + OMNIA_CMD_SET_USB_OVC_PROT = 0x25, + OMNIA_CMD_GET_USB_OVC_PROT = 0x26, + + /* available if TRNG bit set in features */ + OMNIA_CMD_TRNG_COLLECT_ENTROPY = 0x28, + + /* available if CRYPTO bit set in features */ + OMNIA_CMD_CRYPTO_GET_PUBLIC_KEY = 0x29, + OMNIA_CMD_CRYPTO_SIGN_MESSAGE = 0x2A, + OMNIA_CMD_CRYPTO_COLLECT_SIGNATURE = 0x2B, + + /* available if BOARD_INFO it set in features */ + OMNIA_CMD_BOARD_INFO_GET = 0x2C, + OMNIA_CMD_BOARD_INFO_BURN = 0x2D, + + /* available only at address 0x2b (LED-controller) */ + /* available only if LED_GAMMA_CORRECTION bit set in features */ + OMNIA_CMD_SET_GAMMA_CORRECTION = 0x30, + OMNIA_CMD_GET_GAMMA_CORRECTION = 0x31, + + /* available only at address 0x2b (LED-controller) */ + /* available only if PER_LED_CORRECTION bit set in features */ + /* available only if FROM_BIT_16_INVALID bit NOT set in features */ + OMNIA_CMD_SET_LED_CORRECTIONS = 0x32, + OMNIA_CMD_GET_LED_CORRECTIONS = 0x33, +}; + +enum omnia_flashing_commands_e { + OMNIA_FLASH_CMD_UNLOCK = 0x01, + OMNIA_FLASH_CMD_SIZE_AND_CSUM = 0x02, + OMNIA_FLASH_CMD_PROGRAM = 0x03, + OMNIA_FLASH_CMD_RESET = 0x04, +}; + +enum omnia_sts_word_e { + OMNIA_STS_MCU_TYPE_MASK = GENMASK(1, 0), + OMNIA_STS_MCU_TYPE_STM32 = FIELD_PREP_CONST(OMNIA_STS_MCU_TYPE_MASK, 0), + OMNIA_STS_MCU_TYPE_GD32 = FIELD_PREP_CONST(OMNIA_STS_MCU_TYPE_MASK, 1), + OMNIA_STS_MCU_TYPE_MKL = FIELD_PREP_CONST(OMNIA_STS_MCU_TYPE_MASK, 2), + OMNIA_STS_FEATURES_SUPPORTED = BIT(2), + OMNIA_STS_USER_REGULATOR_NOT_SUPPORTED = BIT(3), + OMNIA_STS_CARD_DET = BIT(4), + OMNIA_STS_MSATA_IND = BIT(5), + OMNIA_STS_USB30_OVC = BIT(6), + OMNIA_STS_USB31_OVC = BIT(7), + OMNIA_STS_USB30_PWRON = BIT(8), + OMNIA_STS_USB31_PWRON = BIT(9), + OMNIA_STS_ENABLE_4V5 = BIT(10), + OMNIA_STS_BUTTON_MODE = BIT(11), + OMNIA_STS_BUTTON_PRESSED = BIT(12), + OMNIA_STS_BUTTON_COUNTER_MASK = GENMASK(15, 13), +}; + +enum omnia_ctl_byte_e { + OMNIA_CTL_LIGHT_RST = BIT(0), + OMNIA_CTL_HARD_RST = BIT(1), + /* BIT(2) is currently reserved */ + OMNIA_CTL_USB30_PWRON = BIT(3), + OMNIA_CTL_USB31_PWRON = BIT(4), + OMNIA_CTL_ENABLE_4V5 = BIT(5), + OMNIA_CTL_BUTTON_MODE = BIT(6), + OMNIA_CTL_BOOTLOADER = BIT(7), +}; + +enum omnia_features_e { + OMNIA_FEAT_PERIPH_MCU = BIT(0), + OMNIA_FEAT_EXT_CMDS = BIT(1), + OMNIA_FEAT_WDT_PING = BIT(2), + OMNIA_FEAT_LED_STATE_EXT_MASK = GENMASK(4, 3), + OMNIA_FEAT_LED_STATE_EXT = FIELD_PREP_CONST(OMNIA_FEAT_LED_STATE_EXT_MASK, 1), + OMNIA_FEAT_LED_STATE_EXT_V32 = FIELD_PREP_CONST(OMNIA_FEAT_LED_STATE_EXT_MASK, 2), + OMNIA_FEAT_LED_GAMMA_CORRECTION = BIT(5), + OMNIA_FEAT_NEW_INT_API = BIT(6), + OMNIA_FEAT_BOOTLOADER = BIT(7), + OMNIA_FEAT_FLASHING = BIT(8), + OMNIA_FEAT_NEW_MESSAGE_API = BIT(9), + OMNIA_FEAT_BRIGHTNESS_INT = BIT(10), + OMNIA_FEAT_POWEROFF_WAKEUP = BIT(11), + OMNIA_FEAT_CAN_OLD_MESSAGE_API = BIT(12), + OMNIA_FEAT_TRNG = BIT(13), + OMNIA_FEAT_CRYPTO = BIT(14), + OMNIA_FEAT_BOARD_INFO = BIT(15), + + /* + * Orginally the features command replied only 16 bits. If more were + * read, either the I2C transaction failed or 0xff bytes were sent. + * Therefore to consider bits 16 - 31 valid, one bit (20) was reserved + * to be zero. + */ + + /* Bits 16 - 19 correspond to bits 0 - 3 of status word */ + OMNIA_FEAT_MCU_TYPE_MASK = GENMASK(17, 16), + OMNIA_FEAT_MCU_TYPE_STM32 = FIELD_PREP_CONST(OMNIA_FEAT_MCU_TYPE_MASK, 0), + OMNIA_FEAT_MCU_TYPE_GD32 = FIELD_PREP_CONST(OMNIA_FEAT_MCU_TYPE_MASK, 1), + OMNIA_FEAT_MCU_TYPE_MKL = FIELD_PREP_CONST(OMNIA_FEAT_MCU_TYPE_MASK, 2), + OMNIA_FEAT_FEATURES_SUPPORTED = BIT(18), + OMNIA_FEAT_USER_REGULATOR_NOT_SUPPORTED = BIT(19), + + /* must not be set */ + OMNIA_FEAT_FROM_BIT_16_INVALID = BIT(20), + + OMNIA_FEAT_PER_LED_CORRECTION = BIT(21), + OMNIA_FEAT_USB_OVC_PROT_SETTING = BIT(22), +}; + +enum omnia_ext_sts_dword_e { + OMNIA_EXT_STS_SFP_nDET = BIT(0), + OMNIA_EXT_STS_LED_STATES_MASK = GENMASK(31, 12), + OMNIA_EXT_STS_WLAN0_MSATA_LED = BIT(12), + OMNIA_EXT_STS_WLAN1_LED = BIT(13), + OMNIA_EXT_STS_WLAN2_LED = BIT(14), + OMNIA_EXT_STS_WPAN0_LED = BIT(15), + OMNIA_EXT_STS_WPAN1_LED = BIT(16), + OMNIA_EXT_STS_WPAN2_LED = BIT(17), + OMNIA_EXT_STS_WAN_LED0 = BIT(18), + OMNIA_EXT_STS_WAN_LED1 = BIT(19), + OMNIA_EXT_STS_LAN0_LED0 = BIT(20), + OMNIA_EXT_STS_LAN0_LED1 = BIT(21), + OMNIA_EXT_STS_LAN1_LED0 = BIT(22), + OMNIA_EXT_STS_LAN1_LED1 = BIT(23), + OMNIA_EXT_STS_LAN2_LED0 = BIT(24), + OMNIA_EXT_STS_LAN2_LED1 = BIT(25), + OMNIA_EXT_STS_LAN3_LED0 = BIT(26), + OMNIA_EXT_STS_LAN3_LED1 = BIT(27), + OMNIA_EXT_STS_LAN4_LED0 = BIT(28), + OMNIA_EXT_STS_LAN4_LED1 = BIT(29), + OMNIA_EXT_STS_LAN5_LED0 = BIT(30), + OMNIA_EXT_STS_LAN5_LED1 = BIT(31), +}; + +enum omnia_ext_ctl_e { + OMNIA_EXT_CTL_nRES_MMC = BIT(0), + OMNIA_EXT_CTL_nRES_LAN = BIT(1), + OMNIA_EXT_CTL_nRES_PHY = BIT(2), + OMNIA_EXT_CTL_nPERST0 = BIT(3), + OMNIA_EXT_CTL_nPERST1 = BIT(4), + OMNIA_EXT_CTL_nPERST2 = BIT(5), + OMNIA_EXT_CTL_PHY_SFP = BIT(6), + OMNIA_EXT_CTL_PHY_SFP_AUTO = BIT(7), + OMNIA_EXT_CTL_nVHV_CTRL = BIT(8), +}; + +enum omnia_int_e { + OMNIA_INT_CARD_DET = BIT(0), + OMNIA_INT_MSATA_IND = BIT(1), + OMNIA_INT_USB30_OVC = BIT(2), + OMNIA_INT_USB31_OVC = BIT(3), + OMNIA_INT_BUTTON_PRESSED = BIT(4), + OMNIA_INT_SFP_nDET = BIT(5), + OMNIA_INT_BRIGHTNESS_CHANGED = BIT(6), + OMNIA_INT_TRNG = BIT(7), + OMNIA_INT_MESSAGE_SIGNED = BIT(8), + + OMNIA_INT_LED_STATES_MASK = GENMASK(31, 12), + OMNIA_INT_WLAN0_MSATA_LED = BIT(12), + OMNIA_INT_WLAN1_LED = BIT(13), + OMNIA_INT_WLAN2_LED = BIT(14), + OMNIA_INT_WPAN0_LED = BIT(15), + OMNIA_INT_WPAN1_LED = BIT(16), + OMNIA_INT_WPAN2_LED = BIT(17), + OMNIA_INT_WAN_LED0 = BIT(18), + OMNIA_INT_WAN_LED1 = BIT(19), + OMNIA_INT_LAN0_LED0 = BIT(20), + OMNIA_INT_LAN0_LED1 = BIT(21), + OMNIA_INT_LAN1_LED0 = BIT(22), + OMNIA_INT_LAN1_LED1 = BIT(23), + OMNIA_INT_LAN2_LED0 = BIT(24), + OMNIA_INT_LAN2_LED1 = BIT(25), + OMNIA_INT_LAN3_LED0 = BIT(26), + OMNIA_INT_LAN3_LED1 = BIT(27), + OMNIA_INT_LAN4_LED0 = BIT(28), + OMNIA_INT_LAN4_LED1 = BIT(29), + OMNIA_INT_LAN5_LED0 = BIT(30), + OMNIA_INT_LAN5_LED1 = BIT(31), +}; + +enum omnia_cmd_poweroff_e { + OMNIA_CMD_POWER_OFF_POWERON_BUTTON = BIT(0), + OMNIA_CMD_POWER_OFF_MAGIC = 0xdead, +}; + +enum omnia_cmd_usb_ovc_prot_e { + OMNIA_CMD_xET_USB_OVC_PROT_PORT_MASK = GENMASK(3, 0), + OMNIA_CMD_xET_USB_OVC_PROT_ENABLE = BIT(4), +}; + +#endif /* __TURRIS_OMNIA_MCU_INTERFACE_H */ -- cgit v1.2.3 From 89cc8f1c5f22568142b7ad118c738204708e4207 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 30 Jun 2024 00:26:48 +0200 Subject: netfilter: nf_tables: Add flowtable map for xdp offload This adds a small internal mapping table so that a new bpf (xdp) kfunc can perform lookups in a flowtable. As-is, xdp program has access to the device pointer, but no way to do a lookup in a flowtable -- there is no way to obtain the needed struct without questionable stunts. This allows to obtain an nf_flowtable pointer given a net_device structure. In order to keep backward compatibility, the infrastructure allows the user to add a given device to multiple flowtables, but it will always return the first added mapping performing the lookup since it assumes the right configuration is 1:1 mapping between flowtables and net_devices. Co-developed-by: Lorenzo Bianconi Signed-off-by: Florian Westphal Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Pablo Neira Ayuso Link: https://lore.kernel.org/bpf/9f20e2c36f494b3bf177328718367f636bb0b2ab.1719698275.git.lorenzo@kernel.org --- include/net/netfilter/nf_flow_table.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 9abb7ee40d72..d845745207d2 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -305,6 +305,11 @@ struct flow_ports { __be16 source, dest; }; +struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev); +int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable, + struct net_device *dev, + enum flow_block_command cmd); + unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, -- cgit v1.2.3 From 391bb6594fd3a567efb1cd3efc8136c78c4c9e31 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 30 Jun 2024 00:26:49 +0200 Subject: netfilter: Add bpf_xdp_flow_lookup kfunc Introduce bpf_xdp_flow_lookup kfunc in order to perform the lookup of a given flowtable entry based on a fib tuple of incoming traffic. bpf_xdp_flow_lookup can be used as building block to offload in xdp the processing of sw flowtable when hw flowtable is not available. Signed-off-by: Lorenzo Bianconi Signed-off-by: Daniel Borkmann Acked-by: Kumar Kartikeya Dwivedi Acked-by: Pablo Neira Ayuso Link: https://lore.kernel.org/bpf/55d38a4e5856f6d1509d823ff4e98aaa6d356097.1719698275.git.lorenzo@kernel.org --- include/net/netfilter/nf_flow_table.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index d845745207d2..b63d53bb9dd6 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -315,6 +315,16 @@ unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); +#if (IS_BUILTIN(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) +extern int nf_flow_register_bpf(void); +#else +static inline int nf_flow_register_bpf(void) +{ + return 0; +} +#endif + #define MODULE_ALIAS_NF_FLOWTABLE(family) \ MODULE_ALIAS("nf-flowtable-" __stringify(family)) -- cgit v1.2.3 From face1c543e89ebc657f7948b0541a76954cf9382 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Thu, 20 Jun 2024 21:14:10 +0200 Subject: ACPI: bus: Indicate support for battery charge limiting thru _OSC The ACPI battery driver can handle the "charge limiting" state of the battery, so the platform can advertise this state. Indicate this by setting bit 19 ("Battery Charge Limiting Support") when evaluating _OSC. Tested on a Lenovo Ideapad S145-14IWL. Signed-off-by: Armin Wolf Link: https://patch.msgid.link/20240620191410.3646-2-W_Armin@gmx.de Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 28c3fb2bef0d..b87ef8f3caa0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -576,6 +576,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); #define OSC_SB_CPC_FLEXIBLE_ADR_SPACE 0x00004000 #define OSC_SB_GENERIC_INITIATOR_SUPPORT 0x00020000 #define OSC_SB_NATIVE_USB4_SUPPORT 0x00040000 +#define OSC_SB_BATTERY_CHARGE_LIMITING_SUPPORT 0x00080000 #define OSC_SB_PRM_SUPPORT 0x00200000 #define OSC_SB_FFH_OPR_SUPPORT 0x00400000 -- cgit v1.2.3 From 4d8aa430624006ba06254aa607f8756a75e42c8d Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 4 Jun 2024 14:30:04 +0200 Subject: dt-bindings: iio: adc: Add MediaTek MT6359 PMIC AUXADC Add a new binding for the MT6350 Series (MT6357/8/9) PMIC AUXADC, providing various ADC channels for both internal temperatures and voltages, audio accessory detection (hp/mic/hp+mic and buttons, usually on a 3.5mm jack) other than some basic battery statistics on boards where the battery is managed by this PMIC. Also add the necessary dt-binding headers for devicetree consumers. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Rob Herring Link: https://patch.msgid.link/20240604123008.327424-2-angelogioacchino.delregno@collabora.com Signed-off-by: Jonathan Cameron --- .../dt-bindings/iio/adc/mediatek,mt6357-auxadc.h | 21 +++++++++++++++++++++ .../dt-bindings/iio/adc/mediatek,mt6358-auxadc.h | 22 ++++++++++++++++++++++ .../dt-bindings/iio/adc/mediatek,mt6359-auxadc.h | 22 ++++++++++++++++++++++ 3 files changed, 65 insertions(+) create mode 100644 include/dt-bindings/iio/adc/mediatek,mt6357-auxadc.h create mode 100644 include/dt-bindings/iio/adc/mediatek,mt6358-auxadc.h create mode 100644 include/dt-bindings/iio/adc/mediatek,mt6359-auxadc.h (limited to 'include') diff --git a/include/dt-bindings/iio/adc/mediatek,mt6357-auxadc.h b/include/dt-bindings/iio/adc/mediatek,mt6357-auxadc.h new file mode 100644 index 000000000000..03ebb1d23953 --- /dev/null +++ b/include/dt-bindings/iio/adc/mediatek,mt6357-auxadc.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ + +#ifndef _DT_BINDINGS_MEDIATEK_MT6357_AUXADC_H +#define _DT_BINDINGS_MEDIATEK_MT6357_AUXADC_H + +/* ADC Channel Index */ +#define MT6357_AUXADC_BATADC 0 +#define MT6357_AUXADC_ISENSE 1 +#define MT6357_AUXADC_VCDT 2 +#define MT6357_AUXADC_BAT_TEMP 3 +#define MT6357_AUXADC_CHIP_TEMP 4 +#define MT6357_AUXADC_ACCDET 5 +#define MT6357_AUXADC_VDCXO 6 +#define MT6357_AUXADC_TSX_TEMP 7 +#define MT6357_AUXADC_HPOFS_CAL 8 +#define MT6357_AUXADC_DCXO_TEMP 9 +#define MT6357_AUXADC_VCORE_TEMP 10 +#define MT6357_AUXADC_VPROC_TEMP 11 +#define MT6357_AUXADC_VBAT 12 + +#endif diff --git a/include/dt-bindings/iio/adc/mediatek,mt6358-auxadc.h b/include/dt-bindings/iio/adc/mediatek,mt6358-auxadc.h new file mode 100644 index 000000000000..efa08398fafd --- /dev/null +++ b/include/dt-bindings/iio/adc/mediatek,mt6358-auxadc.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ + +#ifndef _DT_BINDINGS_MEDIATEK_MT6358_AUXADC_H +#define _DT_BINDINGS_MEDIATEK_MT6358_AUXADC_H + +/* ADC Channel Index */ +#define MT6358_AUXADC_BATADC 0 +#define MT6358_AUXADC_VCDT 1 +#define MT6358_AUXADC_BAT_TEMP 2 +#define MT6358_AUXADC_CHIP_TEMP 3 +#define MT6358_AUXADC_ACCDET 4 +#define MT6358_AUXADC_VDCXO 5 +#define MT6358_AUXADC_TSX_TEMP 6 +#define MT6358_AUXADC_HPOFS_CAL 7 +#define MT6358_AUXADC_DCXO_TEMP 8 +#define MT6358_AUXADC_VBIF 9 +#define MT6358_AUXADC_VCORE_TEMP 10 +#define MT6358_AUXADC_VPROC_TEMP 11 +#define MT6358_AUXADC_VGPU_TEMP 12 +#define MT6358_AUXADC_VBAT 13 + +#endif diff --git a/include/dt-bindings/iio/adc/mediatek,mt6359-auxadc.h b/include/dt-bindings/iio/adc/mediatek,mt6359-auxadc.h new file mode 100644 index 000000000000..59826393ee7e --- /dev/null +++ b/include/dt-bindings/iio/adc/mediatek,mt6359-auxadc.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ + +#ifndef _DT_BINDINGS_MEDIATEK_MT6359_AUXADC_H +#define _DT_BINDINGS_MEDIATEK_MT6359_AUXADC_H + +/* ADC Channel Index */ +#define MT6359_AUXADC_BATADC 0 +#define MT6359_AUXADC_BAT_TEMP 1 +#define MT6359_AUXADC_CHIP_TEMP 2 +#define MT6359_AUXADC_ACCDET 3 +#define MT6359_AUXADC_VDCXO 4 +#define MT6359_AUXADC_TSX_TEMP 5 +#define MT6359_AUXADC_HPOFS_CAL 6 +#define MT6359_AUXADC_DCXO_TEMP 7 +#define MT6359_AUXADC_VBIF 8 +#define MT6359_AUXADC_VCORE_TEMP 9 +#define MT6359_AUXADC_VPROC_TEMP 10 +#define MT6359_AUXADC_VGPU_TEMP 11 +#define MT6359_AUXADC_VBAT 12 +#define MT6359_AUXADC_IBAT 13 + +#endif -- cgit v1.2.3 From e38a82df2c9924577d39ce5ccee9e9edcadc3b5d Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Tue, 4 Jun 2024 14:30:06 +0200 Subject: math.h: Add unsigned 8 bits fractional numbers type Some users may be requiring only rather small numbers as both numerator and denominator: add signed and unsigned 8 bits structs {s8,u8}_fract. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20240604123008.327424-4-angelogioacchino.delregno@collabora.com Signed-off-by: Jonathan Cameron --- include/linux/math.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/math.h b/include/linux/math.h index dd4152711de7..f5f18dc3616b 100644 --- a/include/linux/math.h +++ b/include/linux/math.h @@ -112,6 +112,8 @@ struct type##_fract { \ __##type numerator; \ __##type denominator; \ }; +__STRUCT_FRACT(s8) +__STRUCT_FRACT(u8) __STRUCT_FRACT(s16) __STRUCT_FRACT(u16) __STRUCT_FRACT(s32) -- cgit v1.2.3 From 8d9ffd15ff5c9da7bc6171f2536aaaff40bcab6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 1 Jun 2024 14:56:16 -0400 Subject: drm/amdgpu: remove AMD_FMT_MOD_GFX12_DCC_MAX_COMPRESSED_BLOCK_* definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit They were added accidentally. Signed-off-by: Marek Olšák Acked-by: Alex Deucher Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- include/uapi/drm/drm_fourcc.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index d0063ac6e09f..4168445fbb8b 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1540,9 +1540,6 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_SHIFT 18 #define AMD_FMT_MOD_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 -#define AMD_FMT_MOD_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 -#define AMD_FMT_MOD_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ - /* * DCC supports embedding some clear colors directly in the DCC surface. * However, on older GPUs the rendering HW ignores the embedded clear color -- cgit v1.2.3 From 8dd1426e2c80e32ac1995007330c8f95ffa28ebb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 1 Jun 2024 19:53:01 -0400 Subject: drm/amdgpu: handle gfx12 in amdgpu_display_verify_sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It verified GFX9-11 swizzle modes on GFX12, which has undefined behavior. Signed-off-by: Marek Olšák Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/drm_fourcc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 4168445fbb8b..2d84a8052b15 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1506,6 +1506,8 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) * 6 - 64KB_3D * 7 - 256KB_3D */ +#define AMD_FMT_MOD_TILE_GFX12_256B_2D 1 +#define AMD_FMT_MOD_TILE_GFX12_4K_2D 2 #define AMD_FMT_MOD_TILE_GFX12_64K_2D 3 #define AMD_FMT_MOD_TILE_GFX12_256K_2D 4 -- cgit v1.2.3 From 9f111059e725f7ca79a136bfc734da3c8c1838b4 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 27 Jun 2024 19:26:24 -0500 Subject: fs_parse: add uid & gid option option parsing helpers Multiple filesystems take uid and gid as options, and the code to create the ID from an integer and validate it is standard boilerplate that can be moved into common helper functions, so do that for consistency and less cut&paste. This also helps avoid the buggy pattern noted by Seth Jenkins at https://lore.kernel.org/lkml/CALxfFW4BXhEwxR0Q5LSkg-8Vb4r2MONKCcUCVioehXQKr35eHg@mail.gmail.com/ because uid/gid parsing will fail before any assignment in most filesystems. Signed-off-by: Eric Sandeen Link: https://lore.kernel.org/r/de859d0a-feb9-473d-a5e2-c195a3d47abb@redhat.com Signed-off-by: Christian Brauner --- include/linux/fs_parser.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index d3350979115f..6cf713a7e6c6 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -28,7 +28,7 @@ typedef int fs_param_type(struct p_log *, */ fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64, fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev, - fs_param_is_path, fs_param_is_fd; + fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid; /* * Specification of the type of value a parameter wants. @@ -57,6 +57,8 @@ struct fs_parse_result { int int_32; /* For spec_s32/spec_enum */ unsigned int uint_32; /* For spec_u32{,_octal,_hex}/spec_enum */ u64 uint_64; /* For spec_u64 */ + kuid_t uid; + kgid_t gid; }; }; @@ -131,6 +133,8 @@ static inline bool fs_validate_description(const char *name, #define fsparam_bdev(NAME, OPT) __fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL) #define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL) #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL) +#define fsparam_uid(NAME, OPT) __fsparam(fs_param_is_uid, NAME, OPT, 0, NULL) +#define fsparam_gid(NAME, OPT) __fsparam(fs_param_is_gid, NAME, OPT, 0, NULL) /* String parameter that allows empty argument */ #define fsparam_string_empty(NAME, OPT) \ -- cgit v1.2.3 From f05c1ffc274516ef101d2e0f860bcb9b08c6c622 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Tue, 25 Jun 2024 19:25:46 +0200 Subject: ALSA: pcm: reinvent the stream synchronization ID API Until the commit e11f0f90a626 ("ALSA: pcm: remove SNDRV_PCM_IOCTL1_INFO internal command"), there was a possibility to pass information about the synchronized streams to the user space. The mentioned commit removed blindly the appropriate code with an irrelevant comment. The revert may be appropriate, but since this API was lost for several years without any complains, it's time to improve it. The hardware parameters may change the used stream clock source (e.g. USB hardware) so move this synchronization ID to hw_params as read-only field. It seems that pipewire can benefit from this API (disable adaptive resampling for perfectly synchronized PCM streams) now. Note that the contents of ID is not supposed to be used for direct comparison with a specific byte sequence. The "empty" case is when all bytes are zero (driver does not offer this information) and all other cases must be only used for equal comparison among PCM streams (including different sound cards) if they are using identical hardware clock. Cc: Takashi Sakamoto Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240625172836.589380-2-perex@perex.cz --- include/sound/pcm.h | 3 ++- include/uapi/sound/asound.h | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 3edd7a7346da..dbce137d8806 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -93,6 +93,7 @@ struct snd_pcm_ops { #define SNDRV_PCM_IOCTL1_CHANNEL_INFO 2 /* 3 is absent slot. */ #define SNDRV_PCM_IOCTL1_FIFO_SIZE 4 +#define SNDRV_PCM_IOCTL1_SYNC_ID 5 #define SNDRV_PCM_TRIGGER_STOP 0 #define SNDRV_PCM_TRIGGER_START 1 @@ -401,7 +402,7 @@ struct snd_pcm_runtime { snd_pcm_uframes_t silence_start; /* starting pointer to silence area */ snd_pcm_uframes_t silence_filled; /* already filled part of silence area */ - union snd_pcm_sync_id sync; /* hardware synchronization ID */ + unsigned char sync[16]; /* hardware synchronization ID */ /* -- mmap -- */ struct snd_pcm_mmap_status *status; diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index 628d46a0da92..8bf7e8a0eb6f 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 18) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; @@ -334,7 +334,7 @@ union snd_pcm_sync_id { unsigned char id[16]; unsigned short id16[8]; unsigned int id32[4]; -}; +} __attribute__((deprecated)); struct snd_pcm_info { unsigned int device; /* RO/WR (control): device number */ @@ -348,7 +348,7 @@ struct snd_pcm_info { int dev_subclass; /* SNDRV_PCM_SUBCLASS_* */ unsigned int subdevices_count; unsigned int subdevices_avail; - union snd_pcm_sync_id sync; /* hardware synchronization ID */ + unsigned char pad1[16]; /* was: hardware synchronization ID */ unsigned char reserved[64]; /* reserved for future... */ }; @@ -420,7 +420,8 @@ struct snd_pcm_hw_params { unsigned int rate_num; /* R: rate numerator */ unsigned int rate_den; /* R: rate denominator */ snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */ - unsigned char reserved[64]; /* reserved for future */ + unsigned char sync[16]; /* R: synchronization ID (perfect sync - one clock source) */ + unsigned char reserved[48]; /* reserved for future */ }; enum { -- cgit v1.2.3 From d712c58c55d9a4b4cc88ec2e1f8cd2e3b82359b5 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Tue, 25 Jun 2024 19:25:47 +0200 Subject: ALSA: pcm: optimize and clarify stream synchronization ID API Optimize the memory usage in struct snd_pcm_runtime - use boolean value for the standard sync ID scheme. Introduce snd_pcm_set_sync_per_card function to build synchronization IDs. Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/20240625172836.589380-3-perex@perex.cz --- include/sound/pcm.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/pcm.h b/include/sound/pcm.h index dbce137d8806..ac8f3aef9205 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -402,7 +402,7 @@ struct snd_pcm_runtime { snd_pcm_uframes_t silence_start; /* starting pointer to silence area */ snd_pcm_uframes_t silence_filled; /* already filled part of silence area */ - unsigned char sync[16]; /* hardware synchronization ID */ + bool std_sync_id; /* hardware synchronization - standard per card ID */ /* -- mmap -- */ struct snd_pcm_mmap_status *status; @@ -1156,7 +1156,18 @@ int snd_pcm_format_set_silence(snd_pcm_format_t format, void *buf, unsigned int void snd_pcm_set_ops(struct snd_pcm * pcm, int direction, const struct snd_pcm_ops *ops); -void snd_pcm_set_sync(struct snd_pcm_substream *substream); +void snd_pcm_set_sync_per_card(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, + const unsigned char *id, unsigned int len); +/** + * snd_pcm_set_sync - set the PCM sync id + * @substream: the pcm substream + * + * Use the default PCM sync identifier for the specific card. + */ +static inline void snd_pcm_set_sync(struct snd_pcm_substream *substream) +{ + substream->runtime->std_sync_id = true; +} int snd_pcm_lib_ioctl(struct snd_pcm_substream *substream, unsigned int cmd, void *arg); void snd_pcm_period_elapsed_under_stream_lock(struct snd_pcm_substream *substream); -- cgit v1.2.3 From 6f2a875024993449f1b19a144d3e4391411a1b51 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 25 Jun 2024 09:38:15 +0200 Subject: gpiolib: unexport gpiochip_get_desc() This function has been deprecated for some time and is now only used within the GPIOLIB core. Remove it from the public header and unexport it as all current users are linked against the compilation unit where it is defined. Link: https://lore.kernel.org/r/20240625073815.12376-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 6d31388dde0a..2dd7cb9cc270 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -787,7 +787,6 @@ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, enum gpiod_flags dflags); void gpiochip_free_own_desc(struct gpio_desc *desc); -struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum); struct gpio_desc * gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum); -- cgit v1.2.3 From 060f4ba6e40338a70932603a3564903acf5f5734 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 27 Jun 2024 13:59:44 +0100 Subject: io_uring/net: move charging socket out of zc io_uring Currently, io_uring's io_sg_from_iter() duplicates the part of __zerocopy_sg_from_iter() charging pages to the socket. It'd be too easy to miss while changing it in net/, the chunk is not the most straightforward for outside users and full of internal implementation details. io_uring is not a good place to keep it, deduplicate it by moving out of the callback into __zerocopy_sg_from_iter(). Signed-off-by: Pavel Begunkov Reviewed-by: Willem de Bruijn Reviewed-by: Jens Axboe Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 3 +++ include/linux/socket.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f4cda3fbdb75..9c29bdd5596d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1703,6 +1703,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, struct iov_iter *from, size_t length); +int zerocopy_fill_skb_from_iter(struct sk_buff *skb, + struct iov_iter *from, size_t length); + static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) { diff --git a/include/linux/socket.h b/include/linux/socket.h index 89d16b90370b..2a1ff91d1914 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -76,7 +76,7 @@ struct msghdr { __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ struct ubuf_info *msg_ubuf; - int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb, + int (*sg_from_iter)(struct sk_buff *skb, struct iov_iter *from, size_t length); }; -- cgit v1.2.3 From d7f39aee79f04eeaa42085728423501b33ac5be5 Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 26 Jun 2024 20:01:59 -0700 Subject: page_pool: export page_pool_disable_direct_recycling() 56ef27e3 unexported page_pool_unlink_napi() and renamed it to page_pool_disable_direct_recycling(). This is because there was no in-tree user of page_pool_unlink_napi(). Since then Rx queue API and an implementation in bnxt got merged. In the bnxt implementation, it broadly follows the following steps: allocate new queue memory + page pool, stop old rx queue, swap, then destroy old queue memory + page pool. The existing NAPI instance is re-used so when the old page pool that is no longer used but still linked to this shared NAPI instance is destroyed, it will trigger warnings. In my initial patches I unlinked a page pool from a NAPI instance directly. Instead, export page_pool_disable_direct_recycling() and call that instead to avoid having a driver touch a core struct. Suggested-by: Jakub Kicinski Signed-off-by: David Wei Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/net/page_pool/types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 7e8477057f3d..9093a964fc33 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -229,6 +229,7 @@ struct page_pool *page_pool_create_percpu(const struct page_pool_params *params, struct xdp_mem_info; #ifdef CONFIG_PAGE_POOL +void page_pool_disable_direct_recycling(struct page_pool *pool); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), const struct xdp_mem_info *mem); -- cgit v1.2.3 From 32267c29bc7d5c9654b71e4f354064217a5fb053 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Mon, 17 Jun 2024 17:44:47 +0100 Subject: phy: exynos5-usbdrd: support Exynos USBDRD 3.1 combo phy (HS & SS) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the Exynos USB 3.1 DRD combo phy, as found in Exynos 9 SoCs like Google GS101. It supports USB SS, HS and DisplayPort. In terms of UTMI+, this is very similar to the existing Exynos850 support in this driver. The difference is that this combo phy supports both UTMI+ (HS) and PIPE3 (SS). It also supports DP alt mode. The number of ports for UTMI+ and PIPE3 can be determined using the LINKPORT register (which also exists on Exynos E850). For SuperSpeed (SS) a new SS phy is in use and its PIPE3 interface is new compared to Exynos E850, and also very different from the existing support for older Exynos SoCs in this driver. The SS phy needs a bit more configuration work and register tuning for signal quality to work reliably, presumably due to the higher frequency, e.g. to account for different board layouts. Additionally, power needs to be enabled before writing to the SS phy registers. This commit adds the necessary changes for USB HS and SS to work. DisplayPort is out of scope in this commit. Notes: * For the register tuning, exynos5_usbdrd_apply_phy_tunes() has been added with the appropriate data structures to support tuning at various stages during initialisation. Since these are hardware specific, the platform data is supposed to be populated accordingly. The implementation is loosely modelled after the Samsung UFS PHY driver. There is one tuning state for UTMI+, PTS_UTMI_POSTINIT, to execute after init and generally intended for HS signal tuning, as done in this commit. PTS_PIPE3_PREINIT PTS_PIPE3_INIT PTS_PIPE3_POSTINIT PTS_PIPE3_POSTLOCK are tuning states for PIPE3. In the downstream driver, preinit differs by Exynos SoC, and postinit and postlock are different per board. The latter haven't been implemented for gs101 here, because downstream doesn't use them on gs101 either. * Signal lock acquisition for SS depends on the orientation of the USB-C plug. Since there currently is no infrastructure to chain connector events to both the USB DWC3 driver and this phy driver, a work-around has been added in exynos5_usbdrd_usbdp_g2_v4_pma_check_cdr_lock() to check both registers if it failed in one of the orientations. * Equally, we can only establish SS speed in one of the connector orientations due to programming differences when selecting the lane mux in exynos5_usbdrd_usbdp_g2_v4_pma_lane_mux_sel(), which really needs to be dynamic, based on the orientation of the connector. * As is, we can establish a HS link using any cable, and an SS link in one orientation of the plug, falling back to HS if the orientation is reversed to the expectation. Signed-off-by: André Draszik Reviewed-by: Peter Griffin Tested-by: Peter Griffin Tested-by: Will McVicker Link: https://lore.kernel.org/r/20240617-usb-phy-gs101-v3-6-b66de9ae7424@linaro.org Signed-off-by: Vinod Koul --- include/linux/soc/samsung/exynos-regs-pmu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index aa840ed043e1..6765160eaab2 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -657,4 +657,8 @@ #define EXYNOS5433_PAD_RETENTION_UFS_OPTION (0x3268) #define EXYNOS5433_PAD_RETENTION_FSYSGENIO_OPTION (0x32A8) +/* For GS101 */ +#define GS101_PHY_CTRL_USB20 0x3eb0 +#define GS101_PHY_CTRL_USBDP 0x3eb4 + #endif /* __LINUX_SOC_EXYNOS_REGS_PMU_H */ -- cgit v1.2.3 From d839a73179ae91c07f5f2f97ccb9c69b2b7c3306 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 28 Jun 2024 12:18:55 +0200 Subject: net: Optimize xdp_do_flush() with bpf_net_context infos. Every NIC driver utilizing XDP should invoke xdp_do_flush() after processing all packages. With the introduction of the bpf_net_context logic the flush lists (for dev, CPU-map and xsk) are lazy initialized only if used. However xdp_do_flush() tries to flush all three of them so all three lists are always initialized and the likely empty lists are "iterated". Without the usage of XDP but with CONFIG_DEBUG_NET the lists are also initialized due to xdp_do_check_flushed(). Jakub suggest to utilize the hints in bpf_net_context and avoid invoking the flush function. This will also avoiding initializing the lists which are otherwise unused. Introduce bpf_net_ctx_get_all_used_flush_lists() to return the individual list if not-empty. Use the logic in xdp_do_flush() and xdp_do_check_flushed(). Remove the not needed .*_check_flush(). Suggested-by: Jakub Kicinski Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- include/linux/bpf.h | 10 ++++------ include/linux/filter.h | 27 +++++++++++++++++++++++++++ include/net/xdp_sock.h | 14 ++------------ 3 files changed, 33 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a834f4b761bc..f5c6bc9093a6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2494,7 +2494,7 @@ struct sk_buff; struct bpf_dtab_netdev; struct bpf_cpu_map_entry; -void __dev_flush(void); +void __dev_flush(struct list_head *flush_list); int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, @@ -2507,7 +2507,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, struct bpf_prog *xdp_prog, struct bpf_map *map, bool exclude_ingress); -void __cpu_map_flush(void); +void __cpu_map_flush(struct list_head *flush_list); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, struct net_device *dev_rx); int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu, @@ -2644,8 +2644,6 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr); void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr); -bool dev_check_flush(void); -bool cpu_map_check_flush(void); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -2738,7 +2736,7 @@ static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd) return ERR_PTR(-EOPNOTSUPP); } -static inline void __dev_flush(void) +static inline void __dev_flush(struct list_head *flush_list) { } @@ -2784,7 +2782,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, return 0; } -static inline void __cpu_map_flush(void) +static inline void __cpu_map_flush(struct list_head *flush_list) { } diff --git a/include/linux/filter.h b/include/linux/filter.h index c0349522de8f..02ddcfdf94c4 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -829,6 +829,33 @@ static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void) return &bpf_net_ctx->xskmap_map_flush_list; } +static inline void bpf_net_ctx_get_all_used_flush_lists(struct list_head **lh_map, + struct list_head **lh_dev, + struct list_head **lh_xsk) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + u32 kern_flags = bpf_net_ctx->ri.kern_flags; + struct list_head *lh; + + *lh_map = *lh_dev = *lh_xsk = NULL; + + if (!IS_ENABLED(CONFIG_BPF_SYSCALL)) + return; + + lh = &bpf_net_ctx->dev_map_flush_list; + if (kern_flags & BPF_RI_F_DEV_MAP_INIT && !list_empty(lh)) + *lh_dev = lh; + + lh = &bpf_net_ctx->cpu_map_flush_list; + if (kern_flags & BPF_RI_F_CPU_MAP_INIT && !list_empty(lh)) + *lh_map = lh; + + lh = &bpf_net_ctx->xskmap_map_flush_list; + if (IS_ENABLED(CONFIG_XDP_SOCKETS) && + kern_flags & BPF_RI_F_XSK_MAP_INIT && !list_empty(lh)) + *lh_xsk = lh; +} + /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, * lwt, ...). Subsystems allowing direct data access must (!) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 3d54de168a6d..bfe625b55d55 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -121,7 +121,7 @@ struct xsk_tx_metadata_ops { int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); -void __xsk_map_flush(void); +void __xsk_map_flush(struct list_head *flush_list); /** * xsk_tx_metadata_to_compl - Save enough relevant metadata information @@ -206,7 +206,7 @@ static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) return -EOPNOTSUPP; } -static inline void __xsk_map_flush(void) +static inline void __xsk_map_flush(struct list_head *flush_list) { } @@ -228,14 +228,4 @@ static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, } #endif /* CONFIG_XDP_SOCKETS */ - -#if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET) -bool xsk_map_check_flush(void); -#else -static inline bool xsk_map_check_flush(void) -{ - return false; -} -#endif - #endif /* _LINUX_XDP_SOCK_H */ -- cgit v1.2.3 From 7a4479680d7fd05c7a3efa87b41f421af48fbbdf Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 1 Jul 2024 16:49:37 -0700 Subject: cgroup_misc: add kernel-doc comments for enum misc_res_type Fully document enum misc_res_type with kernel-doc comments to prevent kernel-doc warnings: misc_cgroup.h:12: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Types of misc cgroup entries supported by the host. misc_cgroup.h:12: warning: missing initial short description on line: * Types of misc cgroup entries supported by the host. Fixes: a72232eabdfc ("cgroup: Add misc cgroup controller") Signed-off-by: Randy Dunlap Cc: cgroups@vger.kernel.org Signed-off-by: Tejun Heo --- include/linux/misc_cgroup.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index e799b1f8d05b..d70eab2501ee 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -9,15 +9,16 @@ #define _MISC_CGROUP_H_ /** - * Types of misc cgroup entries supported by the host. + * enum misc_res_type - Types of misc cgroup entries supported by the host. */ enum misc_res_type { #ifdef CONFIG_KVM_AMD_SEV - /* AMD SEV ASIDs resource */ + /** @MISC_CG_RES_SEV: AMD SEV ASIDs resource */ MISC_CG_RES_SEV, - /* AMD SEV-ES ASIDs resource */ + /** @MISC_CG_RES_SEV_ES: AMD SEV-ES ASIDs resource */ MISC_CG_RES_SEV_ES, #endif + /** @MISC_CG_RES_TYPES: count of enum misc_res_type constants */ MISC_CG_RES_TYPES }; -- cgit v1.2.3 From f436cb6913a57bf3e1e66d18bc663e6c20751929 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 28 Jun 2024 14:56:01 -0700 Subject: x86/resctrl: Prepare for new domain scope Resctrl resources operate on subsets of CPUs in the system with the defining attribute of each subset being an instance of a particular level of cache. E.g. all CPUs sharing an L3 cache would be part of the same domain. In preparation for features that are scoped at the NUMA node level, change the code from explicit references to "cache_level" to a more generic scope. At this point the only options for this scope are groups of CPUs that share an L2 cache or L3 cache. Clean up the error handling when looking up domains. Report invalid ids before calling rdt_find_domain() in preparation for better messages when scope can be other than cache scope. This means that rdt_find_domain() will never return an error. So remove checks for error from the call sites. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-2-tony.luck@intel.com --- include/linux/resctrl.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index a365f67131ec..ed693bfe474d 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -150,13 +150,18 @@ struct resctrl_membw { struct rdt_parse_data; struct resctrl_schema; +enum resctrl_scope { + RESCTRL_L2_CACHE = 2, + RESCTRL_L3_CACHE = 3, +}; + /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine * @num_rmid: Number of RMIDs available - * @cache_level: Which cache level defines scope of this resource + * @scope: Scope of this resource * @cache: Cache allocation related data * @membw: If the component has bandwidth controls, their properties. * @domains: RCU list of all domains for this resource @@ -174,7 +179,7 @@ struct rdt_resource { bool alloc_capable; bool mon_capable; int num_rmid; - int cache_level; + enum resctrl_scope scope; struct resctrl_cache cache; struct resctrl_membw membw; struct list_head domains; -- cgit v1.2.3 From c103d4d48e1599a88001fa6215be27d55f3c025b Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 28 Jun 2024 14:56:02 -0700 Subject: x86/resctrl: Prepare to split rdt_domain structure The rdt_domain structure is used for both control and monitor features. It is about to be split into separate structures for these two usages because the scope for control and monitoring features for a resource will be different for future resources. To allow for common code that scans a list of domains looking for a specific domain id, move all the common fields ("list", "id", "cpu_mask") into their own structure within the rdt_domain structure. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-3-tony.luck@intel.com --- include/linux/resctrl.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index ed693bfe474d..f63fcf17a3bc 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -59,10 +59,20 @@ struct resctrl_staged_config { }; /** - * struct rdt_domain - group of CPUs sharing a resctrl resource + * struct rdt_domain_hdr - common header for different domain types * @list: all instances of this resource * @id: unique id for this instance * @cpu_mask: which CPUs share this resource + */ +struct rdt_domain_hdr { + struct list_head list; + int id; + struct cpumask cpu_mask; +}; + +/** + * struct rdt_domain - group of CPUs sharing a resctrl resource + * @hdr: common header for different domain types * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth * @mbm_local: saved state for MBM local bandwidth @@ -77,9 +87,7 @@ struct resctrl_staged_config { * by closid */ struct rdt_domain { - struct list_head list; - int id; - struct cpumask cpu_mask; + struct rdt_domain_hdr hdr; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; struct mbm_state *mbm_local; -- cgit v1.2.3 From cd84f72b6a5c10f79f19fab67b0edfbc4fdbc5b1 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 28 Jun 2024 14:56:03 -0700 Subject: x86/resctrl: Prepare for different scope for control/monitor operations Resctrl assumes that control and monitor operations on a resource are performed at the same scope. Prepare for systems that use different scope (specifically Intel needs to split the RDT_RESOURCE_L3 resource to use L3 scope for cache control and NODE scope for cache occupancy and memory bandwidth monitoring). Create separate domain lists for control and monitor operations. Note that errors during initialization of either control or monitor functions on a domain would previously result in that domain being excluded from both control and monitor operations. Now the domains are allocated independently it is no longer required to disable both control and monitor operations if either fail. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-4-tony.luck@intel.com --- include/linux/resctrl.h | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index f63fcf17a3bc..96ddf9ff3183 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -58,15 +58,22 @@ struct resctrl_staged_config { bool have_new_ctrl; }; +enum resctrl_domain_type { + RESCTRL_CTRL_DOMAIN, + RESCTRL_MON_DOMAIN, +}; + /** * struct rdt_domain_hdr - common header for different domain types * @list: all instances of this resource * @id: unique id for this instance + * @type: type of this instance * @cpu_mask: which CPUs share this resource */ struct rdt_domain_hdr { struct list_head list; int id; + enum resctrl_domain_type type; struct cpumask cpu_mask; }; @@ -169,10 +176,12 @@ enum resctrl_scope { * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine * @num_rmid: Number of RMIDs available - * @scope: Scope of this resource + * @ctrl_scope: Scope of this resource for control functions + * @mon_scope: Scope of this resource for monitor functions * @cache: Cache allocation related data * @membw: If the component has bandwidth controls, their properties. - * @domains: RCU list of all domains for this resource + * @ctrl_domains: RCU list of all control domains for this resource + * @mon_domains: RCU list of all monitor domains for this resource * @name: Name to use in "schemata" file. * @data_width: Character width of data when displaying * @default_ctrl: Specifies default cache cbm or memory B/W percent. @@ -187,10 +196,12 @@ struct rdt_resource { bool alloc_capable; bool mon_capable; int num_rmid; - enum resctrl_scope scope; + enum resctrl_scope ctrl_scope; + enum resctrl_scope mon_scope; struct resctrl_cache cache; struct resctrl_membw membw; - struct list_head domains; + struct list_head ctrl_domains; + struct list_head mon_domains; char *name; int data_width; u32 default_ctrl; @@ -236,8 +247,10 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type); -int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); -void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); +int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d); +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d); void resctrl_online_cpu(unsigned int cpu); void resctrl_offline_cpu(unsigned int cpu); -- cgit v1.2.3 From cae2bcb6a2c691ef7b537ad07e9819a5ed645bcc Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 28 Jun 2024 14:56:04 -0700 Subject: x86/resctrl: Split the rdt_domain and rdt_hw_domain structures The same rdt_domain structure is used for both control and monitor functions. But this results in wasted memory as some of the fields are only used by control functions, while most are only used for monitor functions. Split into separate rdt_ctrl_domain and rdt_mon_domain structures with just the fields required for control and monitoring respectively. Similar split of the rdt_hw_domain structure into rdt_hw_ctrl_domain and rdt_hw_mon_domain. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-5-tony.luck@intel.com --- include/linux/resctrl.h | 48 ++++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 96ddf9ff3183..aa2c22a8e37b 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -78,7 +78,23 @@ struct rdt_domain_hdr { }; /** - * struct rdt_domain - group of CPUs sharing a resctrl resource + * struct rdt_ctrl_domain - group of CPUs sharing a resctrl control resource + * @hdr: common header for different domain types + * @plr: pseudo-locked region (if any) associated with domain + * @staged_config: parsed configuration to be applied + * @mbps_val: When mba_sc is enabled, this holds the array of user + * specified control values for mba_sc in MBps, indexed + * by closid + */ +struct rdt_ctrl_domain { + struct rdt_domain_hdr hdr; + struct pseudo_lock_region *plr; + struct resctrl_staged_config staged_config[CDP_NUM_TYPES]; + u32 *mbps_val; +}; + +/** + * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth @@ -87,13 +103,8 @@ struct rdt_domain_hdr { * @cqm_limbo: worker to periodically read CQM h/w counters * @mbm_work_cpu: worker CPU for MBM h/w counters * @cqm_work_cpu: worker CPU for CQM h/w counters - * @plr: pseudo-locked region (if any) associated with domain - * @staged_config: parsed configuration to be applied - * @mbps_val: When mba_sc is enabled, this holds the array of user - * specified control values for mba_sc in MBps, indexed - * by closid */ -struct rdt_domain { +struct rdt_mon_domain { struct rdt_domain_hdr hdr; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; @@ -102,9 +113,6 @@ struct rdt_domain { struct delayed_work cqm_limbo; int mbm_work_cpu; int cqm_work_cpu; - struct pseudo_lock_region *plr; - struct resctrl_staged_config staged_config[CDP_NUM_TYPES]; - u32 *mbps_val; }; /** @@ -208,7 +216,7 @@ struct rdt_resource { const char *format_str; int (*parse_ctrlval)(struct rdt_parse_data *data, struct resctrl_schema *s, - struct rdt_domain *d); + struct rdt_ctrl_domain *d); struct list_head evt_list; unsigned long fflags; bool cdp_capable; @@ -242,15 +250,15 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. */ -int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, +int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type t, u32 cfg_val); -u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, +u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d, u32 closid, enum resctrl_conf_type type); -int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d); -int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain *d); -void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d); -void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d); +int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d); +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d); +void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d); +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d); void resctrl_online_cpu(unsigned int cpu); void resctrl_offline_cpu(unsigned int cpu); @@ -279,7 +287,7 @@ void resctrl_offline_cpu(unsigned int cpu); * Return: * 0 on success, or -EIO, -EINVAL etc on error. */ -int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *arch_mon_ctx); @@ -312,7 +320,7 @@ static inline void resctrl_arch_rmid_read_context_check(void) * * This can be called from any CPU. */ -void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, u32 closid, u32 rmid, enum resctrl_event_id eventid); @@ -325,7 +333,7 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, * * This can be called from any CPU. */ -void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d); extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; -- cgit v1.2.3 From 1a171608ee8d40d22d604303e42f033c69151123 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 28 Jun 2024 14:56:05 -0700 Subject: x86/resctrl: Add node-scope to the options for feature scope Currently supported resctrl features are all domain scoped the same as the scope of the L2 or L3 caches. Add RESCTRL_L3_NODE as a new option for features that are scoped at the same granularity as NUMA nodes. This is needed for Intel's Sub-NUMA Cluster (SNC) feature where monitoring features are divided between nodes that share an L3 cache. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-6-tony.luck@intel.com --- include/linux/resctrl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index aa2c22a8e37b..64b6ad1b22a1 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -176,6 +176,7 @@ struct resctrl_schema; enum resctrl_scope { RESCTRL_L2_CACHE = 2, RESCTRL_L3_CACHE = 3, + RESCTRL_L3_NODE, }; /** -- cgit v1.2.3 From 328ea688746420e12ced6cfbc5064413180244cc Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 28 Jun 2024 14:56:08 -0700 Subject: x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When SNC is enabled, monitoring data is collected at the SNC node granularity, but must be reported at L3-cache granularity for backwards compatibility in addition to reporting at the node level. Add a "ci" field to the rdt_mon_domain structure to save the cache information about the enclosing L3 cache for the domain. This provides: 1) The cache id which is needed to compose the name of the legacy monitoring directory, and to determine which domains should be summed to provide L3-scoped data. 2) The shared_cpu_map which is needed to determine which CPUs can be used to read the RMID counters with the MSR interface. This is the first step to an eventual goal of monitor reporting files like this (for a system with two SNC nodes per L3): $ cd /sys/fs/resctrl/mon_data $ tree mon_L3_00 mon_L3_00 <- 00 here is L3 cache id ├── llc_occupancy \ These files provide legacy support ├── mbm_local_bytes > for non-SNC aware monitor apps ├── mbm_total_bytes / that expect data at L3 cache level ├── mon_sub_L3_00 <- 00 here is SNC node id │   ├── llc_occupancy \ These files are finer grained │   ├── mbm_local_bytes > data from each SNC node │   └── mbm_total_bytes / └── mon_sub_L3_01 ├── llc_occupancy \ ├── mbm_local_bytes > As above, but for node 1. └── mbm_total_bytes / [ bp: Massage commit message. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Link: https://lore.kernel.org/r/20240628215619.76401-9-tony.luck@intel.com --- include/linux/resctrl.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 64b6ad1b22a1..b0875b99e811 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -2,6 +2,7 @@ #ifndef _RESCTRL_H #define _RESCTRL_H +#include #include #include #include @@ -96,6 +97,7 @@ struct rdt_ctrl_domain { /** * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource * @hdr: common header for different domain types + * @ci: cache info for this domain * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold * @mbm_total: saved state for MBM total bandwidth * @mbm_local: saved state for MBM local bandwidth @@ -106,6 +108,7 @@ struct rdt_ctrl_domain { */ struct rdt_mon_domain { struct rdt_domain_hdr hdr; + struct cacheinfo *ci; unsigned long *rmid_busy_llc; struct mbm_state *mbm_total; struct mbm_state *mbm_local; -- cgit v1.2.3 From 675e979db473d08be346a3190c5f0db095a57153 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Fri, 7 Jun 2024 16:43:58 +0200 Subject: cxl/events: Use a common struct for DRAM and General Media events cxl_event_common was an unfortunate naming choice and caused confusion with the existing Common Event Record. Furthermore, its fields didn't map all the common information between DRAM and General Media Events. Remove cxl_event_common and introduce cxl_event_media_hdr to record common information between DRAM and General Media events. cxl_event_media_hdr, which is embedded in both cxl_event_gen_media and cxl_event_dram, leverages the commonalities between the two events to simplify their respective handling. Suggested-by: Dan Williams Reviewed-by: Alison Schofield Reviewed-by: Dan Williams Reviewed-by: Ira Weiny Signed-off-by: Fabio M. De Francesco Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240607144423.48681-1-fabio.m.de.francesco@linux.intel.com Signed-off-by: Dave Jiang --- include/linux/cxl-event.h | 45 +++++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h index 60b25020281f..0bea1afbd747 100644 --- a/include/linux/cxl-event.h +++ b/include/linux/cxl-event.h @@ -21,6 +21,21 @@ struct cxl_event_record_hdr { u8 reserved[15]; } __packed; +struct cxl_event_media_hdr { + struct cxl_event_record_hdr hdr; + __le64 phys_addr; + u8 descriptor; + u8 type; + u8 transaction_type; + /* + * The meaning of Validity Flags from bit 2 is + * different across DRAM and General Media records + */ + u8 validity_flags[2]; + u8 channel; + u8 rank; +} __packed; + #define CXL_EVENT_RECORD_DATA_LENGTH 0x50 struct cxl_event_generic { struct cxl_event_record_hdr hdr; @@ -33,14 +48,7 @@ struct cxl_event_generic { */ #define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10 struct cxl_event_gen_media { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - u8 validity_flags[2]; - u8 channel; - u8 rank; + struct cxl_event_media_hdr media_hdr; u8 device[3]; u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE]; u8 reserved[46]; @@ -52,14 +60,7 @@ struct cxl_event_gen_media { */ #define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20 struct cxl_event_dram { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; - u8 descriptor; - u8 type; - u8 transaction_type; - u8 validity_flags[2]; - u8 channel; - u8 rank; + struct cxl_event_media_hdr media_hdr; u8 nibble_mask[3]; u8 bank_group; u8 bank; @@ -95,21 +96,13 @@ struct cxl_event_mem_module { u8 reserved[0x3d]; } __packed; -/* - * General Media or DRAM Event Common Fields - * - provides common access to phys_addr - */ -struct cxl_event_common { - struct cxl_event_record_hdr hdr; - __le64 phys_addr; -} __packed; - union cxl_event { struct cxl_event_generic generic; struct cxl_event_gen_media gen_media; struct cxl_event_dram dram; struct cxl_event_mem_module mem_module; - struct cxl_event_common common; + /* dram & gen_media event header */ + struct cxl_event_media_hdr media_hdr; } __packed; /* -- cgit v1.2.3 From 4dec64c52e24c2c9a15f81c115f1be5ea35121cb Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Fri, 28 Jun 2024 00:32:42 +0000 Subject: page_pool: convert to use netmem Abstract the memory type from the page_pool so we can later add support for new memory types. Convert the page_pool to use the new netmem type abstraction, rather than use struct page directly. As of this patch the netmem type is a no-op abstraction: it's always a struct page underneath. All the page pool internals are converted to use struct netmem instead of struct page, and the page pool now exports 2 APIs: 1. The existing struct page API. 2. The new struct netmem API. Keeping the existing API is transitional; we do not want to refactor all the current drivers using the page pool at once. The netmem abstraction is currently a no-op. The page_pool uses page_to_netmem() to convert allocated pages to netmem, and uses netmem_to_page() to convert the netmem back to pages to pass to mm APIs, Follow up patches to this series add non-paged netmem support to the page_pool. This change is factored out on its own to limit the code churn to this 1 patch, for ease of code review. Signed-off-by: Mina Almasry Reviewed-by: Pavel Begunkov Link: https://patch.msgid.link/20240628003253.1694510-6-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff_ref.h | 4 +- include/net/netmem.h | 15 +++++++ include/net/page_pool/helpers.h | 91 ++++++++++++++++++++++++++++++---------- include/net/page_pool/types.h | 14 +++++-- include/trace/events/page_pool.h | 30 ++++++------- 5 files changed, 114 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff_ref.h b/include/linux/skbuff_ref.h index 11f0a4063403..16c241a23472 100644 --- a/include/linux/skbuff_ref.h +++ b/include/linux/skbuff_ref.h @@ -32,13 +32,13 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f) __skb_frag_ref(&skb_shinfo(skb)->frags[f]); } -bool napi_pp_put_page(struct page *page); +bool napi_pp_put_page(netmem_ref netmem); static inline void skb_page_unref(struct page *page, bool recycle) { #ifdef CONFIG_PAGE_POOL - if (recycle && napi_pp_put_page(page)) + if (recycle && napi_pp_put_page(page_to_netmem(page))) return; #endif put_page(page); diff --git a/include/net/netmem.h b/include/net/netmem.h index d8b810245c1d..46cc9b89ac79 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -38,4 +38,19 @@ static inline netmem_ref page_to_netmem(struct page *page) return (__force netmem_ref)page; } +static inline int netmem_ref_count(netmem_ref netmem) +{ + return page_ref_count(netmem_to_page(netmem)); +} + +static inline unsigned long netmem_to_pfn(netmem_ref netmem) +{ + return page_to_pfn(netmem_to_page(netmem)); +} + +static inline netmem_ref netmem_compound_head(netmem_ref netmem) +{ + return page_to_netmem(compound_head(netmem_to_page(netmem))); +} + #endif /* _NET_NETMEM_H */ diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 873631c79ab1..2b43a893c619 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -55,6 +55,8 @@ #include #include +#include +#include #ifdef CONFIG_PAGE_POOL_STATS /* Deprecated driver-facing API, use netlink instead */ @@ -212,6 +214,11 @@ page_pool_get_dma_dir(const struct page_pool *pool) return pool->p.dma_dir; } +static inline void page_pool_fragment_netmem(netmem_ref netmem, long nr) +{ + atomic_long_set(&netmem_to_page(netmem)->pp_ref_count, nr); +} + /** * page_pool_fragment_page() - split a fresh page into fragments * @page: page to split @@ -232,11 +239,12 @@ page_pool_get_dma_dir(const struct page_pool *pool) */ static inline void page_pool_fragment_page(struct page *page, long nr) { - atomic_long_set(&page->pp_ref_count, nr); + page_pool_fragment_netmem(page_to_netmem(page), nr); } -static inline long page_pool_unref_page(struct page *page, long nr) +static inline long page_pool_unref_netmem(netmem_ref netmem, long nr) { + struct page *page = netmem_to_page(netmem); long ret; /* If nr == pp_ref_count then we have cleared all remaining @@ -279,15 +287,41 @@ static inline long page_pool_unref_page(struct page *page, long nr) return ret; } +static inline long page_pool_unref_page(struct page *page, long nr) +{ + return page_pool_unref_netmem(page_to_netmem(page), nr); +} + +static inline void page_pool_ref_netmem(netmem_ref netmem) +{ + atomic_long_inc(&netmem_to_page(netmem)->pp_ref_count); +} + static inline void page_pool_ref_page(struct page *page) { - atomic_long_inc(&page->pp_ref_count); + page_pool_ref_netmem(page_to_netmem(page)); } -static inline bool page_pool_is_last_ref(struct page *page) +static inline bool page_pool_is_last_ref(netmem_ref netmem) { /* If page_pool_unref_page() returns 0, we were the last user */ - return page_pool_unref_page(page, 1) == 0; + return page_pool_unref_netmem(netmem, 1) == 0; +} + +static inline void page_pool_put_netmem(struct page_pool *pool, + netmem_ref netmem, + unsigned int dma_sync_size, + bool allow_direct) +{ + /* When page_pool isn't compiled-in, net/core/xdp.c doesn't + * allow registering MEM_TYPE_PAGE_POOL, but shield linker. + */ +#ifdef CONFIG_PAGE_POOL + if (!page_pool_is_last_ref(netmem)) + return; + + page_pool_put_unrefed_netmem(pool, netmem, dma_sync_size, allow_direct); +#endif } /** @@ -308,15 +342,15 @@ static inline void page_pool_put_page(struct page_pool *pool, unsigned int dma_sync_size, bool allow_direct) { - /* When page_pool isn't compiled-in, net/core/xdp.c doesn't - * allow registering MEM_TYPE_PAGE_POOL, but shield linker. - */ -#ifdef CONFIG_PAGE_POOL - if (!page_pool_is_last_ref(page)) - return; + page_pool_put_netmem(pool, page_to_netmem(page), dma_sync_size, + allow_direct); +} - page_pool_put_unrefed_page(pool, page, dma_sync_size, allow_direct); -#endif +static inline void page_pool_put_full_netmem(struct page_pool *pool, + netmem_ref netmem, + bool allow_direct) +{ + page_pool_put_netmem(pool, netmem, -1, allow_direct); } /** @@ -331,7 +365,7 @@ static inline void page_pool_put_page(struct page_pool *pool, static inline void page_pool_put_full_page(struct page_pool *pool, struct page *page, bool allow_direct) { - page_pool_put_page(pool, page, -1, allow_direct); + page_pool_put_netmem(pool, page_to_netmem(page), -1, allow_direct); } /** @@ -365,6 +399,18 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va, page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct); } +static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem) +{ + struct page *page = netmem_to_page(netmem); + + dma_addr_t ret = page->dma_addr; + + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) + ret <<= PAGE_SHIFT; + + return ret; +} + /** * page_pool_get_dma_addr() - Retrieve the stored DMA address. * @page: page allocated from a page pool @@ -374,16 +420,14 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va, */ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) { - dma_addr_t ret = page->dma_addr; - - if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) - ret <<= PAGE_SHIFT; - - return ret; + return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page)); } -static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) +static inline bool page_pool_set_dma_addr_netmem(netmem_ref netmem, + dma_addr_t addr) { + struct page *page = netmem_to_page(netmem); + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) { page->dma_addr = addr >> PAGE_SHIFT; @@ -419,6 +463,11 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, page_pool_get_dma_dir(pool)); } +static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) +{ + return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr); +} + static inline bool page_pool_put(struct page_pool *pool) { return refcount_dec_and_test(&pool->user_cnt); diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 9093a964fc33..b70bcc14ceda 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -6,6 +6,7 @@ #include #include #include +#include #define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA * map/unmap @@ -40,7 +41,7 @@ #define PP_ALLOC_CACHE_REFILL 64 struct pp_alloc_cache { u32 count; - struct page *cache[PP_ALLOC_CACHE_SIZE]; + netmem_ref cache[PP_ALLOC_CACHE_SIZE]; }; /** @@ -73,7 +74,7 @@ struct page_pool_params { struct net_device *netdev; unsigned int flags; /* private: used by test code only */ - void (*init_callback)(struct page *page, void *arg); + void (*init_callback)(netmem_ref netmem, void *arg); void *init_arg; ); }; @@ -151,7 +152,7 @@ struct page_pool { */ __cacheline_group_begin(frag) __aligned(4 * sizeof(long)); long frag_users; - struct page *frag_page; + netmem_ref frag_page; unsigned int frag_offset; __cacheline_group_end(frag); @@ -220,8 +221,12 @@ struct page_pool { }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); +netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp); struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, unsigned int size, gfp_t gfp); +netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, + unsigned int *offset, unsigned int size, + gfp_t gfp); struct page_pool *page_pool_create(const struct page_pool_params *params); struct page_pool *page_pool_create_percpu(const struct page_pool_params *params, int cpuid); @@ -252,6 +257,9 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, } #endif +void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem, + unsigned int dma_sync_size, + bool allow_direct); void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, unsigned int dma_sync_size, bool allow_direct); diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h index 6834356b2d2a..543e54e432a1 100644 --- a/include/trace/events/page_pool.h +++ b/include/trace/events/page_pool.h @@ -42,51 +42,53 @@ TRACE_EVENT(page_pool_release, TRACE_EVENT(page_pool_state_release, TP_PROTO(const struct page_pool *pool, - const struct page *page, u32 release), + netmem_ref netmem, u32 release), - TP_ARGS(pool, page, release), + TP_ARGS(pool, netmem, release), TP_STRUCT__entry( __field(const struct page_pool *, pool) - __field(const struct page *, page) + __field(unsigned long, netmem) __field(u32, release) __field(unsigned long, pfn) ), TP_fast_assign( __entry->pool = pool; - __entry->page = page; + __entry->netmem = (__force unsigned long)netmem; __entry->release = release; - __entry->pfn = page_to_pfn(page); + __entry->pfn = netmem_to_pfn(netmem); ), - TP_printk("page_pool=%p page=%p pfn=0x%lx release=%u", - __entry->pool, __entry->page, __entry->pfn, __entry->release) + TP_printk("page_pool=%p netmem=%p pfn=0x%lx release=%u", + __entry->pool, (void *)__entry->netmem, + __entry->pfn, __entry->release) ); TRACE_EVENT(page_pool_state_hold, TP_PROTO(const struct page_pool *pool, - const struct page *page, u32 hold), + netmem_ref netmem, u32 hold), - TP_ARGS(pool, page, hold), + TP_ARGS(pool, netmem, hold), TP_STRUCT__entry( __field(const struct page_pool *, pool) - __field(const struct page *, page) + __field(unsigned long, netmem) __field(u32, hold) __field(unsigned long, pfn) ), TP_fast_assign( __entry->pool = pool; - __entry->page = page; + __entry->netmem = (__force unsigned long)netmem; __entry->hold = hold; - __entry->pfn = page_to_pfn(page); + __entry->pfn = netmem_to_pfn(netmem); ), - TP_printk("page_pool=%p page=%p pfn=0x%lx hold=%u", - __entry->pool, __entry->page, __entry->pfn, __entry->hold) + TP_printk("page_pool=%p netmem=%p pfn=0x%lx hold=%u", + __entry->pool, (void *)__entry->netmem, + __entry->pfn, __entry->hold) ); TRACE_EVENT(page_pool_update_nid, -- cgit v1.2.3 From 7b6f9ec6ad51125facadecf77dc6e62928186d2e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 3 Jul 2024 09:16:40 +0200 Subject: drm/panthor: Fix sync-only jobs A sync-only job is meant to provide a synchronization point on a queue, so we can't return a NULL fence there, we have to add a signal operation to the command stream which executes after all other previously submitted jobs are done. v2: - Fixed a UAF bug - Added R-bs Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") Signed-off-by: Boris Brezillon Reviewed-by: Liviu Dudau Reviewed-by: Steven Price Link: https://patchwork.freedesktop.org/patch/msgid/20240703071640.231278-3-boris.brezillon@collabora.com --- include/uapi/drm/panthor_drm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h index aaed8e12ad0b..926b1deb1116 100644 --- a/include/uapi/drm/panthor_drm.h +++ b/include/uapi/drm/panthor_drm.h @@ -802,6 +802,9 @@ struct drm_panthor_queue_submit { * Must be 64-bit/8-byte aligned (the size of a CS instruction) * * Can be zero if stream_addr is zero too. + * + * When the stream size is zero, the queue submit serves as a + * synchronization point. */ __u32 stream_size; @@ -822,6 +825,8 @@ struct drm_panthor_queue_submit { * ensure the GPU doesn't get garbage when reading the indirect command * stream buffers. If you want the cache flush to happen * unconditionally, pass a zero here. + * + * Ignored when stream_size is zero. */ __u32 latest_flush; -- cgit v1.2.3 From 18a5daf0e4974ded74b2ed8a6aed1da49bde356d Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Thu, 13 Jun 2024 02:12:15 +0200 Subject: vfs: move d_lockref out of the area used by RCU lookup Stock kernel scales worse than FreeBSD when doing a 20-way stat(2) on the same tmpfs-backed file. According to perf top: 38.09% lockref_put_return 26.08% lockref_get_not_dead 25.60% __d_lookup_rcu 0.89% clear_bhb_loop __d_lookup_rcu is participating in cacheline ping pong due to the embedded name sharing a cacheline with lockref. Moving it out resolves the problem: 41.50% lockref_put_return 41.03% lockref_get_not_dead 1.54% clear_bhb_loop benchmark (will-it-scale, Sapphire Rapids, tmpfs, ops/s): FreeBSD:7219334 before: 5038006 after: 7842883 (+55%) One minor remark: the 'after' result is unstable, fluctuating in the range ~7.8 mln to ~9 mln during different runs. Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20240613001215.648829-3-mjguzik@gmail.com Signed-off-by: Christian Brauner --- include/linux/dcache.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index bf53e3894aae..326dbccc3736 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -89,13 +89,18 @@ struct dentry { struct inode *d_inode; /* Where the name belongs to - NULL is * negative */ unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ + /* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */ /* Ref lookup also touches following */ - struct lockref d_lockref; /* per-dentry lock and refcount */ const struct dentry_operations *d_op; struct super_block *d_sb; /* The root of the dentry tree */ unsigned long d_time; /* used by d_revalidate */ void *d_fsdata; /* fs-specific data */ + /* --- cacheline 2 boundary (128 bytes) --- */ + struct lockref d_lockref; /* per-dentry lock and refcount + * keep separate from RCU lookup area if + * possible! + */ union { struct list_head d_lru; /* LRU list */ -- cgit v1.2.3 From dc99c0ff53f588bb210b1e8b3314c7581cde68a2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 18 Jun 2024 14:12:28 +0200 Subject: fs: fix dentry size On CONFIG_SMP=y and on 32bit we need to decrease DNAME_INLINE_LEN to 36 btyes to end up with 128 bytes in total. Reported-by: Linus Torvalds Links: https://lore.kernel.org/r/CAHk-=whtoqTSCcAvV-X-KPqoDWxS4vxmWpuKLB+Vv8=FtUd5vA@mail.gmail.com Signed-off-by: Christian Brauner --- include/linux/dcache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 326dbccc3736..58916b3f53ad 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -71,7 +71,7 @@ extern const struct qstr dotdot_name; # define DNAME_INLINE_LEN 40 /* 192 bytes */ #else # ifdef CONFIG_SMP -# define DNAME_INLINE_LEN 40 /* 128 bytes */ +# define DNAME_INLINE_LEN 36 /* 128 bytes */ # else # define DNAME_INLINE_LEN 44 /* 128 bytes */ # endif -- cgit v1.2.3 From 85b08b31a22b481ec6528130daf94eee4452e23f Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 28 Jun 2024 14:29:22 +0800 Subject: netfs, fscache: export fscache_put_volume() and add fscache_try_get_volume() Export fscache_put_volume() and add fscache_try_get_volume() helper function to allow cachefiles to get/put fscache_volume via linux/fscache-cache.h. Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240628062930.2467993-2-libaokun@huaweicloud.com Signed-off-by: Christian Brauner --- include/linux/fscache-cache.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h index bdf7f3eddf0a..4c91a019972b 100644 --- a/include/linux/fscache-cache.h +++ b/include/linux/fscache-cache.h @@ -19,6 +19,7 @@ enum fscache_cache_trace; enum fscache_cookie_trace; enum fscache_access_trace; +enum fscache_volume_trace; enum fscache_cache_state { FSCACHE_CACHE_IS_NOT_PRESENT, /* No cache is present for this name */ @@ -97,6 +98,11 @@ extern void fscache_withdraw_cookie(struct fscache_cookie *cookie); extern void fscache_io_error(struct fscache_cache *cache); +extern struct fscache_volume * +fscache_try_get_volume(struct fscache_volume *volume, + enum fscache_volume_trace where); +extern void fscache_put_volume(struct fscache_volume *volume, + enum fscache_volume_trace where); extern void fscache_end_volume_access(struct fscache_volume *volume, struct fscache_cookie *cookie, enum fscache_access_trace why); -- cgit v1.2.3 From 522018a0de6b6fcce60c04f86dfc5f0e4b6a1b36 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Fri, 28 Jun 2024 14:29:23 +0800 Subject: cachefiles: fix slab-use-after-free in fscache_withdraw_volume() We got the following issue in our fault injection stress test: ================================================================== BUG: KASAN: slab-use-after-free in fscache_withdraw_volume+0x2e1/0x370 Read of size 4 at addr ffff88810680be08 by task ondemand-04-dae/5798 CPU: 0 PID: 5798 Comm: ondemand-04-dae Not tainted 6.8.0-dirty #565 Call Trace: kasan_check_range+0xf6/0x1b0 fscache_withdraw_volume+0x2e1/0x370 cachefiles_withdraw_volume+0x31/0x50 cachefiles_withdraw_cache+0x3ad/0x900 cachefiles_put_unbind_pincount+0x1f6/0x250 cachefiles_daemon_release+0x13b/0x290 __fput+0x204/0xa00 task_work_run+0x139/0x230 Allocated by task 5820: __kmalloc+0x1df/0x4b0 fscache_alloc_volume+0x70/0x600 __fscache_acquire_volume+0x1c/0x610 erofs_fscache_register_volume+0x96/0x1a0 erofs_fscache_register_fs+0x49a/0x690 erofs_fc_fill_super+0x6c0/0xcc0 vfs_get_super+0xa9/0x140 vfs_get_tree+0x8e/0x300 do_new_mount+0x28c/0x580 [...] Freed by task 5820: kfree+0xf1/0x2c0 fscache_put_volume.part.0+0x5cb/0x9e0 erofs_fscache_unregister_fs+0x157/0x1b0 erofs_kill_sb+0xd9/0x1c0 deactivate_locked_super+0xa3/0x100 vfs_get_super+0x105/0x140 vfs_get_tree+0x8e/0x300 do_new_mount+0x28c/0x580 [...] ================================================================== Following is the process that triggers the issue: mount failed | daemon exit ------------------------------------------------------------ deactivate_locked_super cachefiles_daemon_release erofs_kill_sb erofs_fscache_unregister_fs fscache_relinquish_volume __fscache_relinquish_volume fscache_put_volume(fscache_volume, fscache_volume_put_relinquish) zero = __refcount_dec_and_test(&fscache_volume->ref, &ref); cachefiles_put_unbind_pincount cachefiles_daemon_unbind cachefiles_withdraw_cache cachefiles_withdraw_volumes list_del_init(&volume->cache_link) fscache_free_volume(fscache_volume) cache->ops->free_volume cachefiles_free_volume list_del_init(&cachefiles_volume->cache_link); kfree(fscache_volume) cachefiles_withdraw_volume fscache_withdraw_volume fscache_volume->n_accesses // fscache_volume UAF !!! The fscache_volume in cache->volumes must not have been freed yet, but its reference count may be 0. So use the new fscache_try_get_volume() helper function try to get its reference count. If the reference count of fscache_volume is 0, fscache_put_volume() is freeing it, so wait for it to be removed from cache->volumes. If its reference count is not 0, call cachefiles_withdraw_volume() with reference count protection to avoid the above issue. Fixes: fe2140e2f57f ("cachefiles: Implement volume support") Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240628062930.2467993-3-libaokun@huaweicloud.com Signed-off-by: Christian Brauner --- include/trace/events/fscache.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index a6190aa1b406..f1a73aa83fbb 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -35,12 +35,14 @@ enum fscache_volume_trace { fscache_volume_get_cookie, fscache_volume_get_create_work, fscache_volume_get_hash_collision, + fscache_volume_get_withdraw, fscache_volume_free, fscache_volume_new_acquire, fscache_volume_put_cookie, fscache_volume_put_create_work, fscache_volume_put_hash_collision, fscache_volume_put_relinquish, + fscache_volume_put_withdraw, fscache_volume_see_create_work, fscache_volume_see_hash_wake, fscache_volume_wait_create_work, @@ -120,12 +122,14 @@ enum fscache_access_trace { EM(fscache_volume_get_cookie, "GET cook ") \ EM(fscache_volume_get_create_work, "GET creat") \ EM(fscache_volume_get_hash_collision, "GET hcoll") \ + EM(fscache_volume_get_withdraw, "GET withd") \ EM(fscache_volume_free, "FREE ") \ EM(fscache_volume_new_acquire, "NEW acq ") \ EM(fscache_volume_put_cookie, "PUT cook ") \ EM(fscache_volume_put_create_work, "PUT creat") \ EM(fscache_volume_put_hash_collision, "PUT hcoll") \ EM(fscache_volume_put_relinquish, "PUT relnq") \ + EM(fscache_volume_put_withdraw, "PUT withd") \ EM(fscache_volume_see_create_work, "SEE creat") \ EM(fscache_volume_see_hash_wake, "SEE hwake") \ E_(fscache_volume_wait_create_work, "WAIT crea") -- cgit v1.2.3 From ad59baa3169591e0b4cf1a217c9139f2145f4c7f Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 3 Jul 2024 09:25:21 +0200 Subject: slab, rust: extend kmalloc() alignment guarantees to remove Rust padding Slab allocators have been guaranteeing natural alignment for power-of-two sizes since commit 59bb47985c1d ("mm, sl[aou]b: guarantee natural alignment for kmalloc(power-of-two)"), while any other sizes are guaranteed to be aligned only to ARCH_KMALLOC_MINALIGN bytes (although in practice are aligned more than that in non-debug scenarios). Rust's allocator API specifies size and alignment per allocation, which have to satisfy the following rules, per Alice Ryhl [1]: 1. The alignment is a power of two. 2. The size is non-zero. 3. When you round up the size to the next multiple of the alignment, then it must not overflow the signed type isize / ssize_t. In order to map this to kmalloc()'s guarantees, some requested allocation sizes have to be padded to the next power-of-two size [2]. For example, an allocation of size 96 and alignment of 32 will be padded to an allocation of size 128, because the existing kmalloc-96 bucket doesn't guarantee alignent above ARCH_KMALLOC_MINALIGN. Without slab debugging active, the layout of the kmalloc-96 slabs however naturally align the objects to 32 bytes, so extending the size to 128 bytes is wasteful. To improve the situation we can extend the kmalloc() alignment guarantees in a way that 1) doesn't change the current slab layout (and thus does not increase internal fragmentation) when slab debugging is not active 2) reduces waste in the Rust allocator use case 3) is a superset of the current guarantee for power-of-two sizes. The extended guarantee is that alignment is at least the largest power-of-two divisor of the requested size. For power-of-two sizes the largest divisor is the size itself, but let's keep this case documented separately for clarity. For current kmalloc size buckets, it means kmalloc-96 will guarantee alignment of 32 bytes and kmalloc-196 will guarantee 64 bytes. This covers the rules 1 and 2 above of Rust's API as long as the size is a multiple of the alignment. The Rust layer should now only need to round up the size to the next multiple if it isn't, while enforcing the rule 3. Implementation-wise, this changes the alignment calculation in create_boot_cache(). While at it also do the calulation only for caches with the SLAB_KMALLOC flag, because the function is also used to create the initial kmem_cache and kmem_cache_node caches, where no alignment guarantee is necessary. In the Rust allocator's krealloc_aligned(), remove the code that padded sizes to the next power of two (suggested by Alice Ryhl) as it's no longer necessary with the new guarantees. Reported-by: Alice Ryhl Reported-by: Boqun Feng Link: https://lore.kernel.org/all/CAH5fLggjrbdUuT-H-5vbQfMazjRDpp2%2Bk3%3DYhPyS17ezEqxwcw@mail.gmail.com/ [1] Link: https://lore.kernel.org/all/CAH5fLghsZRemYUwVvhk77o6y1foqnCeDzW4WZv6ScEWna2+_jw@mail.gmail.com/ [2] Reviewed-by: Boqun Feng Acked-by: Roman Gushchin Reviewed-by: Alice Ryhl Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index ed6bee5ec2b6..640cea6e6323 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -604,7 +604,8 @@ void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) * * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN * bytes. For @size of power of two bytes, the alignment is also guaranteed - * to be at least to the size. + * to be at least to the size. For other sizes, the alignment is guaranteed to + * be at least the largest power-of-two divisor of @size. * * The @flags argument may be one of the GFP flags defined at * include/linux/gfp_types.h and described at -- cgit v1.2.3 From 72e0fe2241ce113cbba339ca8c2450b167774530 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 1 Jul 2024 12:12:58 -0700 Subject: mm/slab: Introduce kmem_buckets typedef Encapsulate the concept of a single set of kmem_caches that are used for the kmalloc size buckets. Redefine kmalloc_caches as an array of these buckets (for the different global cache buckets). Signed-off-by: Kees Cook Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 640cea6e6323..922bf15794f7 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -426,8 +426,9 @@ enum kmalloc_cache_type { NR_KMALLOC_TYPES }; -extern struct kmem_cache * -kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1]; +typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1]; + +extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES]; /* * Define gfp bits that should not be set for KMALLOC_NORMAL. -- cgit v1.2.3 From 67f2df3b82d091ed095d0e47e1f3a9d3e18e4e41 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 1 Jul 2024 12:12:59 -0700 Subject: mm/slab: Plumb kmem_buckets into __do_kmalloc_node() Introduce CONFIG_SLAB_BUCKETS which provides the infrastructure to support separated kmalloc buckets (in the following kmem_buckets_create() patches and future codetag-based separation). Since this will provide a mitigation for a very common case of exploits, it is recommended to enable this feature for general purpose distros. By default, the new Kconfig will be enabled if CONFIG_SLAB_FREELIST_HARDENED is enabled (and it is added to the hardening.config Kconfig fragment). To be able to choose which buckets to allocate from, make the buckets available to the internal kmalloc interfaces by adding them as the second argument, rather than depending on the buckets being chosen from the fixed set of global buckets. Where the bucket is not available, pass NULL, which means "use the default system kmalloc bucket set" (the prior existing behavior), as implemented in kmalloc_slab(). To avoid adding the extra argument when !CONFIG_SLAB_BUCKETS, only the top-level macros and static inlines use the buckets argument (where they are stripped out and compiled out respectively). The actual extern functions can then be built without the argument, and the internals fall back to the global kmalloc buckets unconditionally. Co-developed-by: Vlastimil Babka Signed-off-by: Kees Cook Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 922bf15794f7..a9200d453087 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -570,6 +570,21 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; #define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__)) +/* + * These macros allow declaring a kmem_buckets * parameter alongside size, which + * can be compiled out with CONFIG_SLAB_BUCKETS=n so that a large number of call + * sites don't have to pass NULL. + */ +#ifdef CONFIG_SLAB_BUCKETS +#define DECL_BUCKET_PARAMS(_size, _b) size_t (_size), kmem_buckets *(_b) +#define PASS_BUCKET_PARAMS(_size, _b) (_size), (_b) +#define PASS_BUCKET_PARAM(_b) (_b) +#else +#define DECL_BUCKET_PARAMS(_size, _b) size_t (_size) +#define PASS_BUCKET_PARAMS(_size, _b) (_size) +#define PASS_BUCKET_PARAM(_b) NULL +#endif + /* * The following functions are not to be used directly and are intended only * for internal use from kmalloc() and kmalloc_node() @@ -579,7 +594,7 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); -void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) +void *__kmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __assume_kmalloc_alignment __alloc_size(1); void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size) @@ -680,7 +695,7 @@ static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gf kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, node, size); } - return __kmalloc_node_noprof(size, flags, node); + return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node); } #define kmalloc_node(...) alloc_hooks(kmalloc_node_noprof(__VA_ARGS__)) @@ -731,8 +746,10 @@ static inline __realloc_size(2, 3) void * __must_check krealloc_array_noprof(voi */ #define kcalloc(n, size, flags) kmalloc_array(n, size, (flags) | __GFP_ZERO) -void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, int node, - unsigned long caller) __alloc_size(1); +void *__kmalloc_node_track_caller_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node, + unsigned long caller) __alloc_size(1); +#define kmalloc_node_track_caller_noprof(size, flags, node, caller) \ + __kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node, caller) #define kmalloc_node_track_caller(...) \ alloc_hooks(kmalloc_node_track_caller_noprof(__VA_ARGS__, _RET_IP_)) @@ -758,7 +775,7 @@ static inline __alloc_size(1, 2) void *kmalloc_array_node_noprof(size_t n, size_ return NULL; if (__builtin_constant_p(n) && __builtin_constant_p(size)) return kmalloc_node_noprof(bytes, flags, node); - return __kmalloc_node_noprof(bytes, flags, node); + return __kmalloc_node_noprof(PASS_BUCKET_PARAMS(bytes, NULL), flags, node); } #define kmalloc_array_node(...) alloc_hooks(kmalloc_array_node_noprof(__VA_ARGS__)) -- cgit v1.2.3 From 2e8000b826fcd2716449d09753d5ed843067881e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 1 Jul 2024 12:13:00 -0700 Subject: mm/slab: Introduce kvmalloc_buckets_node() that can take kmem_buckets argument Plumb kmem_buckets arguments through kvmalloc_node_noprof() so it is possible to provide an API to perform kvmalloc-style allocations with a particular set of buckets. Introduce kvmalloc_buckets_node() that takes a kmem_buckets argument. Signed-off-by: Kees Cook Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index a9200d453087..837005314f96 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -799,7 +799,9 @@ static inline __alloc_size(1) void *kzalloc_noprof(size_t size, gfp_t flags) #define kzalloc(...) alloc_hooks(kzalloc_noprof(__VA_ARGS__)) #define kzalloc_node(_size, _flags, _node) kmalloc_node(_size, (_flags)|__GFP_ZERO, _node) -extern void *kvmalloc_node_noprof(size_t size, gfp_t flags, int node) __alloc_size(1); +void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) __alloc_size(1); +#define kvmalloc_node_noprof(size, flags, node) \ + __kvmalloc_node_noprof(PASS_BUCKET_PARAMS(size, NULL), flags, node) #define kvmalloc_node(...) alloc_hooks(kvmalloc_node_noprof(__VA_ARGS__)) #define kvmalloc(_size, _flags) kvmalloc_node(_size, _flags, NUMA_NO_NODE) -- cgit v1.2.3 From b32801d1255be1da62ea8134df3ed9f3331fba12 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 1 Jul 2024 12:13:01 -0700 Subject: mm/slab: Introduce kmem_buckets_create() and family Dedicated caches are available for fixed size allocations via kmem_cache_alloc(), but for dynamically sized allocations there is only the global kmalloc API's set of buckets available. This means it isn't possible to separate specific sets of dynamically sized allocations into a separate collection of caches. This leads to a use-after-free exploitation weakness in the Linux kernel since many heap memory spraying/grooming attacks depend on using userspace-controllable dynamically sized allocations to collide with fixed size allocations that end up in same cache. While CONFIG_RANDOM_KMALLOC_CACHES provides a probabilistic defense against these kinds of "type confusion" attacks, including for fixed same-size heap objects, we can create a complementary deterministic defense for dynamically sized allocations that are directly user controlled. Addressing these cases is limited in scope, so isolating these kinds of interfaces will not become an unbounded game of whack-a-mole. For example, many pass through memdup_user(), making isolation there very effective. In order to isolate user-controllable dynamically-sized allocations from the common system kmalloc allocations, introduce kmem_buckets_create(), which behaves like kmem_cache_create(). Introduce kmem_buckets_alloc(), which behaves like kmem_cache_alloc(). Introduce kmem_buckets_alloc_track_caller() for where caller tracking is needed. Introduce kmem_buckets_valloc() for cases where vmalloc fallback is needed. Note that these caches are specifically flagged with SLAB_NO_MERGE, since merging would defeat the entire purpose of the mitigation. This can also be used in the future to extend allocation profiling's use of code tagging to implement per-caller allocation cache isolation[1] even for dynamic allocations. Memory allocation pinning[2] is still needed to plug the Use-After-Free cross-allocator weakness (where attackers can arrange to free an entire slab page and have it reallocated to a different cache), but that is an existing and separate issue which is complementary to this improvement. Development continues for that feature via the SLAB_VIRTUAL[3] series (which could also provide guard pages -- another complementary improvement). Link: https://lore.kernel.org/lkml/202402211449.401382D2AF@keescook [1] Link: https://googleprojectzero.blogspot.com/2021/10/how-simple-linux-kernel-memory.html [2] Link: https://lore.kernel.org/lkml/20230915105933.495735-1-matteorizzo@google.com/ [3] Signed-off-by: Kees Cook Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 837005314f96..d99afce36098 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -549,6 +549,10 @@ void *kmem_cache_alloc_lru_noprof(struct kmem_cache *s, struct list_lru *lru, void kmem_cache_free(struct kmem_cache *s, void *objp); +kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, + unsigned int useroffset, unsigned int usersize, + void (*ctor)(void *)); + /* * Bulk allocation and freeing operations. These are accelerated in an * allocator specific way to avoid taking locks repeatedly or building @@ -682,6 +686,12 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f } #define kmalloc(...) alloc_hooks(kmalloc_noprof(__VA_ARGS__)) +#define kmem_buckets_alloc(_b, _size, _flags) \ + alloc_hooks(__kmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) + +#define kmem_buckets_alloc_track_caller(_b, _size, _flags) \ + alloc_hooks(__kmalloc_node_track_caller_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE, _RET_IP_)) + static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gfp_t flags, int node) { if (__builtin_constant_p(size) && size) { @@ -809,6 +819,8 @@ void *__kvmalloc_node_noprof(DECL_BUCKET_PARAMS(size, b), gfp_t flags, int node) #define kvzalloc(_size, _flags) kvmalloc(_size, (_flags)|__GFP_ZERO) #define kvzalloc_node(_size, _flags, _node) kvmalloc_node(_size, (_flags)|__GFP_ZERO, _node) +#define kmem_buckets_valloc(_b, _size, _flags) \ + alloc_hooks(__kvmalloc_node_noprof(PASS_BUCKET_PARAMS(_size, _b), _flags, NUMA_NO_NODE)) static inline __alloc_size(1, 2) void * kvmalloc_array_node_noprof(size_t n, size_t size, gfp_t flags, int node) -- cgit v1.2.3 From 6555acdefc758a4dae7038c3f2301f311e287218 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Tue, 2 Jul 2024 19:21:18 +0200 Subject: um: time-travel: support time-travel protocol broadcast messages Add a message type to the time-travel protocol to broadcast a small (64-bit) value to all participants in a simulation. The main use case is to have an identical message come to all participants in a simulation, e.g. to separate out logs for different tests running in a single simulation. Down in the guts of time_travel_handle_message() we can't use printk() and not even printk_deferred(), so just store the message and print it at the start of the userspace() function. Unfortunately this means that other prints in the kernel can actually bypass the message, but in most cases where this is used, for example to separate test logs, userspace will be involved. Also, even if we could use printk_deferred(), we'd still need to flush it out in the userspace() function since otherwise userspace messages might cross it. As a result, this is a reasonable compromise, there's no need to have any core changes and it solves the main use case we have for it. Signed-off-by: Mordechay Goodstein Link: https://patch.msgid.link/20240702192118.c4093bc5b15e.I2ca8d006b67feeb866ac2017af7b741c9e06445a@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/um_timetravel.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/um_timetravel.h b/include/uapi/linux/um_timetravel.h index ca3238222b6d..078ea401aa2a 100644 --- a/include/uapi/linux/um_timetravel.h +++ b/include/uapi/linux/um_timetravel.h @@ -123,6 +123,17 @@ enum um_timetravel_ops { * the simulation. */ UM_TIMETRAVEL_GET_TOD = 8, + + /** + * @UM_TIMETRAVEL_BROADCAST: Send/Receive a broadcast message. + * This message can be used to sync all components in the system + * with a single message, if the calender gets the message, the + * calender broadcast the message to all components, and if a + * component receives it it should act based on it e.g print a + * message to it's log system. + * (calendar <-> host) + */ + UM_TIMETRAVEL_BROADCAST = 9, }; #endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */ -- cgit v1.2.3 From bfb80d8bc92fa70f5b44a57ed2b24d57685fe188 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 2 Jul 2024 19:21:21 +0200 Subject: um: add shared memory optimisation for time-travel=ext With external time travel, a LOT of message can end up being exchanged on the socket, taking a significant amount of time just to do that. Add a new shared memory optimisation to that, where a number of changes are made: - the controller sends a client ID and a shared memory FD (and a logging FD we don't use) in the ACK message to the initial START - the shared memory holds the current time and the free_until value, so that there's no need to exchange messages for that - if the client that's running has shared memory support, any client (the running one included) can request the next time it wants to run inside the shared memory, rather than sending a message, by also updating the free_until value - when shared memory is enabled, RUN/WAIT messages no longer have an ACK, further cutting down on messages Together, this can reduce the number of messages very significantly, and reduce overall test/simulation run time. Co-developed-by: Mordechay Goodstein Signed-off-by: Mordechay Goodstein Link: https://patch.msgid.link/20240702192118.6ad0a083f574.Ie41206c8ce4507fe26b991937f47e86c24ca7a31@changeid Signed-off-by: Johannes Berg --- include/uapi/linux/um_timetravel.h | 179 ++++++++++++++++++++++++++++++++++--- 1 file changed, 165 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/um_timetravel.h b/include/uapi/linux/um_timetravel.h index 078ea401aa2a..546a690b0346 100644 --- a/include/uapi/linux/um_timetravel.h +++ b/include/uapi/linux/um_timetravel.h @@ -1,17 +1,6 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ /* - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019 - 2023 Intel Corporation */ #ifndef _UAPI_LINUX_UM_TIMETRAVEL_H #define _UAPI_LINUX_UM_TIMETRAVEL_H @@ -50,6 +39,36 @@ struct um_timetravel_msg { __u64 time; }; +/* max number of file descriptors that can be sent/received in a message */ +#define UM_TIMETRAVEL_MAX_FDS 2 + +/** + * enum um_timetravel_shared_mem_fds - fds sent in ACK message for START message + */ +enum um_timetravel_shared_mem_fds { + /** + * @UM_TIMETRAVEL_SHARED_MEMFD: Index of the shared memory file + * descriptor in the control message + */ + UM_TIMETRAVEL_SHARED_MEMFD, + /** + * @UM_TIMETRAVEL_SHARED_LOGFD: Index of the logging file descriptor + * in the control message + */ + UM_TIMETRAVEL_SHARED_LOGFD, + UM_TIMETRAVEL_SHARED_MAX_FDS, +}; + +/** + * enum um_timetravel_start_ack - ack-time mask for start message + */ +enum um_timetravel_start_ack { + /** + * @UM_TIMETRAVEL_START_ACK_ID: client ID that controller allocated. + */ + UM_TIMETRAVEL_START_ACK_ID = 0xffff, +}; + /** * enum um_timetravel_ops - Operation codes */ @@ -57,7 +76,9 @@ enum um_timetravel_ops { /** * @UM_TIMETRAVEL_ACK: response (ACK) to any previous message, * this usually doesn't carry any data in the 'time' field - * unless otherwise specified below + * unless otherwise specified below, note: while using shared + * memory no ACK for WAIT and RUN messages, for more info see + * &struct um_timetravel_schedshm. */ UM_TIMETRAVEL_ACK = 0, @@ -136,4 +157,134 @@ enum um_timetravel_ops { UM_TIMETRAVEL_BROADCAST = 9, }; +/* version of struct um_timetravel_schedshm */ +#define UM_TIMETRAVEL_SCHEDSHM_VERSION 2 + +/** + * enum um_timetravel_schedshm_cap - time travel capabilities of every client + * + * These flags must be set immediately after processing the ACK to + * the START message, before sending any message to the controller. + */ +enum um_timetravel_schedshm_cap { + /** + * @UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE: client can read current time + * update internal time request to shared memory and read + * free until and send no Ack on RUN and doesn't expect ACK on + * WAIT. + */ + UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE = 0x1, +}; + +/** + * enum um_timetravel_schedshm_flags - time travel flags of every client + */ +enum um_timetravel_schedshm_flags { + /** + * @UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN: client has a request to run. + * It's set by client when it has a request to run, if (and only + * if) the @running_id points to a client that is able to use + * shared memory, i.e. has %UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE + * (this includes the client itself). Otherwise, a message must + * be used. + */ + UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN = 0x1, +}; + +/** + * DOC: Time travel shared memory overview + * + * The main purpose of the shared memory is to avoid all time travel message + * that don't need any action, for example current time can be held in shared + * memory without the need of any client to send a message UM_TIMETRAVEL_GET + * in order to know what's the time. + * + * Since this is shared memory with all clients and controller and controller + * creates the shared memory space, all time values are absolute to controller + * time. So first time client connects to shared memory mode it should take the + * current_time value in shared memory and keep it internally as a diff to + * shared memory times, and once shared memory is initialized, any interaction + * with the controller must happen in the controller time domain, including any + * messages (for clients that are not using shared memory, the controller will + * handle an offset and make the clients think they start at time zero.) + * + * Along with the shared memory file descriptor is sent to the client a logging + * file descriptor, to have all logs related to shared memory, + * logged into one place. note: to have all logs synced into log file at write, + * file should be flushed (fflush) after writing to it. + * + * To avoid memory corruption, we define below for each field who can write to + * it at what time, defined in the structure fields. + * + * To avoid having to pack this struct, all fields in it must be naturally aligned + * (i.e. aligned to their size). + */ + +/** + * union um_timetravel_schedshm_client - UM time travel client struct + * + * Every entity using the shared memory including the controller has a place in + * the um_timetravel_schedshm clients array, that holds info related to the client + * using the shared memory, and can be set only by the client after it gets the + * fd memory. + * + * @capa: bit fields with client capabilities see + * &enum um_timetravel_schedshm_cap, set by client once after getting the + * shared memory file descriptor. + * @flags: bit fields for flags see &enum um_timetravel_schedshm_flags for doc. + * @req_time: request time to run, set by client on every request it needs. + * @name: unique id sent to the controller by client with START message. + */ +union um_timetravel_schedshm_client { + struct { + __u32 capa; + __u32 flags; + __u64 req_time; + __u64 name; + }; + char reserve[128]; /* reserved for future usage */ +}; + +/** + * struct um_timetravel_schedshm - UM time travel shared memory struct + * + * @hdr: header fields: + * @version: Current version struct UM_TIMETRAVEL_SCHEDSHM_VERSION, + * set by controller once at init, clients must check this after mapping + * and work without shared memory if they cannot handle the indicated + * version. + * @len: Length of all the memory including header (@hdr), clients should once + * per connection first mmap the header and take the length (@len) to remap the entire size. + * This is done in order to support dynamic struct size letting number of + * clients be dynamic based on controller support. + * @free_until: Stores the next request to run by any client, in order for the + * current client to know how long it can still run. A client needs to (at + * least) reload this value immediately after communicating with any other + * client, since the controller will update this field when a new request + * is made by any client. Clients also must update this value when they + * insert/update an own request into the shared memory while not running + * themselves, and the new request is before than the current value. + * current_time: Current time, can only be set by the client in running state + * (indicated by @running_id), though that client may only run until @free_until, + * so it must remain smaller than @free_until. + * @running_id: The current client in state running, set before a client is + * notified that it's now running. + * @max_clients: size of @clients array, set once at init by the controller. + * @clients: clients array see &union um_timetravel_schedshm_client for doc, + * set only by client. + */ +struct um_timetravel_schedshm { + union { + struct { + __u32 version; + __u32 len; + __u64 free_until; + __u64 current_time; + __u16 running_id; + __u16 max_clients; + }; + char hdr[4096]; /* align to 4K page size */ + }; + union um_timetravel_schedshm_client clients[]; +}; #endif /* _UAPI_LINUX_UM_TIMETRAVEL_H */ -- cgit v1.2.3 From 244389bd42870640c4b5ef672a360da329b579ed Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 3 Jul 2024 10:55:17 +0100 Subject: ASoC: cs35l56: Limit Speaker Volume to +12dB maximum Change CS35L56_MAIN_RENDER_USER_VOLUME_MAX to 48, to limit the maximum value of the Speaker Volume control to +12dB. The minimum value is unchanged so that the default 0dB has the same integer control value. The original maximum of 400 (+100dB) was the largest value that can be mathematically handled by the DSP. The actual maximum amplification is +12dB. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20240703095517.208077-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/sound/cs35l56.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 642ef690ebc2..a6aa112e5741 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -207,7 +207,7 @@ /* CS35L56_MAIN_RENDER_USER_VOLUME */ #define CS35L56_MAIN_RENDER_USER_VOLUME_MIN -400 -#define CS35L56_MAIN_RENDER_USER_VOLUME_MAX 400 +#define CS35L56_MAIN_RENDER_USER_VOLUME_MAX 48 #define CS35L56_MAIN_RENDER_USER_VOLUME_MASK 0x0000FFC0 #define CS35L56_MAIN_RENDER_USER_VOLUME_SHIFT 6 #define CS35L56_MAIN_RENDER_USER_VOLUME_SIGNBIT 9 -- cgit v1.2.3 From d688ffa269421cec73c7e13fd0cb03879b07db89 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 26 Jun 2024 16:32:29 -0600 Subject: perf: arm_pmuv3: Include asm/arm_pmuv3.h from linux/perf/arm_pmuv3.h The arm64 asm/arm_pmuv3.h depends on defines from linux/perf/arm_pmuv3.h. Rather than depend on include order, follow the usual pattern of "linux" headers including "asm" headers of the same name. With this change, the include of linux/kvm_host.h is problematic due to circular includes: In file included from ../arch/arm64/include/asm/arm_pmuv3.h:9, from ../include/linux/perf/arm_pmuv3.h:312, from ../include/kvm/arm_pmu.h:11, from ../arch/arm64/include/asm/kvm_host.h:38, from ../arch/arm64/mm/init.c:41: ../include/linux/kvm_host.h:383:30: error: field 'arch' has incomplete type Switching to asm/kvm_host.h solves the issue. Signed-off-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240626-arm-pmu-3-9-icntr-v2-5-c9784b4f4065@kernel.org Signed-off-by: Will Deacon --- include/linux/perf/arm_pmuv3.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h index 46377e134d67..7867db04ec98 100644 --- a/include/linux/perf/arm_pmuv3.h +++ b/include/linux/perf/arm_pmuv3.h @@ -309,4 +309,6 @@ } \ } while (0) +#include + #endif -- cgit v1.2.3 From d69d804845985c29ab5be5a4b3b1f4787893daf8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 1 Jul 2024 14:07:37 +0200 Subject: driver core: have match() callback in struct bus_type take a const * In the match() callback, the struct device_driver * should not be changed, so change the function callback to be a const *. This is one step of many towards making the driver core safe to have struct device_driver in read-only memory. Because the match() callback is in all busses, all busses are modified to handle this properly. This does entail switching some container_of() calls to container_of_const() to properly handle the constant *. For some busses, like PCI and USB and HV, the const * is cast away in the match callback as those busses do want to modify those structures at this point in time (they have a local lock in the driver structure.) That will have to be changed in the future if they wish to have their struct device * in read-only-memory. Cc: Rafael J. Wysocki Reviewed-by: Alex Elder Acked-by: Sumit Garg Link: https://lore.kernel.org/r/2024070136-wrongdoer-busily-01e8@gregkh Signed-off-by: Greg Kroah-Hartman --- include/acpi/acpi_bus.h | 2 +- include/linux/arm_ffa.h | 2 +- include/linux/cdx/cdx_bus.h | 2 +- include/linux/device/bus.h | 2 +- include/linux/dfl.h | 2 +- include/linux/eisa.h | 2 +- include/linux/fsi.h | 2 +- include/linux/fsl/mc.h | 2 +- include/linux/gameport.h | 2 +- include/linux/greybus.h | 2 +- include/linux/hyperv.h | 6 +----- include/linux/i2c.h | 2 +- include/linux/i3c/device.h | 5 +---- include/linux/maple.h | 2 +- include/linux/mcb.h | 5 +---- include/linux/mdio.h | 19 ++++++------------- include/linux/mhi.h | 2 +- include/linux/mhi_ep.h | 2 +- include/linux/moxtet.h | 9 ++------- include/linux/nd.h | 6 +----- include/linux/pci-epf.h | 3 +-- include/linux/pci.h | 6 ++---- include/linux/phy.h | 2 +- include/linux/pnp.h | 2 +- include/linux/rio.h | 2 +- include/linux/scmi_protocol.h | 2 +- include/linux/serio.h | 2 +- include/linux/slimbus.h | 2 +- include/linux/soc/qcom/apr.h | 2 +- include/linux/soundwire/sdw_type.h | 2 +- include/linux/spi/spi.h | 6 ++---- include/linux/ssb/ssb.h | 2 +- include/linux/tc.h | 2 +- include/linux/tee_drv.h | 2 +- include/linux/virtio.h | 5 +---- include/scsi/scsi_transport_iscsi.h | 2 +- include/sound/ac97/codec.h | 5 +---- include/xen/xenbus.h | 5 +---- 38 files changed, 46 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 1a4dfd7a1c4a..08d27a1a0072 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -562,7 +562,7 @@ static inline void *acpi_driver_data(struct acpi_device *d) } #define to_acpi_device(d) container_of(d, struct acpi_device, dev) -#define to_acpi_driver(d) container_of(d, struct acpi_driver, drv) +#define to_acpi_driver(d) container_of_const(d, struct acpi_driver, drv) static inline struct acpi_device *acpi_dev_parent(struct acpi_device *adev) { diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index c82d56768101..ae70704bfa90 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -149,7 +149,7 @@ struct ffa_driver { struct device_driver driver; }; -#define to_ffa_driver(d) container_of(d, struct ffa_driver, driver) +#define to_ffa_driver(d) container_of_const(d, struct ffa_driver, driver) static inline void ffa_dev_set_drvdata(struct ffa_device *fdev, void *data) { diff --git a/include/linux/cdx/cdx_bus.h b/include/linux/cdx/cdx_bus.h index b57118aaa679..79bb80e56790 100644 --- a/include/linux/cdx/cdx_bus.h +++ b/include/linux/cdx/cdx_bus.h @@ -211,7 +211,7 @@ struct cdx_driver { }; #define to_cdx_driver(_drv) \ - container_of(_drv, struct cdx_driver, driver) + container_of_const(_drv, struct cdx_driver, driver) /* Macro to avoid include chaining to get THIS_MODULE */ #define cdx_driver_register(drv) \ diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index 5ef4ec1c36c3..807831d6bf0f 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -81,7 +81,7 @@ struct bus_type { const struct attribute_group **dev_groups; const struct attribute_group **drv_groups; - int (*match)(struct device *dev, struct device_driver *drv); + int (*match)(struct device *dev, const struct device_driver *drv); int (*uevent)(const struct device *dev, struct kobj_uevent_env *env); int (*probe)(struct device *dev); void (*sync_state)(struct device *dev); diff --git a/include/linux/dfl.h b/include/linux/dfl.h index 0a7a00a0ee7f..1f02db0c1897 100644 --- a/include/linux/dfl.h +++ b/include/linux/dfl.h @@ -71,7 +71,7 @@ struct dfl_driver { }; #define to_dfl_dev(d) container_of(d, struct dfl_device, dev) -#define to_dfl_drv(d) container_of(d, struct dfl_driver, drv) +#define to_dfl_drv(d) container_of_const(d, struct dfl_driver, drv) /* * use a macro to avoid include chaining to get THIS_MODULE. diff --git a/include/linux/eisa.h b/include/linux/eisa.h index b012e30afebd..f98200cae637 100644 --- a/include/linux/eisa.h +++ b/include/linux/eisa.h @@ -60,7 +60,7 @@ struct eisa_driver { struct device_driver driver; }; -#define to_eisa_driver(drv) container_of(drv,struct eisa_driver, driver) +#define to_eisa_driver(drv) container_of_const(drv,struct eisa_driver, driver) /* These external functions are only available when EISA support is enabled. */ #ifdef CONFIG_EISA diff --git a/include/linux/fsi.h b/include/linux/fsi.h index 3df8c54868df..8c5eef808788 100644 --- a/include/linux/fsi.h +++ b/include/linux/fsi.h @@ -44,7 +44,7 @@ struct fsi_driver { }; #define to_fsi_dev(devp) container_of(devp, struct fsi_device, dev) -#define to_fsi_drv(drvp) container_of(drvp, struct fsi_driver, drv) +#define to_fsi_drv(drvp) container_of_const(drvp, struct fsi_driver, drv) extern int fsi_driver_register(struct fsi_driver *fsi_drv); extern void fsi_driver_unregister(struct fsi_driver *fsi_drv); diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index a1b3de87a3d1..083c860fd28e 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -56,7 +56,7 @@ struct fsl_mc_driver { }; #define to_fsl_mc_driver(_drv) \ - container_of(_drv, struct fsl_mc_driver, driver) + container_of_const(_drv, struct fsl_mc_driver, driver) /** * enum fsl_mc_pool_type - Types of allocatable MC bus resources diff --git a/include/linux/gameport.h b/include/linux/gameport.h index 07e370113b2b..86d62fdafd7a 100644 --- a/include/linux/gameport.h +++ b/include/linux/gameport.h @@ -58,7 +58,7 @@ struct gameport_driver { bool ignore; }; -#define to_gameport_driver(d) container_of(d, struct gameport_driver, driver) +#define to_gameport_driver(d) container_of_const(d, struct gameport_driver, driver) int gameport_open(struct gameport *gameport, struct gameport_driver *drv, int mode); void gameport_close(struct gameport *gameport); diff --git a/include/linux/greybus.h b/include/linux/greybus.h index 634c9511cf78..4d58e27ceaf6 100644 --- a/include/linux/greybus.h +++ b/include/linux/greybus.h @@ -64,7 +64,7 @@ struct greybus_driver { struct device_driver driver; }; -#define to_greybus_driver(d) container_of(d, struct greybus_driver, driver) +#define to_greybus_driver(d) container_of_const(d, struct greybus_driver, driver) static inline void greybus_set_drvdata(struct gb_bundle *bundle, void *data) { diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 5e39baa7f6cb..22c22fb91042 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1330,11 +1330,7 @@ struct hv_device { #define device_to_hv_device(d) container_of_const(d, struct hv_device, device) - -static inline struct hv_driver *drv_to_hv_drv(struct device_driver *d) -{ - return container_of(d, struct hv_driver, driver); -} +#define drv_to_hv_drv(d) container_of_const(d, struct hv_driver, driver) static inline void hv_set_drvdata(struct hv_device *dev, void *data) { diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 9709537370ee..cde3de35a89f 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -304,7 +304,7 @@ struct i2c_driver { u32 flags; }; -#define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) +#define to_i2c_driver(d) container_of_const(d, struct i2c_driver, driver) /** * struct i2c_client - represent an I2C slave device diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h index e119f11948ef..0a8a44ac2f02 100644 --- a/include/linux/i3c/device.h +++ b/include/linux/i3c/device.h @@ -183,10 +183,7 @@ struct i3c_driver { const struct i3c_device_id *id_table; }; -static inline struct i3c_driver *drv_to_i3cdrv(struct device_driver *drv) -{ - return container_of(drv, struct i3c_driver, driver); -} +#define drv_to_i3cdrv(__drv) container_of_const(__drv, struct i3c_driver, driver) struct device *i3cdev_to_dev(struct i3c_device *i3cdev); diff --git a/include/linux/maple.h b/include/linux/maple.h index 9aae44efcfd4..3be4e567473c 100644 --- a/include/linux/maple.h +++ b/include/linux/maple.h @@ -97,7 +97,7 @@ int maple_add_packet(struct maple_device *mdev, u32 function, void maple_clear_dev(struct maple_device *mdev); #define to_maple_dev(n) container_of(n, struct maple_device, dev) -#define to_maple_driver(n) container_of(n, struct maple_driver, drv) +#define to_maple_driver(n) container_of_const(n, struct maple_driver, drv) #define maple_get_drvdata(d) dev_get_drvdata(&(d)->dev) #define maple_set_drvdata(d,p) dev_set_drvdata(&(d)->dev, (p)) diff --git a/include/linux/mcb.h b/include/linux/mcb.h index 0b971b24a804..4ab2691f51a6 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -94,10 +94,7 @@ struct mcb_driver { void (*shutdown)(struct mcb_device *mdev); }; -static inline struct mcb_driver *to_mcb_driver(struct device_driver *drv) -{ - return container_of(drv, struct mcb_driver, driver); -} +#define to_mcb_driver(__drv) container_of_const(__drv, struct mcb_driver, driver) static inline void *mcb_get_drvdata(struct mcb_device *dev) { diff --git a/include/linux/mdio.h b/include/linux/mdio.h index 68f8d2e970d4..efeca5bd7600 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -31,7 +31,7 @@ struct mdio_device { struct mii_bus *bus; char modalias[MDIO_NAME_SIZE]; - int (*bus_match)(struct device *dev, struct device_driver *drv); + int (*bus_match)(struct device *dev, const struct device_driver *drv); void (*device_free)(struct mdio_device *mdiodev); void (*device_remove)(struct mdio_device *mdiodev); @@ -57,11 +57,8 @@ struct mdio_driver_common { }; #define MDIO_DEVICE_FLAG_PHY 1 -static inline struct mdio_driver_common * -to_mdio_common_driver(const struct device_driver *driver) -{ - return container_of(driver, struct mdio_driver_common, driver); -} +#define to_mdio_common_driver(__drv_c) container_of_const(__drv_c, struct mdio_driver_common, \ + driver) /* struct mdio_driver: Generic MDIO driver */ struct mdio_driver { @@ -80,12 +77,8 @@ struct mdio_driver { void (*shutdown)(struct mdio_device *mdiodev); }; -static inline struct mdio_driver * -to_mdio_driver(const struct device_driver *driver) -{ - return container_of(to_mdio_common_driver(driver), struct mdio_driver, - mdiodrv); -} +#define to_mdio_driver(__drv_m) container_of_const(to_mdio_common_driver(__drv_m), \ + struct mdio_driver, mdiodrv) /* device driver data */ static inline void mdiodev_set_drvdata(struct mdio_device *mdio, void *data) @@ -105,7 +98,7 @@ void mdio_device_remove(struct mdio_device *mdiodev); void mdio_device_reset(struct mdio_device *mdiodev, int value); int mdio_driver_register(struct mdio_driver *drv); void mdio_driver_unregister(struct mdio_driver *drv); -int mdio_device_bus_match(struct device *dev, struct device_driver *drv); +int mdio_device_bus_match(struct device *dev, const struct device_driver *drv); static inline void mdio_device_get(struct mdio_device *mdiodev) { diff --git a/include/linux/mhi.h b/include/linux/mhi.h index b573f15762f8..ce1f9d737964 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -526,7 +526,7 @@ struct mhi_driver { struct device_driver driver; }; -#define to_mhi_driver(drv) container_of(drv, struct mhi_driver, driver) +#define to_mhi_driver(drv) container_of_const(drv, struct mhi_driver, driver) #define to_mhi_device(dev) container_of(dev, struct mhi_device, dev) /** diff --git a/include/linux/mhi_ep.h b/include/linux/mhi_ep.h index 11bf3212f782..7b40fc8cbe77 100644 --- a/include/linux/mhi_ep.h +++ b/include/linux/mhi_ep.h @@ -221,7 +221,7 @@ struct mhi_ep_driver { }; #define to_mhi_ep_device(dev) container_of(dev, struct mhi_ep_device, dev) -#define to_mhi_ep_driver(drv) container_of(drv, struct mhi_ep_driver, driver) +#define to_mhi_ep_driver(drv) container_of_const(drv, struct mhi_ep_driver, driver) /* * module_mhi_ep_driver() - Helper macro for drivers that don't do diff --git a/include/linux/moxtet.h b/include/linux/moxtet.h index ac577699edfd..dfa4800306ee 100644 --- a/include/linux/moxtet.h +++ b/include/linux/moxtet.h @@ -61,13 +61,8 @@ struct moxtet_driver { struct device_driver driver; }; -static inline struct moxtet_driver * -to_moxtet_driver(struct device_driver *drv) -{ - if (!drv) - return NULL; - return container_of(drv, struct moxtet_driver, driver); -} +#define to_moxtet_driver(__drv) \ + ( __drv ? container_of_const(__drv, struct moxtet_driver, driver) : NULL ) extern int __moxtet_register_driver(struct module *owner, struct moxtet_driver *mdrv); diff --git a/include/linux/nd.h b/include/linux/nd.h index b9771ba1ef87..fa099e295f78 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -84,11 +84,7 @@ struct nd_device_driver { void (*notify)(struct device *dev, enum nvdimm_event event); }; -static inline struct nd_device_driver *to_nd_device_driver( - struct device_driver *drv) -{ - return container_of(drv, struct nd_device_driver, drv); -}; +#define to_nd_device_driver(__drv) container_of_const(__drv, struct nd_device_driver, drv) /** * struct nd_namespace_common - core infrastructure of a namespace diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index adee6a1b35db..980a3c90a5ee 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -105,8 +105,7 @@ struct pci_epf_driver { const struct pci_epf_device_id *id_table; }; -#define to_pci_epf_driver(drv) (container_of((drv), struct pci_epf_driver, \ - driver)) +#define to_pci_epf_driver(drv) container_of_const((drv), struct pci_epf_driver, driver) /** * struct pci_epf_bar - represents the BAR of EPF device diff --git a/include/linux/pci.h b/include/linux/pci.h index cafc5ab1cbcb..aa1c3280b7d0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -958,10 +958,8 @@ struct pci_driver { bool driver_managed_dma; }; -static inline struct pci_driver *to_pci_driver(struct device_driver *drv) -{ - return drv ? container_of(drv, struct pci_driver, driver) : NULL; -} +#define to_pci_driver(__drv) \ + ( __drv ? container_of_const(__drv, struct pci_driver, driver) : NULL ) /** * PCI_DEVICE - macro used to describe a specific PCI device diff --git a/include/linux/phy.h b/include/linux/phy.h index e6e83304558e..8237d5006a99 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1179,7 +1179,7 @@ struct phy_driver { int (*led_polarity_set)(struct phy_device *dev, int index, unsigned long modes); }; -#define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ +#define to_phy_driver(d) container_of_const(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) #define PHY_ANY_ID "MATCH ANY PHY" diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 7f2ff95d2deb..b7a7158aaf65 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -383,7 +383,7 @@ struct pnp_driver { struct device_driver driver; }; -#define to_pnp_driver(drv) container_of(drv, struct pnp_driver, driver) +#define to_pnp_driver(drv) container_of_const(drv, struct pnp_driver, driver) struct pnp_card_driver { struct list_head global_list; diff --git a/include/linux/rio.h b/include/linux/rio.h index 2cd637268b4f..3c29f40f3c94 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -465,7 +465,7 @@ struct rio_driver { struct device_driver driver; }; -#define to_rio_driver(drv) container_of(drv,struct rio_driver, driver) +#define to_rio_driver(drv) container_of_const(drv,struct rio_driver, driver) union rio_pw_msg { struct { diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index 3a9bb5b9a9e8..688466a0e816 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -945,7 +945,7 @@ struct scmi_device { struct scmi_handle *handle; }; -#define to_scmi_dev(d) container_of(d, struct scmi_device, dev) +#define to_scmi_dev(d) container_of_const(d, struct scmi_device, dev) struct scmi_device_id { u8 protocol_id; diff --git a/include/linux/serio.h b/include/linux/serio.h index 7ca41af93b37..bf2191f25350 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -80,7 +80,7 @@ struct serio_driver { struct device_driver driver; }; -#define to_serio_driver(d) container_of(d, struct serio_driver, driver) +#define to_serio_driver(d) container_of_const(d, struct serio_driver, driver) int serio_open(struct serio *serio, struct serio_driver *drv); void serio_close(struct serio *serio); diff --git a/include/linux/slimbus.h b/include/linux/slimbus.h index 3042385b7b40..a4608d9a9684 100644 --- a/include/linux/slimbus.h +++ b/include/linux/slimbus.h @@ -91,7 +91,7 @@ struct slim_driver { struct device_driver driver; const struct slim_device_id *id_table; }; -#define to_slim_driver(d) container_of(d, struct slim_driver, driver) +#define to_slim_driver(d) container_of_const(d, struct slim_driver, driver) /** * struct slim_val_inf - Slimbus value or information element diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h index 7161a3183eda..a532d1e4b1f4 100644 --- a/include/linux/soc/qcom/apr.h +++ b/include/linux/soc/qcom/apr.h @@ -162,7 +162,7 @@ struct apr_driver { }; typedef struct apr_driver gpr_driver_t; -#define to_apr_driver(d) container_of(d, struct apr_driver, driver) +#define to_apr_driver(d) container_of_const(d, struct apr_driver, driver) /* * use a macro to avoid include chaining to get THIS_MODULE diff --git a/include/linux/soundwire/sdw_type.h b/include/linux/soundwire/sdw_type.h index 693320b4f5c2..d405935a45fe 100644 --- a/include/linux/soundwire/sdw_type.h +++ b/include/linux/soundwire/sdw_type.h @@ -13,7 +13,7 @@ static inline int is_sdw_slave(const struct device *dev) return dev->type == &sdw_slave_type; } -#define drv_to_sdw_driver(_drv) container_of(_drv, struct sdw_driver, driver) +#define drv_to_sdw_driver(_drv) container_of_const(_drv, struct sdw_driver, driver) #define sdw_register_driver(drv) \ __sdw_register_driver(drv, THIS_MODULE) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e8e1e798924f..3fc559686d38 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -351,10 +351,8 @@ struct spi_driver { struct device_driver driver; }; -static inline struct spi_driver *to_spi_driver(struct device_driver *drv) -{ - return drv ? container_of(drv, struct spi_driver, driver) : NULL; -} +#define to_spi_driver(__drv) \ + ( __drv ? container_of_const(__drv, struct spi_driver, driver) : NULL ) extern int __spi_register_driver(struct module *owner, struct spi_driver *sdrv); diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index a2257380c3f1..e1fb11e0f12c 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -325,7 +325,7 @@ struct ssb_driver { struct device_driver drv; }; -#define drv_to_ssb_drv(_drv) container_of(_drv, struct ssb_driver, drv) +#define drv_to_ssb_drv(_drv) container_of_const(_drv, struct ssb_driver, drv) extern int __ssb_driver_register(struct ssb_driver *drv, struct module *owner); #define ssb_driver_register(drv) \ diff --git a/include/linux/tc.h b/include/linux/tc.h index 1638660abf5e..8416bae9b126 100644 --- a/include/linux/tc.h +++ b/include/linux/tc.h @@ -108,7 +108,7 @@ struct tc_driver { struct device_driver driver; }; -#define to_tc_driver(drv) container_of(drv, struct tc_driver, driver) +#define to_tc_driver(drv) container_of_const(drv, struct tc_driver, driver) /* * Return TURBOchannel clock frequency in Hz. diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 786b9ae6cf4d..a54c203000ed 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -298,6 +298,6 @@ struct tee_client_driver { }; #define to_tee_client_driver(d) \ - container_of(d, struct tee_client_driver, driver) + container_of_const(d, struct tee_client_driver, driver) #endif /*__TEE_DRV_H*/ diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 96fea920873b..ecc5cb7b8c91 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -209,10 +209,7 @@ struct virtio_driver { int (*restore)(struct virtio_device *dev); }; -static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv) -{ - return container_of(drv, struct virtio_driver, driver); -} +#define drv_to_virtio(__drv) container_of_const(__drv, struct virtio_driver, driver) /* use a macro to avoid include chaining to get THIS_MODULE */ #define register_virtio_driver(drv) \ diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index fb3399e4cd29..bd1243657c01 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -495,7 +495,7 @@ iscsi_destroy_flashnode_sess(struct iscsi_bus_flash_session *fnode_sess); extern void iscsi_destroy_all_flashnode(struct Scsi_Host *shost); extern int iscsi_flashnode_bus_match(struct device *dev, - struct device_driver *drv); + const struct device_driver *drv); extern struct device * iscsi_find_flashnode_sess(struct Scsi_Host *shost, void *data, int (*fn)(struct device *dev, void *data)); diff --git a/include/sound/ac97/codec.h b/include/sound/ac97/codec.h index 2fc641cb1982..882b849b9255 100644 --- a/include/sound/ac97/codec.h +++ b/include/sound/ac97/codec.h @@ -73,10 +73,7 @@ static inline struct ac97_codec_device *to_ac97_device(struct device *d) return container_of(d, struct ac97_codec_device, dev); } -static inline struct ac97_codec_driver *to_ac97_driver(struct device_driver *d) -{ - return container_of(d, struct ac97_codec_driver, driver); -} +#define to_ac97_driver(__drv) container_of_const(__drv, struct ac97_codec_driver, driver) #if IS_ENABLED(CONFIG_AC97_BUS_NEW) int snd_ac97_codec_driver_register(struct ac97_codec_driver *drv); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index ac22cf08c09f..3f90bdd387b6 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -124,10 +124,7 @@ struct xenbus_driver { void (*reclaim_memory)(struct xenbus_device *dev); }; -static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv) -{ - return container_of(drv, struct xenbus_driver, driver); -} +#define to_xenbus_driver(__drv) container_of_const(__drv, struct xenbus_driver, driver) int __must_check __xenbus_register_frontend(struct xenbus_driver *drv, struct module *owner, -- cgit v1.2.3 From 633478695d6b52e0b52f1273d8d11275b627b87d Mon Sep 17 00:00:00 2001 From: Slark Xiao Date: Mon, 1 Jul 2024 10:12:15 +0800 Subject: bus: mhi: host: Allow controller drivers to specify name for the MHI controller MHI devices usually have a product/device name to identify each device uniquely. So let's specify that name in 'struct mhi_controller' so that the client drivers can use this name to uniquely identify the devices and apply any device specific quirks. Signed-off-by: Slark Xiao Reviewed-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20240701021216.17734-2-slark_xiao@163.com [mani: reworked subject and description] Signed-off-by: Manivannan Sadhasivam --- include/linux/mhi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mhi.h b/include/linux/mhi.h index b573f15762f8..fabd6ed8d258 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -290,6 +290,7 @@ struct mhi_controller_config { /** * struct mhi_controller - Master MHI controller structure + * @name: Device name of the MHI controller * @cntrl_dev: Pointer to the struct device of physical bus acting as the MHI * controller (required) * @mhi_dev: MHI device instance for the controller @@ -367,6 +368,7 @@ struct mhi_controller_config { * they can be populated depending on the usecase. */ struct mhi_controller { + const char *name; struct device *cntrl_dev; struct mhi_device *mhi_dev; struct dentry *debugfs_dentry; -- cgit v1.2.3 From 62ce9ef1479729b1a3f8a1b19900e28d68b3625e Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Mon, 1 Jul 2024 15:21:24 +0200 Subject: usb: typec: tcpci: add support to set connector orientation This add the support to set the optional connector orientation bit which is part of the optional CONFIG_STANDARD_OUTPUT register 0x18 [1]. This allows system designers to connect the tcpc orientation pin directly to the 2:1 ss-mux. [1] https://www.usb.org/sites/default/files/documents/usb-port_controller_specification_rev2.0_v1.0_0.pdf Signed-off-by: Marco Felsch Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20240701132133.3054394-1-m.felsch@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/tcpci.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h index 47a86b8a4a50..0ab39b6ea205 100644 --- a/include/linux/usb/tcpci.h +++ b/include/linux/usb/tcpci.h @@ -47,6 +47,9 @@ #define TCPC_SINK_FAST_ROLE_SWAP BIT(0) #define TCPC_CONFIG_STD_OUTPUT 0x18 +#define TCPC_CONFIG_STD_OUTPUT_ORIENTATION_MASK BIT(0) +#define TCPC_CONFIG_STD_OUTPUT_ORIENTATION_NORMAL 0 +#define TCPC_CONFIG_STD_OUTPUT_ORIENTATION_FLIPPED 1 #define TCPC_TCPC_CTRL 0x19 #define TCPC_TCPC_CTRL_ORIENTATION BIT(0) @@ -127,6 +130,7 @@ #define TCPC_DEV_CAP_2 0x26 #define TCPC_STD_INPUT_CAP 0x28 #define TCPC_STD_OUTPUT_CAP 0x29 +#define TCPC_STD_OUTPUT_CAP_ORIENTATION BIT(0) #define TCPC_MSG_HDR_INFO 0x2e #define TCPC_MSG_HDR_INFO_DATA_ROLE BIT(3) @@ -209,6 +213,9 @@ struct tcpci; * swap following Discover Identity on SOP' occurs. * Return true when the TCPM is allowed to request a Vconn swap * after Discovery Identity on SOP. + * @set_orientation: + * Optional; Enable setting the connector orientation + * CONFIG_STANDARD_OUTPUT (0x18) bit0. */ struct tcpci_data { struct regmap *regmap; @@ -216,6 +223,7 @@ struct tcpci_data { unsigned char auto_discharge_disconnect:1; unsigned char vbus_vsafe0v:1; unsigned char cable_comm_capable:1; + unsigned char set_orientation:1; int (*init)(struct tcpci *tcpci, struct tcpci_data *data); int (*set_vconn)(struct tcpci *tcpci, struct tcpci_data *data, -- cgit v1.2.3 From bab2f5e8fd5d2f759db26b78d9db57412888f187 Mon Sep 17 00:00:00 2001 From: Ekansh Gupta Date: Fri, 28 Jun 2024 12:45:01 +0100 Subject: misc: fastrpc: Restrict untrusted app to attach to privileged PD Untrusted application with access to only non-secure fastrpc device node can attach to root_pd or static PDs if it can make the respective init request. This can cause problems as the untrusted application can send bad requests to root_pd or static PDs. Add changes to reject attach to privileged PDs if the request is being made using non-secure fastrpc device node. Fixes: 0871561055e6 ("misc: fastrpc: Add support for audiopd") Cc: stable Signed-off-by: Ekansh Gupta Reviewed-by: Dmitry Baryshkov Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20240628114501.14310-7-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/fastrpc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index f33d914d8f46..91583690bddc 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -8,11 +8,14 @@ #define FASTRPC_IOCTL_ALLOC_DMA_BUFF _IOWR('R', 1, struct fastrpc_alloc_dma_buf) #define FASTRPC_IOCTL_FREE_DMA_BUFF _IOWR('R', 2, __u32) #define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_invoke) +/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH _IO('R', 4) #define FASTRPC_IOCTL_INIT_CREATE _IOWR('R', 5, struct fastrpc_init_create) #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) +/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) +/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static) #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) -- cgit v1.2.3 From f9a748fa5ce0958bea2a03c25f092fbc22e72c67 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 2 May 2024 16:48:21 +0100 Subject: parport: Remove 'drivers' list The list has been empty since: 'commit 3275158fa52a ("parport: remove use of devmodel")' This also means we can remove the 'list_head' from struct parport_driver. Signed-off-by: Dr. David Alan Gilbert Acked-by: Sudip Mukherjee Link: https://lore.kernel.org/r/20240502154823.67235-2-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- include/linux/parport.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/parport.h b/include/linux/parport.h index fff39bc30629..2a4424b60156 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -258,7 +258,6 @@ struct parport_driver { int (*probe)(struct pardevice *); struct device_driver driver; bool devmodel; - struct list_head list; }; #define to_parport_driver(n) container_of(n, struct parport_driver, driver) -- cgit v1.2.3 From ed06e054906c5e7853a36d9ddad8fc94dbb8c252 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 2 May 2024 16:48:22 +0100 Subject: parport: Remove attach function pointer The attach function pointers haven't actually been called since: 'commit 3275158fa52a ("parport: remove use of devmodel")' topped adding entries to the drivers list. If you're converting a driver, look at the 'match_port' function pointer instead. (There are lots of comment references to 'attach' all over, but they probably need some deeper understanding to check the semantics to see if they can be replaced by match_port). Signed-off-by: Dr. David Alan Gilbert Acked-by: Sudip Mukherjee Link: https://lore.kernel.org/r/20240502154823.67235-3-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- include/linux/parport.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/parport.h b/include/linux/parport.h index 2a4424b60156..190de3569e25 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -252,7 +252,6 @@ struct parport { struct parport_driver { const char *name; - void (*attach) (struct parport *); void (*detach) (struct parport *); void (*match_port)(struct parport *); int (*probe)(struct pardevice *); -- cgit v1.2.3 From dfd19866d1a3f681cc12aae67ab05011eb3aa3d8 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 2 May 2024 16:48:23 +0100 Subject: parport: Remove parport_driver.devmodel 'devmodel' hasn't actually been used since: 'commit 3275158fa52a ("parport: remove use of devmodel")' and everyone now has it set to true and has been fixed up; remove the flag. (There are still comments all over about it) Signed-off-by: Dr. David Alan Gilbert Acked-by: Sudip Mukherjee Link: https://lore.kernel.org/r/20240502154823.67235-4-linux@treblig.org Signed-off-by: Greg Kroah-Hartman --- include/linux/parport.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/parport.h b/include/linux/parport.h index 190de3569e25..464c2ad28039 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -256,7 +256,6 @@ struct parport_driver { void (*match_port)(struct parport *); int (*probe)(struct pardevice *); struct device_driver driver; - bool devmodel; }; #define to_parport_driver(n) container_of(n, struct parport_driver, driver) @@ -299,9 +298,6 @@ int __must_check __parport_register_driver(struct parport_driver *, * to receive notifications about ports being found in the * system, as well as ports no longer available. * - * If devmodel is true then the new device model is used - * for registration. - * * The @driver structure is allocated by the caller and must not be * deallocated until after calling parport_unregister_driver(). * -- cgit v1.2.3 From eb054d67b21a53f6ccf3af49a62fb99397b48fc2 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Wed, 3 Jul 2024 11:16:03 +0100 Subject: iommu/arm-smmu-v3: Add support for dirty tracking in domain alloc This provides all the infrastructure to enable dirty tracking if the hardware has the capability and domain alloc request for it. Also, add a device_iommu_capable() check in iommufd core for IOMMU_CAP_DIRTY_TRACKING before we request a user domain with dirty tracking support. Please note, we still report no support for IOMMU_CAP_DIRTY_TRACKING as it will finally be enabled in a subsequent patch. Signed-off-by: Joao Martins Reviewed-by: Ryan Roberts Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20240703101604.2576-5-shameerali.kolothum.thodi@huawei.com Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 86cf1f7ae389..f9a81761bfce 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -85,6 +85,8 @@ struct io_pgtable_cfg { * * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability * attributes set in the TCR for a non-coherent page-table walker. + * + * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -92,6 +94,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4) #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) + #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; -- cgit v1.2.3 From 58cd0cba82976a5a21bdfc78783952b147837a84 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 28 Jun 2024 19:44:42 +0300 Subject: drm: Add helpers for q4 fixed point values Add helpers to convert between q4 fixed point and integer/fraction values. Also add the format/argument macros required to printk q4 fixed point variables. The q4 notation is based on the short variant described by https://en.wikipedia.org/wiki/Q_(number_format) where only the number of fraction bits in the fixed point value are defined, while the full size is deducted from the container type, that is the size of int for these helpers. Using the fxp_ prefix, which makes moving these helpers outside of drm to a more generic place easier, if they prove to be useful. These are needed by later patches dumping the Display Stream Compression configuration in DRM core and in the i915 driver to replace the corresponding bpp_x16 helpers defined locally in the driver. v2: Use the more generic/descriptive fxp_q4 prefix instead of drm_x16. (Jani) Cc: Jani Nikula Acked-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240628164451.1177612-2-imre.deak@intel.com --- include/drm/drm_fixed.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index 81572d32db0c..ef8bc8d72039 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -214,4 +214,27 @@ static inline s64 drm_fixp_exp(s64 x) return sum; } +static inline int fxp_q4_from_int(int val_int) +{ + return val_int << 4; +} + +static inline int fxp_q4_to_int(int val_q4) +{ + return val_q4 >> 4; +} + +static inline int fxp_q4_to_int_roundup(int val_q4) +{ + return (val_q4 + 0xf) >> 4; +} + +static inline int fxp_q4_to_frac(int val_q4) +{ + return val_q4 & 0xf; +} + +#define FXP_Q4_FMT "%d.%04d" +#define FXP_Q4_ARGS(val_q4) fxp_q4_to_int(val_q4), (fxp_q4_to_frac(val_q4) * 625) + #endif -- cgit v1.2.3 From 74c3f5da233637bdf828e1ce67a5c1083630a47b Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 28 Jun 2024 19:44:43 +0300 Subject: drm/display/dsc: Add a helper to dump the DSC configuration Add a helper to dump the Display Stream Compression configuration, taken into use in the i915 driver by a later patch. v2: - Rebase on the s/DRM_X16/FXP_Q4 change. - s/DSC configration/DSC configuration in the function documentation. Acked-by: Jani Nikula Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240628164451.1177612-3-imre.deak@intel.com --- include/drm/display/drm_dsc_helper.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/drm/display/drm_dsc_helper.h b/include/drm/display/drm_dsc_helper.h index 913aa2071232..2c2b9033f60f 100644 --- a/include/drm/display/drm_dsc_helper.h +++ b/include/drm/display/drm_dsc_helper.h @@ -17,6 +17,8 @@ enum drm_dsc_params_type { DRM_DSC_1_2_420, }; +struct drm_printer; + void drm_dsc_dp_pps_header_init(struct dp_sdp_header *pps_header); int drm_dsc_dp_rc_buffer_size(u8 rc_buffer_block_size, u8 rc_buffer_size); void drm_dsc_pps_payload_pack(struct drm_dsc_picture_parameter_set *pps_sdp, @@ -28,6 +30,7 @@ int drm_dsc_compute_rc_parameters(struct drm_dsc_config *vdsc_cfg); u8 drm_dsc_initial_scale_value(const struct drm_dsc_config *dsc); u32 drm_dsc_flatness_det_thresh(const struct drm_dsc_config *dsc); u32 drm_dsc_get_bpp_int(const struct drm_dsc_config *vdsc_cfg); +void drm_dsc_dump_config(struct drm_printer *p, int indent, const struct drm_dsc_config *cfg); #endif /* _DRM_DSC_HELPER_H_ */ -- cgit v1.2.3 From da042a3655151157c06e71a583e883ab2d86d1ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Jul 2024 17:10:19 +0200 Subject: block: split integrity support out of bio.h Split struct bio_integrity_payload and the related prototypes out of bio.h into a separate bio-integrity.h header so that it is only pulled in by the few places that need it. Signed-off-by: Christoph Hellwig Reviewed-by: Anuj Gupta Reviewed-by: Kanchan Joshi Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240702151047.1746127-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio-integrity.h | 153 +++++++++++++++++++++++++++++++++++++++++ include/linux/bio.h | 156 ------------------------------------------ include/linux/blk-integrity.h | 1 + 3 files changed, 154 insertions(+), 156 deletions(-) create mode 100644 include/linux/bio-integrity.h (limited to 'include') diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h new file mode 100644 index 000000000000..70ef19a0dc7e --- /dev/null +++ b/include/linux/bio-integrity.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BIO_INTEGRITY_H +#define _LINUX_BIO_INTEGRITY_H + +#include + +enum bip_flags { + BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ + BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ + BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ + BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ + BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */ + BIP_INTEGRITY_USER = 1 << 5, /* Integrity payload is user address */ + BIP_COPY_USER = 1 << 6, /* Kernel bounce buffer in use */ +}; + +struct bio_integrity_payload { + struct bio *bip_bio; /* parent bio */ + + struct bvec_iter bip_iter; + + unsigned short bip_vcnt; /* # of integrity bio_vecs */ + unsigned short bip_max_vcnt; /* integrity bio_vec slots */ + unsigned short bip_flags; /* control flags */ + + struct bvec_iter bio_iter; /* for rewinding parent bio */ + + struct work_struct bip_work; /* I/O completion */ + + struct bio_vec *bip_vec; + struct bio_vec bip_inline_vecs[];/* embedded bvec array */ +}; + +#ifdef CONFIG_BLK_DEV_INTEGRITY + +#define bip_for_each_vec(bvl, bip, iter) \ + for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter) + +#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \ + for_each_bio(_bio) \ + bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) + +static inline struct bio_integrity_payload *bio_integrity(struct bio *bio) +{ + if (bio->bi_opf & REQ_INTEGRITY) + return bio->bi_integrity; + + return NULL; +} + +static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + + if (bip) + return bip->bip_flags & flag; + + return false; +} + +static inline sector_t bip_get_seed(struct bio_integrity_payload *bip) +{ + return bip->bip_iter.bi_sector; +} + +static inline void bip_set_seed(struct bio_integrity_payload *bip, + sector_t seed) +{ + bip->bip_iter.bi_sector = seed; +} + +struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp, + unsigned int nr); +int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, + unsigned int offset); +int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed); +void bio_integrity_unmap_free_user(struct bio *bio); +bool bio_integrity_prep(struct bio *bio); +void bio_integrity_advance(struct bio *bio, unsigned int bytes_done); +void bio_integrity_trim(struct bio *bio); +int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask); +int bioset_integrity_create(struct bio_set *bs, int pool_size); +void bioset_integrity_free(struct bio_set *bs); +void bio_integrity_init(void); + +#else /* CONFIG_BLK_DEV_INTEGRITY */ + +static inline void *bio_integrity(struct bio *bio) +{ + return NULL; +} + +static inline int bioset_integrity_create(struct bio_set *bs, int pool_size) +{ + return 0; +} + +static inline void bioset_integrity_free(struct bio_set *bs) +{ +} + +static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf, + ssize_t len, u32 seed) +{ + return -EINVAL; +} + +static inline void bio_integrity_unmap_free_user(struct bio *bio) +{ +} + +static inline bool bio_integrity_prep(struct bio *bio) +{ + return true; +} + +static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, + gfp_t gfp_mask) +{ + return 0; +} + +static inline void bio_integrity_advance(struct bio *bio, + unsigned int bytes_done) +{ +} + +static inline void bio_integrity_trim(struct bio *bio) +{ +} + +static inline void bio_integrity_init(void) +{ +} + +static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) +{ + return false; +} + +static inline void *bio_integrity_alloc(struct bio *bio, gfp_t gfp, + unsigned int nr) +{ + return ERR_PTR(-EINVAL); +} + +static inline int bio_integrity_add_page(struct bio *bio, struct page *page, + unsigned int len, unsigned int offset) +{ + return 0; +} +#endif /* CONFIG_BLK_DEV_INTEGRITY */ +#endif /* _LINUX_BIO_INTEGRITY_H */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 818e93612947..a46e2047bea4 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -321,69 +321,6 @@ static inline void bio_next_folio(struct folio_iter *fi, struct bio *bio) #define bio_for_each_folio_all(fi, bio) \ for (bio_first_folio(&fi, bio, 0); fi.folio; bio_next_folio(&fi, bio)) -enum bip_flags { - BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ - BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ - BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ - BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ - BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */ - BIP_INTEGRITY_USER = 1 << 5, /* Integrity payload is user address */ - BIP_COPY_USER = 1 << 6, /* Kernel bounce buffer in use */ -}; - -/* - * bio integrity payload - */ -struct bio_integrity_payload { - struct bio *bip_bio; /* parent bio */ - - struct bvec_iter bip_iter; - - unsigned short bip_vcnt; /* # of integrity bio_vecs */ - unsigned short bip_max_vcnt; /* integrity bio_vec slots */ - unsigned short bip_flags; /* control flags */ - - struct bvec_iter bio_iter; /* for rewinding parent bio */ - - struct work_struct bip_work; /* I/O completion */ - - struct bio_vec *bip_vec; - struct bio_vec bip_inline_vecs[];/* embedded bvec array */ -}; - -#if defined(CONFIG_BLK_DEV_INTEGRITY) - -static inline struct bio_integrity_payload *bio_integrity(struct bio *bio) -{ - if (bio->bi_opf & REQ_INTEGRITY) - return bio->bi_integrity; - - return NULL; -} - -static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) -{ - struct bio_integrity_payload *bip = bio_integrity(bio); - - if (bip) - return bip->bip_flags & flag; - - return false; -} - -static inline sector_t bip_get_seed(struct bio_integrity_payload *bip) -{ - return bip->bip_iter.bi_sector; -} - -static inline void bip_set_seed(struct bio_integrity_payload *bip, - sector_t seed) -{ - bip->bip_iter.bi_sector = seed; -} - -#endif /* CONFIG_BLK_DEV_INTEGRITY */ - void bio_trim(struct bio *bio, sector_t offset, sector_t size); extern struct bio *bio_split(struct bio *bio, int sectors, gfp_t gfp, struct bio_set *bs); @@ -721,99 +658,6 @@ static inline bool bioset_initialized(struct bio_set *bs) return bs->bio_slab != NULL; } -#if defined(CONFIG_BLK_DEV_INTEGRITY) - -#define bip_for_each_vec(bvl, bip, iter) \ - for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter) - -#define bio_for_each_integrity_vec(_bvl, _bio, _iter) \ - for_each_bio(_bio) \ - bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) - -int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed); -void bio_integrity_unmap_free_user(struct bio *bio); -extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); -extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); -extern bool bio_integrity_prep(struct bio *); -extern void bio_integrity_advance(struct bio *, unsigned int); -extern void bio_integrity_trim(struct bio *); -extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); -extern int bioset_integrity_create(struct bio_set *, int); -extern void bioset_integrity_free(struct bio_set *); -extern void bio_integrity_init(void); - -#else /* CONFIG_BLK_DEV_INTEGRITY */ - -static inline void *bio_integrity(struct bio *bio) -{ - return NULL; -} - -static inline int bioset_integrity_create(struct bio_set *bs, int pool_size) -{ - return 0; -} - -static inline void bioset_integrity_free (struct bio_set *bs) -{ - return; -} - -static inline bool bio_integrity_prep(struct bio *bio) -{ - return true; -} - -static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, - gfp_t gfp_mask) -{ - return 0; -} - -static inline void bio_integrity_advance(struct bio *bio, - unsigned int bytes_done) -{ - return; -} - -static inline void bio_integrity_trim(struct bio *bio) -{ - return; -} - -static inline void bio_integrity_init(void) -{ - return; -} - -static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) -{ - return false; -} - -static inline void *bio_integrity_alloc(struct bio * bio, gfp_t gfp, - unsigned int nr) -{ - return ERR_PTR(-EINVAL); -} - -static inline int bio_integrity_add_page(struct bio *bio, struct page *page, - unsigned int len, unsigned int offset) -{ - return 0; -} - -static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf, - ssize_t len, u32 seed) -{ - return -EINVAL; -} -static inline void bio_integrity_unmap_free_user(struct bio *bio) -{ -} - -#endif /* CONFIG_BLK_DEV_INTEGRITY */ - /* * Mark a bio as polled. Note that for async polled IO, the caller must * expect -EWOULDBLOCK if we cannot allocate a request (or other resources). diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 804f856ed3e5..de98049b7ded 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -3,6 +3,7 @@ #define _LINUX_BLK_INTEGRITY_H #include +#include struct request; -- cgit v1.2.3 From 21671a1ed1ff22e158ebe9d619943f926f03f5cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Jul 2024 17:10:20 +0200 Subject: block: also return bio_integrity_payload * from stubs struct bio_integrity_payload is defined unconditionally. No need to return void * from bio_integrity() and bio_integrity_alloc(). Signed-off-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Reviewed-by: Anuj Gupta Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240702151047.1746127-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio-integrity.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h index 70ef19a0dc7e..cac24dac06ff 100644 --- a/include/linux/bio-integrity.h +++ b/include/linux/bio-integrity.h @@ -85,7 +85,7 @@ void bio_integrity_init(void); #else /* CONFIG_BLK_DEV_INTEGRITY */ -static inline void *bio_integrity(struct bio *bio) +static inline struct bio_integrity_payload *bio_integrity(struct bio *bio) { return NULL; } @@ -138,8 +138,8 @@ static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) return false; } -static inline void *bio_integrity_alloc(struct bio *bio, gfp_t gfp, - unsigned int nr) +static inline struct bio_integrity_payload * +bio_integrity_alloc(struct bio *bio, gfp_t gfp, unsigned int nr) { return ERR_PTR(-EINVAL); } -- cgit v1.2.3 From 85253bac4d02b1f95d6109c221aeccd7a262ec4d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Jul 2024 17:10:23 +0200 Subject: block: don't free submitter owned integrity payload on I/O completion Currently __bio_integrity_endio frees the integrity payload unless it is explicitly marked as user-mapped. This means in-kernel callers that allocate their own integrity payload never get to see it on I/O completion. The current two users don't need it as they just pre-mapped PI tuples received over the network, but this limits uses of integrity data lot. Change bio_integrity_endio to call __bio_integrity_endio for block layer generated integrity data only, and leave freeing of submitter allocated integrity data to bio_uninit which also gets called from the final bio_put. This requires that unmapping user mapped or copied integrity data is now always done by the caller, and the special BIP_INTEGRITY_USER flag can go away. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Kanchan Joshi Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240702151047.1746127-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio-integrity.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h index cac24dac06ff..3823d9be0d07 100644 --- a/include/linux/bio-integrity.h +++ b/include/linux/bio-integrity.h @@ -10,8 +10,7 @@ enum bip_flags { BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */ - BIP_INTEGRITY_USER = 1 << 5, /* Integrity payload is user address */ - BIP_COPY_USER = 1 << 6, /* Kernel bounce buffer in use */ + BIP_COPY_USER = 1 << 5, /* Kernel bounce buffer in use */ }; struct bio_integrity_payload { -- cgit v1.2.3 From 74cc150282e41c6c0704cd305c9a4392dc64ef4d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Jul 2024 17:10:24 +0200 Subject: block: don't free the integrity payload in bio_integrity_unmap_free_user Now that the integrity payload is always freed in bio_uninit, don't bother freeing it a little earlier in bio_integrity_unmap_free_user. With that the separate bio_integrity_unmap_free_user can go away by just passing the bio to bio_integrity_unmap_user. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Kanchan Joshi Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240702151047.1746127-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio-integrity.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h index 3823d9be0d07..dd831c269e99 100644 --- a/include/linux/bio-integrity.h +++ b/include/linux/bio-integrity.h @@ -73,7 +73,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp, int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int offset); int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed); -void bio_integrity_unmap_free_user(struct bio *bio); +void bio_integrity_unmap_user(struct bio *bio); bool bio_integrity_prep(struct bio *bio); void bio_integrity_advance(struct bio *bio, unsigned int bytes_done); void bio_integrity_trim(struct bio *bio); @@ -104,7 +104,7 @@ static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf, return -EINVAL; } -static inline void bio_integrity_unmap_free_user(struct bio *bio) +static inline void bio_integrity_unmap_user(struct bio *bio) { } -- cgit v1.2.3 From 1028f391d5f9d4248e2f49193e6de2516ad630f8 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Wed, 3 Jul 2024 00:36:46 +0000 Subject: cgroup/misc: Introduce misc.peak Introduce misc.peak to record the historical maximum usage of the resource, as in some scenarios the value of misc.max could be adjusted based on the peak usage of the resource. Signed-off-by: Xiu Jianfeng Signed-off-by: Tejun Heo --- include/linux/misc_cgroup.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index d70eab2501ee..618392d41975 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -31,11 +31,13 @@ struct misc_cg; /** * struct misc_res: Per cgroup per misc type resource * @max: Maximum limit on the resource. + * @watermark: Historical maximum usage of the resource. * @usage: Current usage of the resource. * @events: Number of times, the resource limit exceeded. */ struct misc_res { u64 max; + atomic64_t watermark; atomic64_t usage; atomic64_t events; }; -- cgit v1.2.3 From d00106bbdfa82732c23cba44491c38f8c410d865 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Mon, 1 Jul 2024 16:47:55 +0200 Subject: vdso: Add comment about reason for vdso struct ordering struct vdso_data is optimized for fast access to the often required struct members. The optimization is not documented in the struct description but it should be kept in mind, when working with the vdso_data struct. Add a comment to the struct description. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20240701-vdso-cleanup-v1-2-36eb64e7ece2@linutronix.de --- include/vdso/datapage.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index d04d394db064..7647e0946f50 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -77,6 +77,10 @@ struct vdso_timestamp { * vdso_data will be accessed by 64 bit and compat code at the same time * so we should be careful before modifying this structure. * + * The ordering of the struct members is optimized to have fast access to the + * often required struct members which are related to CLOCK_REALTIME and + * CLOCK_MONOTONIC. This information is stored in the first cache lines. + * * @basetime is used to store the base time for the system wide time getter * VVAR page. * -- cgit v1.2.3 From 4ecaf7e98a3ae0c843d67c76649ecc694232834b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 2 Jul 2024 15:33:54 -0400 Subject: tracing: Have memmapped ring buffer use ioctl of "R" range 0x20-2F To prevent conflicts with other ioctl numbers to allow strace to have an idea of what is happening, add the range of ioctls for the trace buffer mapping from _IO("T", 0x1) to the range of "R" 0x20 - 0x2F. Link: https://lore.kernel.org/linux-trace-kernel/20240630105322.GA17573@altlinux.org/ Link: https://lore.kernel.org/linux-trace-kernel/20240630213626.GA23566@altlinux.org/ Cc: Jonathan Corbet Fixes: cf9f0f7c4c5bb ("tracing: Allow user-space mapping of the ring-buffer") Link: https://lore.kernel.org/20240702153354.367861db@rorschach.local.home Reported-by: "Dmitry V. Levin" Reviewed-by: Mathieu Desnoyers Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/uapi/linux/trace_mmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h index bd1066754220..c102ef35d11e 100644 --- a/include/uapi/linux/trace_mmap.h +++ b/include/uapi/linux/trace_mmap.h @@ -43,6 +43,6 @@ struct trace_buffer_meta { __u64 Reserved2; }; -#define TRACE_MMAP_IOCTL_GET_READER _IO('T', 0x1) +#define TRACE_MMAP_IOCTL_GET_READER _IO('R', 0x20) #endif /* _TRACE_MMAP_H_ */ -- cgit v1.2.3 From bf95919fe1917efa8f5da83057ff9fc11130aa55 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 1 Jul 2024 09:39:36 +0200 Subject: ASoC: dapm: Use unsigned for number of widgets in snd_soc_dapm_new_controls() Number of widgets in array passed to snd_soc_dapm_new_controls() cannot be negative, so make it explicit by using 'unsigned int', just like snd_soc_add_component_controls() is doing. Reviewed-by: Dmitry Baryshkov Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240701-b4-qcom-audio-lpass-codec-cleanups-v3-4-6d98d4dd1ef5@linaro.org Signed-off-by: Mark Brown --- include/sound/soc-dapm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 667ecd4daa68..12cd7b5a2202 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -457,7 +457,7 @@ int snd_soc_dapm_get_pin_switch(struct snd_kcontrol *kcontrol, int snd_soc_dapm_put_pin_switch(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *uncontrol); int snd_soc_dapm_new_controls(struct snd_soc_dapm_context *dapm, - const struct snd_soc_dapm_widget *widget, int num); + const struct snd_soc_dapm_widget *widget, unsigned int num); struct snd_soc_dapm_widget *snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm, const struct snd_soc_dapm_widget *widget); struct snd_soc_dapm_widget *snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm, -- cgit v1.2.3 From 526e21a2aa6fa7ac3309a8c87f4d6e5f7e407cb6 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 3 Jul 2024 07:20:34 +0900 Subject: firewire: ohci: add tracepoints event for data of Self-ID DMA In 1394 OHCI, the SelfIDComplete event occurs when the hardware has finished transmitting all of the self ID packets received during the bus initialization process to the host memory by DMA. This commit adds a tracepoints event for this event to trace the timing and packet data of Self-ID DMA. It is the part of following tracepoints events helpful to debug some events at bus reset; e.g. the issue addressed at a commit d0b06dc48fb1 ("firewire: core: use long bus reset on gap count error")[1]: * firewire_ohci:irqs * firewire_ohci:self_id_complete * firewire:bus_reset_handle * firewire:self_id_sequence They would be also helpful in the problem about invocation timing of hardIRQ and process (workqueue) contexts. We can often see this kind of problem with -rt kernel[2]. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d0b06dc48fb1 [2] https://lore.kernel.org/linux-rt-users/YAwPoaUZ1gTD5y+k@hmbx/ Link: https://lore.kernel.org/r/20240702222034.1378764-6-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire_ohci.h | 54 ++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'include') diff --git a/include/trace/events/firewire_ohci.h b/include/trace/events/firewire_ohci.h index 483aeeb033af..4f9a7f2577f3 100644 --- a/include/trace/events/firewire_ohci.h +++ b/include/trace/events/firewire_ohci.h @@ -9,6 +9,8 @@ #include +// Some macros and helper functions are defined in 'drivers/firewire/ohci.c'. + TRACE_EVENT(irqs, TP_PROTO(unsigned int card_index, u32 events), TP_ARGS(card_index, events), @@ -42,6 +44,58 @@ TRACE_EVENT(irqs, ) ); +#define QUADLET_SIZE 4 + +#define SELF_ID_COUNT_IS_ERROR(reg) \ + (!!(((reg) & OHCI1394_SelfIDCount_selfIDError_MASK) >> OHCI1394_SelfIDCount_selfIDError_SHIFT)) + +#define SELF_ID_COUNT_GET_GENERATION(reg) \ + (((reg) & OHCI1394_SelfIDCount_selfIDGeneration_MASK) >> OHCI1394_SelfIDCount_selfIDGeneration_SHIFT) + +#define SELF_ID_RECEIVE_Q0_GET_GENERATION(quadlet) \ + (((quadlet) & OHCI1394_SELF_ID_RECEIVE_Q0_GENERATION_MASK) >> OHCI1394_SELF_ID_RECEIVE_Q0_GENERATION_SHIFT) + +#define SELF_ID_RECEIVE_Q0_GET_TIMESTAMP(quadlet) \ + (((quadlet) & OHCI1394_SELF_ID_RECEIVE_Q0_TIMESTAMP_MASK) >> OHCI1394_SELF_ID_RECEIVE_Q0_TIMESTAMP_SHIFT) + +TRACE_EVENT(self_id_complete, + TP_PROTO(unsigned int card_index, u32 reg, const __le32 *self_id_receive, bool has_be_header_quirk), + TP_ARGS(card_index, reg, self_id_receive, has_be_header_quirk), + TP_STRUCT__entry( + __field(u8, card_index) + __field(u32, reg) + __dynamic_array(u32, self_id_receive, ohci1394_self_id_count_get_size(reg)) + ), + TP_fast_assign( + __entry->card_index = card_index; + __entry->reg = reg; + { + u32 *ptr = __get_dynamic_array(self_id_receive); + int i; + + for (i = 0; i < __get_dynamic_array_len(self_id_receive) / QUADLET_SIZE; ++i) + ptr[i] = cond_le32_to_cpu(self_id_receive[i], has_be_header_quirk); + } + ), + TP_printk( + "card_index=%u is_error=%s generation_at_bus_reset=%u generation_at_completion=%u timestamp=0x%04x packet_data=%s", + __entry->card_index, + SELF_ID_COUNT_IS_ERROR(__entry->reg) ? "true" : "false", + SELF_ID_COUNT_GET_GENERATION(__entry->reg), + SELF_ID_RECEIVE_Q0_GET_GENERATION(((const u32 *)__get_dynamic_array(self_id_receive))[0]), + SELF_ID_RECEIVE_Q0_GET_TIMESTAMP(((const u32 *)__get_dynamic_array(self_id_receive))[0]), + __print_array(((const u32 *)__get_dynamic_array(self_id_receive)) + 1, + (__get_dynamic_array_len(self_id_receive) / QUADLET_SIZE) - 1, QUADLET_SIZE) + ) +); + +#undef SELF_ID_COUNT_IS_ERROR +#undef SELF_ID_COUNT_GET_GENERATION +#undef SELF_ID_RECEIVE_Q0_GET_GENERATION +#undef SELF_ID_RECEIVE_Q0_GET_TIMESTAMP + +#undef QUADLET_SIZE + #endif // _FIREWIRE_OHCI_TRACE_EVENT_H #include -- cgit v1.2.3 From a19621ed4e0ac9e1d296d07dc8ff8c27d5c03b43 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 15 May 2024 15:07:06 +0800 Subject: mm: add folio_alloc_mpol() Patch series "mm: convert to folio_alloc_mpol()". This patch (of 4): This adds a new folio_alloc_mpol() like folio_alloc() but allocate folio according to NUMA mempolicy. Link: https://lkml.kernel.org/r/20240515070709.78529-1-wangkefeng.wang@huawei.com Link: https://lkml.kernel.org/r/20240515070709.78529-2-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: Hugh Dickins Signed-off-by: Andrew Morton --- include/linux/gfp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 7f9691d375f0..f53f76e0b17e 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -303,6 +303,8 @@ struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order); struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid); struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order); +struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order, + struct mempolicy *mpol, pgoff_t ilx, int nid); struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr, bool hugepage); #else @@ -319,6 +321,11 @@ static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order) { return __folio_alloc_node(gfp, order, numa_node_id()); } +static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order, + struct mempolicy *mpol, pgoff_t ilx, int nid) +{ + return folio_alloc_noprof(gfp, order); +} #define vma_alloc_folio_noprof(gfp, order, vma, addr, hugepage) \ folio_alloc_noprof(gfp, order) #endif @@ -326,6 +333,7 @@ static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order) #define alloc_pages(...) alloc_hooks(alloc_pages_noprof(__VA_ARGS__)) #define alloc_pages_mpol(...) alloc_hooks(alloc_pages_mpol_noprof(__VA_ARGS__)) #define folio_alloc(...) alloc_hooks(folio_alloc_noprof(__VA_ARGS__)) +#define folio_alloc_mpol(...) alloc_hooks(folio_alloc_mpol_noprof(__VA_ARGS__)) #define vma_alloc_folio(...) alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__)) #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -- cgit v1.2.3 From 7f83bf14603ef41a44dc907594d749a283e22c37 Mon Sep 17 00:00:00 2001 From: Ran Xiaokai Date: Wed, 15 May 2024 10:47:54 +0800 Subject: mm/huge_memory: mark racy access onhuge_anon_orders_always huge_anon_orders_always is accessed lockless, it is better to use the READ_ONCE() wrapper. This is not fixing any visible bug, hopefully this can cease some KCSAN complains in the future. Also do that for huge_anon_orders_madvise. Link: https://lkml.kernel.org/r/20240515104754889HqrahFPePOIE1UlANHVAh@zte.com.cn Signed-off-by: Ran Xiaokai Acked-by: David Hildenbrand Reviewed-by: Lu Zhongjun Reviewed-by: xu xin Cc: Yang Yang Cc: Matthew Wilcox (Oracle) Cc: Yang Shi Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 2aa986a5cd1b..088d66a54643 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -134,8 +134,8 @@ static inline bool hugepage_flags_enabled(void) * So we don't need to look at huge_anon_orders_inherit. */ return hugepage_global_enabled() || - huge_anon_orders_always || - huge_anon_orders_madvise; + READ_ONCE(huge_anon_orders_always) || + READ_ONCE(huge_anon_orders_madvise); } static inline int highest_order(unsigned long orders) -- cgit v1.2.3 From 564a2ee9f9f66ceef135b7208cff705d48ad76c4 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 22 May 2024 01:58:51 +0800 Subject: mm: remove page_file_offset and folio_file_pos These two helpers were useful for mixed usage of swap cache and page cache, which help retrieve the corresponding file or swap device offset of a page or folio. They were introduced in commit f981c5950fa8 ("mm: methods for teaching filesystems about PG_swapcache pages") and used in commit d56b4ddf7781 ("nfs: teach the NFS client how to treat PG_swapcache pages"), suppose to be used with direct_IO for swap over fs. But after commit e1209d3a7a67 ("mm: introduce ->swap_rw and use it for reads from SWP_FS_OPS swap-space"), swap with direct_IO is no more, and swap cache mapping is never exposed to fs. Now we have dropped all users of page_file_offset and folio_file_pos, so they can be deleted. Link: https://lkml.kernel.org/r/20240521175854.96038-10-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: "Huang, Ying" Cc: Anna Schumaker Cc: Barry Song Cc: Chao Yu Cc: Chris Li Cc: David Hildenbrand Cc: David Howells Cc: Hugh Dickins Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jeff Layton Cc: Marc Dionne Cc: Matthew Wilcox (Oracle) Cc: Minchan Kim Cc: NeilBrown Cc: Ryan Roberts Cc: Ryusuke Konishi Cc: Trond Myklebust Cc: Xiubo Li Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 59f1df0cde5a..78f2cd8c73ff 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -932,11 +932,6 @@ static inline loff_t page_offset(struct page *page) return ((loff_t)page->index) << PAGE_SHIFT; } -static inline loff_t page_file_offset(struct page *page) -{ - return ((loff_t)page_index(page)) << PAGE_SHIFT; -} - /** * folio_pos - Returns the byte position of this folio in its file. * @folio: The folio. @@ -946,18 +941,6 @@ static inline loff_t folio_pos(struct folio *folio) return page_offset(&folio->page); } -/** - * folio_file_pos - Returns the byte position of this folio in its file. - * @folio: The folio. - * - * This differs from folio_pos() for folios which belong to a swap file. - * NFS is the only filesystem today which needs to use folio_file_pos(). - */ -static inline loff_t folio_file_pos(struct folio *folio) -{ - return page_file_offset(&folio->page); -} - /* * Get the offset in PAGE_SIZE (even for hugetlb folios). */ -- cgit v1.2.3 From 05b0c7edad9b8a5ccf1b46b01e1b96fcd10b50d8 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 22 May 2024 01:58:52 +0800 Subject: mm: drop page_index and simplify folio_index There are two helpers for retrieving the index within address space for mixed usage of swap cache and page cache: - page_index - folio_index This commit drops page_index, as we have eliminated all users, and converts folio_index's helper __page_file_index to use folio to avoid the page conversion. Link: https://lkml.kernel.org/r/20240521175854.96038-11-ryncsn@gmail.com Signed-off-by: Kairui Song Reviewed-by: "Huang, Ying" Cc: Anna Schumaker Cc: Barry Song Cc: Chao Yu Cc: Chris Li Cc: David Hildenbrand Cc: David Howells Cc: Hugh Dickins Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jeff Layton Cc: Marc Dionne Cc: Matthew Wilcox (Oracle) Cc: Minchan Kim Cc: NeilBrown Cc: Ryan Roberts Cc: Ryusuke Konishi Cc: Trond Myklebust Cc: Xiubo Li Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/mm.h | 13 ------------- include/linux/pagemap.h | 8 ++++---- 2 files changed, 4 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index eb7c96d24ac0..99174fac3d47 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2295,19 +2295,6 @@ static inline void *folio_address(const struct folio *folio) return page_address(&folio->page); } -extern pgoff_t __page_file_index(struct page *page); - -/* - * Return the pagecache index of the passed page. Regular pagecache pages - * use ->index whereas swapcache pages use swp_offset(->private) - */ -static inline pgoff_t page_index(struct page *page) -{ - if (unlikely(PageSwapCache(page))) - return __page_file_index(page); - return page->index; -} - /* * Return true only if the page has been allocated with * ALLOC_NO_WATERMARKS and the low watermark was not diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 78f2cd8c73ff..1586b5c21e81 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -792,7 +792,7 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, mapping_gfp_mask(mapping)); } -#define swapcache_index(folio) __page_file_index(&(folio)->page) +extern pgoff_t __folio_swap_cache_index(struct folio *folio); /** * folio_index - File index of a folio. @@ -807,9 +807,9 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, */ static inline pgoff_t folio_index(struct folio *folio) { - if (unlikely(folio_test_swapcache(folio))) - return swapcache_index(folio); - return folio->index; + if (unlikely(folio_test_swapcache(folio))) + return __folio_swap_cache_index(folio); + return folio->index; } /** -- cgit v1.2.3 From 63818aaf0da8c036d600424ac961620dcdc13ce2 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Mon, 20 May 2024 15:44:07 -0700 Subject: mm/hugetlb: remove {Set,Clear}Hpage macros All users have been converted to use the folio version of these macros, we can safely remove the page based interface. Link: https://lkml.kernel.org/r/20240520224407.110062-1-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Reviewed-by: Oscar Salvador Cc: Muchun Song Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 2b3c3a404769..15a58f69782c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -625,18 +625,14 @@ static __always_inline \ void folio_set_hugetlb_##flname(struct folio *folio) \ { void *private = &folio->private; \ set_bit(HPG_##flname, private); \ - } \ -static inline void SetHPage##uname(struct page *page) \ - { set_bit(HPG_##flname, &(page->private)); } + } #define CLEARHPAGEFLAG(uname, flname) \ static __always_inline \ void folio_clear_hugetlb_##flname(struct folio *folio) \ { void *private = &folio->private; \ clear_bit(HPG_##flname, private); \ - } \ -static inline void ClearHPage##uname(struct page *page) \ - { clear_bit(HPG_##flname, &(page->private)); } + } #else #define TESTHPAGEFLAG(uname, flname) \ static inline bool \ @@ -648,15 +644,11 @@ static inline int HPage##uname(struct page *page) \ #define SETHPAGEFLAG(uname, flname) \ static inline void \ folio_set_hugetlb_##flname(struct folio *folio) \ - { } \ -static inline void SetHPage##uname(struct page *page) \ { } #define CLEARHPAGEFLAG(uname, flname) \ static inline void \ folio_clear_hugetlb_##flname(struct folio *folio) \ - { } \ -static inline void ClearHPage##uname(struct page *page) \ { } #endif -- cgit v1.2.3 From 6ad28e7e52e2bae76b1d3eb4466999d04d50db92 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 22 May 2024 14:57:13 +0200 Subject: mm/rmap: sanity check that zeropages are not passed to RMAP Using insert_page() we might have previously ended up passing the zeropage into rmap code. Make sure that won't happen again. Note that we won't check the huge zeropage for now, which might still end up in RMAP code. Link: https://lkml.kernel.org/r/20240522125713.775114-4-david@redhat.com Signed-off-by: David Hildenbrand Cc: Dan Williams Cc: Vincent Donnefort Signed-off-by: Andrew Morton --- include/linux/rmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 7229b9baf20d..5cb0d419a1d7 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -200,6 +200,9 @@ static inline void __folio_rmap_sanity_checks(struct folio *folio, /* hugetlb folios are handled separately. */ VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); + /* When (un)mapping zeropages, we should never touch ref+mapcount. */ + VM_WARN_ON_FOLIO(is_zero_folio(folio), folio); + /* * TODO: we get driver-allocated folios that have nothing to do with * the rmap using vm_insert_page(); therefore, we cannot assume that -- cgit v1.2.3 From 23b1b44e6c61295084284aa7d87db863a7802b92 Mon Sep 17 00:00:00 2001 From: Bang Li Date: Wed, 22 May 2024 14:12:02 +0800 Subject: mm: add update_mmu_tlb_range() Patch series "Add update_mmu_tlb_range() to simplify code", v4. This series of commits mainly adds the update_mmu_tlb_range() to batch update tlb in an address range and implement update_mmu_tlb() using update_mmu_tlb_range(). After commit 19eaf44954df ("mm: thp: support allocation of anonymous multi-size THP"), We may need to batch update tlb of a certain address range by calling update_mmu_tlb() in a loop. Using the update_mmu_tlb_range(), we can simplify the code and possibly reduce the execution of some unnecessary code in some architectures. This patch (of 3): Add update_mmu_tlb_range(), we can batch update tlb of an address range. Link: https://lkml.kernel.org/r/20240522061204.117421-1-libang.li@antgroup.com Link: https://lkml.kernel.org/r/20240522061204.117421-2-libang.li@antgroup.com Signed-off-by: Bang Li Acked-by: David Hildenbrand Cc: Chris Zankel Cc: Huacai Chen Cc: Lance Yang Cc: Max Filippov Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Ryan Roberts Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 18019f037bae..17d1caee39ab 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -729,6 +729,13 @@ static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr, * fault. This function updates TLB only, do nothing with cache or others. * It is the difference with function update_mmu_cache. */ +#ifndef update_mmu_tlb_range +static inline void update_mmu_tlb_range(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, unsigned int nr) +{ +} +#endif + #ifndef __HAVE_ARCH_UPDATE_MMU_TLB static inline void update_mmu_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) -- cgit v1.2.3 From 8f65aa32239f1c3f11b7a25bd5921223bafc5fed Mon Sep 17 00:00:00 2001 From: Bang Li Date: Wed, 22 May 2024 14:12:03 +0800 Subject: mm: implement update_mmu_tlb() using update_mmu_tlb_range() Let's make update_mmu_tlb() simply a generic wrapper around update_mmu_tlb_range(). Only the latter can now be overridden by the architecture. We can now remove __HAVE_ARCH_UPDATE_MMU_TLB as well. Link: https://lkml.kernel.org/r/20240522061204.117421-3-libang.li@antgroup.com Signed-off-by: Bang Li Acked-by: David Hildenbrand Cc: Chris Zankel Cc: Huacai Chen Cc: Lance Yang Cc: Max Filippov Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Ryan Roberts Cc: Thomas Bogendoerfer Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 17d1caee39ab..117b807e3f89 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -736,13 +736,11 @@ static inline void update_mmu_tlb_range(struct vm_area_struct *vma, } #endif -#ifndef __HAVE_ARCH_UPDATE_MMU_TLB static inline void update_mmu_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { + update_mmu_tlb_range(vma, address, ptep, 1); } -#define __HAVE_ARCH_UPDATE_MMU_TLB -#endif /* * Some architectures may be able to avoid expensive synchronization -- cgit v1.2.3 From b8b9488d50b7150bd4830dfff487e8d4ef6589ba Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Fri, 24 May 2024 15:53:04 -0600 Subject: mm/memory-failure: improve memory failure action_result messages Added two explicit MF_MSG messages describing failure in get_hwpoison_page. Attemped to document the definition of various action names, and made a few adjustment to the action_result() calls. Link: https://lkml.kernel.org/r/20240524215306.2705454-4-jane.chu@oracle.com Signed-off-by: Jane Chu Reviewed-by: Oscar Salvador Acked-by: Miaohe Lin Cc: Naoya Horiguchi Cc: Oscar Salvador Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 ++ include/ras/ras_event.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 99174fac3d47..f4bf94ca99e3 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4096,6 +4096,7 @@ enum mf_action_page_type { MF_MSG_DIFFERENT_COMPOUND, MF_MSG_HUGE, MF_MSG_FREE_HUGE, + MF_MSG_GET_HWPOISON, MF_MSG_UNMAP_FAILED, MF_MSG_DIRTY_SWAPCACHE, MF_MSG_CLEAN_SWAPCACHE, @@ -4109,6 +4110,7 @@ enum mf_action_page_type { MF_MSG_BUDDY, MF_MSG_DAX, MF_MSG_UNSPLIT_THP, + MF_MSG_ALREADY_POISONED, MF_MSG_UNKNOWN, }; diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 7c47151d5c72..cf7f19b7ce64 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -360,6 +360,7 @@ TRACE_EVENT(aer_event, EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \ EM ( MF_MSG_HUGE, "huge page" ) \ EM ( MF_MSG_FREE_HUGE, "free huge page" ) \ + EM ( MF_MSG_GET_HWPOISON, "get hwpoison page" ) \ EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" ) \ EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" ) \ EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" ) \ @@ -373,6 +374,7 @@ TRACE_EVENT(aer_event, EM ( MF_MSG_BUDDY, "free buddy page" ) \ EM ( MF_MSG_DAX, "dax page" ) \ EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \ + EM ( MF_MSG_ALREADY_POISONED, "already poisoned" ) \ EMe ( MF_MSG_UNKNOWN, "unknown page" ) /* -- cgit v1.2.3 From 188f87f2648b13f5de17d5e068f18d317e0c1f98 Mon Sep 17 00:00:00 2001 From: Eric Chanudet Date: Wed, 22 May 2024 16:38:01 -0400 Subject: mm/mm_init: use node's number of cpus in deferred_page_init_max_threads x86_64 is already using the node's cpu as maximum threads. Make that the default for all archs setting DEFERRED_STRUCT_PAGE_INIT. This returns to the behavior prior making the function arch-specific with commit ecd096506922 ("mm: make deferred init's max threads arch-specific"). Setting DEFERRED_STRUCT_PAGE_INIT and testing on a few arm64 platforms shows faster deferred_init_memmap completions: | | x13s | SA8775p-ride | Ampere R137-P31 | Ampere HR330 | | | Metal, 32GB | VM, 36GB | VM, 58GB | Metal, 128GB | | | 8cpus | 8cpus | 8cpus | 32cpus | |---------|-------------|--------------|-----------------|--------------| | threads | ms (%) | ms (%) | ms (%) | ms (%) | |---------|-------------|--------------|-----------------|--------------| | 1 | 108 (0%) | 72 (0%) | 224 (0%) | 324 (0%) | | cpus | 24 (-77%) | 36 (-50%) | 40 (-82%) | 56 (-82%) | Michael Ellerman reported: : On a machine here (1TB, 40 cores, 4KB pages) the existing code gives: : : [ 0.500124] node 2 deferred pages initialised in 210ms : [ 0.515790] node 3 deferred pages initialised in 230ms : [ 0.516061] node 0 deferred pages initialised in 230ms : [ 0.516522] node 7 deferred pages initialised in 230ms : [ 0.516672] node 4 deferred pages initialised in 230ms : [ 0.516798] node 6 deferred pages initialised in 230ms : [ 0.517051] node 5 deferred pages initialised in 230ms : [ 0.523887] node 1 deferred pages initialised in 240ms : : vs with the patch: : : [ 0.379613] node 0 deferred pages initialised in 90ms : [ 0.380388] node 1 deferred pages initialised in 90ms : [ 0.380540] node 4 deferred pages initialised in 100ms : [ 0.390239] node 6 deferred pages initialised in 100ms : [ 0.390249] node 2 deferred pages initialised in 100ms : [ 0.390786] node 3 deferred pages initialised in 110ms : [ 0.396721] node 5 deferred pages initialised in 110ms : [ 0.397095] node 7 deferred pages initialised in 110ms : : Which is a nice speedup. [echanude@redhat.com: v3] Link: https://lkml.kernel.org/r/20240528185455.643227-4-echanude@redhat.com Link: https://lkml.kernel.org/r/20240522203758.626932-4-echanude@redhat.com Signed-off-by: Eric Chanudet Tested-by: Michael Ellerman (powerpc) Reviewed-by: Baoquan He Acked-by: Alexander Gordeev Acked-by: Mike Rapoport (IBM) Cc: Andy Lutomirski Cc: Borislav Petkov (AMD) Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Nicholas Piggin Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/memblock.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e2082240586d..40c62aca36ec 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -335,8 +335,6 @@ void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, for (; i != U64_MAX; \ __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end)) -int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask); - #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ /** -- cgit v1.2.3 From ffc3c8a631eeadd0de63605cecfb6ba544245352 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 24 May 2024 09:49:50 +0800 Subject: mm: memcontrol: remove page_memcg() The page_memcg() only called by mod_memcg_page_state(), so squash it to cleanup page_memcg(). Link: https://lkml.kernel.org/r/20240524014950.187805-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Matthew Wilcox Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 030d34e9d117..3d1599146afe 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -443,11 +443,6 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio) return __folio_memcg(folio); } -static inline struct mem_cgroup *page_memcg(struct page *page) -{ - return folio_memcg(page_folio(page)); -} - /** * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio. * @folio: Pointer to the folio. @@ -1014,7 +1009,7 @@ static inline void mod_memcg_page_state(struct page *page, return; rcu_read_lock(); - memcg = page_memcg(page); + memcg = folio_memcg(page_folio(page)); if (memcg) mod_memcg_state(memcg, idx, val); rcu_read_unlock(); @@ -1133,11 +1128,6 @@ static inline struct mem_cgroup *folio_memcg(struct folio *folio) return NULL; } -static inline struct mem_cgroup *page_memcg(struct page *page) -{ - return NULL; -} - static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) { WARN_ON_ONCE(!rcu_read_lock_held()); @@ -1636,7 +1626,7 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec, spin_unlock_irqrestore(&lruvec->lru_lock, flags); } -/* Test requires a stable page->memcg binding, see page_memcg() */ +/* Test requires a stable folio->memcg binding, see folio_memcg() */ static inline bool folio_matches_lruvec(struct folio *folio, struct lruvec *lruvec) { -- cgit v1.2.3 From 06668257a355a05483c188e35a18c80471d06987 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 24 May 2024 19:18:10 +0100 Subject: mm: remove page_mapping() All callers are now converted, delete this compatibility wrapper. Also fix up some comments which referred to page_mapping. Link: https://lkml.kernel.org/r/20240423225552.4113447-7-willy@infradead.org Link: https://lkml.kernel.org/r/20240524181813.698813-1-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: David Hildenbrand Cc: Eric Biggers Cc: Sidhartha Kumar Signed-off-by: Andrew Morton --- include/linux/buffer_head.h | 2 +- include/linux/page-flags.h | 23 ++++++++++++----------- include/linux/pagemap.h | 1 - 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index e022e40b099e..14acf1bbe0ce 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -53,7 +53,7 @@ typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); * filesystem and block layers. Nowadays the basic I/O unit * is the bio, and buffer_heads are used for extracting block * mappings (via a get_block_t call), for tracking state within - * a page (via a page_mapping) and for wrapping bio submission + * a folio (via a folio_mapping) and for wrapping bio submission * for backward compatibility reasons (e.g. submit_bh). */ struct buffer_head { diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b9e914e1face..813eea2efe26 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -655,27 +655,28 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) #endif /* - * On an anonymous page mapped into a user virtual memory area, - * page->mapping points to its anon_vma, not to a struct address_space; + * On an anonymous folio mapped into a user virtual memory area, + * folio->mapping points to its anon_vma, not to a struct address_space; * with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h. * * On an anonymous page in a VM_MERGEABLE area, if CONFIG_KSM is enabled, * the PAGE_MAPPING_MOVABLE bit may be set along with the PAGE_MAPPING_ANON - * bit; and then page->mapping points, not to an anon_vma, but to a private + * bit; and then folio->mapping points, not to an anon_vma, but to a private * structure which KSM associates with that merged page. See ksm.h. * * PAGE_MAPPING_KSM without PAGE_MAPPING_ANON is used for non-lru movable - * page and then page->mapping points to a struct movable_operations. + * page and then folio->mapping points to a struct movable_operations. * - * Please note that, confusingly, "page_mapping" refers to the inode - * address_space which maps the page from disk; whereas "page_mapped" - * refers to user virtual address space into which the page is mapped. + * Please note that, confusingly, "folio_mapping" refers to the inode + * address_space which maps the folio from disk; whereas "folio_mapped" + * refers to user virtual address space into which the folio is mapped. * * For slab pages, since slab reuses the bits in struct page to store its - * internal states, the page->mapping does not exist as such, nor do these - * flags below. So in order to avoid testing non-existent bits, please - * make sure that PageSlab(page) actually evaluates to false before calling - * the following functions (e.g., PageAnon). See mm/slab.h. + * internal states, the folio->mapping does not exist as such, nor do + * these flags below. So in order to avoid testing non-existent bits, + * please make sure that folio_test_slab(folio) actually evaluates to + * false before calling the following functions (e.g., folio_test_anon). + * See mm/slab.h. */ #define PAGE_MAPPING_ANON 0x1 #define PAGE_MAPPING_MOVABLE 0x2 diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 1586b5c21e81..e37e16ebff7a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -426,7 +426,6 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping) #endif } -struct address_space *page_mapping(struct page *); struct address_space *folio_mapping(struct folio *); struct address_space *swapcache_mapping(struct folio *); -- cgit v1.2.3 From 1df07243483c1a32c8f2f93b06dc52a583a4dd1c Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 24 May 2024 13:36:18 +0800 Subject: rmap: remove DEFINE_PAGE_VMA_WALK() This are no users since commit 40d707f33db5 ("mm/ksm: use folio in write_protect_page"), so remove DEFINE_PAGE_VMA_WALK(). Link: https://lkml.kernel.org/r/20240524053618.208895-1-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: David Hildenbrand Cc: Alex Shi Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/rmap.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 5cb0d419a1d7..bb53e5920b88 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -684,16 +684,6 @@ struct page_vma_mapped_walk { unsigned int flags; }; -#define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags) \ - struct page_vma_mapped_walk name = { \ - .pfn = page_to_pfn(_page), \ - .nr_pages = compound_nr(_page), \ - .pgoff = page_to_pgoff(_page), \ - .vma = _vma, \ - .address = _address, \ - .flags = _flags, \ - } - #define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags) \ struct page_vma_mapped_walk name = { \ .pfn = folio_pfn(_folio), \ -- cgit v1.2.3 From 940d6683c79950b21b3762124eabfa9b2f6fee96 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 24 May 2024 13:28:42 +0800 Subject: mm: migrate: remove migrate_folio_extra() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit migrate_folio_extra() is only called in migrate.c now, convert it a static function and take a new src_private argument which could be shared by migrate_folio() and filemap_migrate_folio() to simplify code a bit. Link: https://lkml.kernel.org/r/20240524052843.182275-5-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Jane Chu Cc: Alistair Popple Cc: Benjamin LaHaise Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jérôme Glisse Cc: Jiaqi Yan Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Muchun Song Cc: Naoya Horiguchi Cc: Tony Luck Cc: Vishal Moola (Oracle) Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/migrate.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 2ce13e8a309b..517f70b70620 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -63,8 +63,6 @@ extern const char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION void putback_movable_pages(struct list_head *l); -int migrate_folio_extra(struct address_space *mapping, struct folio *dst, - struct folio *src, enum migrate_mode mode, int extra_count); int migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode); int migrate_pages(struct list_head *l, new_folio_t new, free_folio_t free, -- cgit v1.2.3 From 906632843d00a4a42072b19c423b30a9e7adef3c Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 24 May 2024 13:28:43 +0800 Subject: mm: remove MIGRATE_SYNC_NO_COPY mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 2916ecc0f9d4 ("mm/migrate: new migrate mode MIGRATE_SYNC_NO_COPY") introduce a new MIGRATE_SYNC_NO_COPY mode to allow to offload the copy to a device DMA engine, which is only used __migrate_device_pages() to decide whether or not copy the old page, and the MIGRATE_SYNC_NO_COPY mode only set in hmm, as the MIGRATE_SYNC_NO_COPY set is removed by previous cleanup, it seems that we could remove the unnecessary MIGRATE_SYNC_NO_COPY. Link: https://lkml.kernel.org/r/20240524052843.182275-6-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Jane Chu Cc: Alistair Popple Cc: Benjamin LaHaise Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jérôme Glisse Cc: Jiaqi Yan Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Muchun Song Cc: Naoya Horiguchi Cc: Tony Luck Cc: Vishal Moola (Oracle) Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/migrate_mode.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h index f37cc03f9369..9fb482bb7323 100644 --- a/include/linux/migrate_mode.h +++ b/include/linux/migrate_mode.h @@ -7,16 +7,11 @@ * on most operations but not ->writepage as the potential stall time * is too significant * MIGRATE_SYNC will block when migrating pages - * MIGRATE_SYNC_NO_COPY will block when migrating pages but will not copy pages - * with the CPU. Instead, page copy happens outside the migratepage() - * callback and is likely using a DMA engine. See migrate_vma() and HMM - * (mm/hmm.c) for users of this mode. */ enum migrate_mode { MIGRATE_ASYNC, MIGRATE_SYNC_LIGHT, MIGRATE_SYNC, - MIGRATE_SYNC_NO_COPY, }; enum migrate_reason { -- cgit v1.2.3 From ebfba0045176cb013f49cb3e5bd9f0b16eba203c Mon Sep 17 00:00:00 2001 From: Chuanhua Han Date: Wed, 29 May 2024 20:28:19 +1200 Subject: mm: swap: introduce swap_free_nr() for batched swap_free() Patch series "large folios swap-in: handle refault cases first", v5. This patchset is extracted from the large folio swapin series[1], primarily addressing the handling of scenarios involving large folios in the swap cache. Currently, it is particularly focused on addressing the refaulting of mTHP, which is still undergoing reclamation. This approach aims to streamline code review and expedite the integration of this segment into the MM tree. It relies on Ryan's swap-out series[2], leveraging the helper function swap_pte_batch() introduced by that series. Presently, do_swap_page only encounters a large folio in the swap cache before the large folio is released by vmscan. However, the code should remain equally useful once we support large folio swap-in via swapin_readahead(). This approach can effectively reduce page faults and eliminate most redundant checks and early exits for MTE restoration in recent MTE patchset[3]. The large folio swap-in for SWP_SYNCHRONOUS_IO and swapin_readahead() will be split into separate patch sets and sent at a later time. [1] https://lore.kernel.org/linux-mm/20240304081348.197341-1-21cnbao@gmail.com/ [2] https://lore.kernel.org/linux-mm/20240408183946.2991168-1-ryan.roberts@arm.com/ [3] https://lore.kernel.org/linux-mm/20240322114136.61386-1-21cnbao@gmail.com/ This patch (of 6): While swapping in a large folio, we need to free swaps related to the whole folio. To avoid frequently acquiring and releasing swap locks, it is better to introduce an API for batched free. Furthermore, this new function, swap_free_nr(), is designed to efficiently handle various scenarios for releasing a specified number, nr, of swap entries. Link: https://lkml.kernel.org/r/20240529082824.150954-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20240529082824.150954-2-21cnbao@gmail.com Signed-off-by: Chuanhua Han Co-developed-by: Barry Song Signed-off-by: Barry Song Reviewed-by: Ryan Roberts Acked-by: Chris Li Reviewed-by: "Huang, Ying" Cc: Baolin Wang Cc: David Hildenbrand Cc: Gao Xiang Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kairui Song Cc: Matthew Wilcox (Oracle) Cc: Suren Baghdasaryan Cc: Yosry Ahmed Cc: Yu Zhao Cc: Zi Yan Cc: Andreas Larsson Cc: Christoph Hellwig Cc: "David S. Miller" Cc: Khalid Aziz Cc: Len Brown Cc: Pavel Machek Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton --- include/linux/swap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index bd450023b9a4..0f41fe49c9dc 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -478,6 +478,7 @@ extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); extern void swap_free(swp_entry_t); +extern void swap_free_nr(swp_entry_t entry, int nr_pages); extern void swapcache_free_entries(swp_entry_t *entries, int n); extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); int swap_type_of(dev_t device, sector_t offset); @@ -559,6 +560,10 @@ static inline void swap_free(swp_entry_t swp) { } +static inline void swap_free_nr(swp_entry_t entry, int nr_pages) +{ +} + static inline void put_swap_folio(struct folio *folio, swp_entry_t swp) { } -- cgit v1.2.3 From 54f7a49c20ebb5189980c53e6e66709d22bee572 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 29 May 2024 20:28:20 +1200 Subject: mm: remove the implementation of swap_free() and always use swap_free_nr() To streamline maintenance efforts, we propose removing the implementation of swap_free(). Instead, we can simply invoke swap_free_nr() with nr set to 1. swap_free_nr() is designed with a bitmap consisting of only one long, resulting in overhead that can be ignored for cases where nr equals 1. A prime candidate for leveraging swap_free_nr() lies within kernel/power/swap.c. Implementing this change facilitates the adoption of batch processing for hibernation. Link: https://lkml.kernel.org/r/20240529082824.150954-3-21cnbao@gmail.com Signed-off-by: Barry Song Suggested-by: "Huang, Ying" Reviewed-by: "Huang, Ying" Acked-by: Chris Li Reviewed-by: Ryan Roberts Cc: "Rafael J. Wysocki" Cc: Pavel Machek Cc: Len Brown Cc: Hugh Dickins Cc: Christoph Hellwig Cc: Andreas Larsson Cc: Baolin Wang Cc: Chuanhua Han Cc: David Hildenbrand Cc: "David S. Miller" Cc: Gao Xiang Cc: Johannes Weiner Cc: Kairui Song Cc: Khalid Aziz Cc: Matthew Wilcox (Oracle) Cc: Suren Baghdasaryan Cc: Yosry Ahmed Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/swap.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 0f41fe49c9dc..d33ce740b695 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -477,7 +477,6 @@ extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); -extern void swap_free(swp_entry_t); extern void swap_free_nr(swp_entry_t entry, int nr_pages); extern void swapcache_free_entries(swp_entry_t *entries, int n); extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); @@ -556,10 +555,6 @@ static inline int swapcache_prepare(swp_entry_t swp) return 0; } -static inline void swap_free(swp_entry_t swp) -{ -} - static inline void swap_free_nr(swp_entry_t entry, int nr_pages) { } @@ -608,6 +603,11 @@ static inline void free_swap_and_cache(swp_entry_t entry) free_swap_and_cache_nr(entry, 1); } +static inline void swap_free(swp_entry_t entry) +{ + swap_free_nr(entry, 1); +} + #ifdef CONFIG_MEMCG static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) { -- cgit v1.2.3 From 29f252cdc293f4a50b5d3dcbed53701d8444614d Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 29 May 2024 20:28:22 +1200 Subject: mm: introduce arch_do_swap_page_nr() which allows restore metadata for nr pages Should do_swap_page() have the capability to directly map a large folio, metadata restoration becomes necessary for a specified number of pages denoted as nr. It's important to highlight that metadata restoration is solely required by the SPARC platform, which, however, does not enable THP_SWAP. Consequently, in the present kernel configuration, there exists no practical scenario where users necessitate the restoration of nr metadata. Platforms implementing THP_SWAP might invoke this function with nr values exceeding 1, subsequent to do_swap_page() successfully mapping an entire large folio. Nonetheless, their arch_do_swap_page_nr() functions remain empty. Link: https://lkml.kernel.org/r/20240529082824.150954-5-21cnbao@gmail.com Signed-off-by: Barry Song Reviewed-by: Ryan Roberts Reviewed-by: Khalid Aziz Cc: "David S. Miller" Cc: Andreas Larsson Cc: Baolin Wang Cc: Chris Li Cc: Christoph Hellwig Cc: Chuanhua Han Cc: David Hildenbrand Cc: Gao Xiang Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Johannes Weiner Cc: Kairui Song Cc: Len Brown Cc: Matthew Wilcox (Oracle) Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: Suren Baghdasaryan Cc: Yosry Ahmed Cc: Yu Zhao Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 117b807e3f89..2f32eaccf0b9 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1089,6 +1089,15 @@ static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b) }) #ifndef __HAVE_ARCH_DO_SWAP_PAGE +static inline void arch_do_swap_page_nr(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long addr, + pte_t pte, pte_t oldpte, + int nr) +{ + +} +#else /* * Some architectures support metadata associated with a page. When a * page is being swapped out, this metadata must be saved so it can be @@ -1097,12 +1106,17 @@ static inline int pgd_same(pgd_t pgd_a, pgd_t pgd_b) * page as metadata for the page. arch_do_swap_page() can restore this * metadata when a page is swapped back in. */ -static inline void arch_do_swap_page(struct mm_struct *mm, - struct vm_area_struct *vma, - unsigned long addr, - pte_t pte, pte_t oldpte) -{ - +static inline void arch_do_swap_page_nr(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long addr, + pte_t pte, pte_t oldpte, + int nr) +{ + for (int i = 0; i < nr; i++) { + arch_do_swap_page(vma->vm_mm, vma, addr + i * PAGE_SIZE, + pte_advance_pfn(pte, i), + pte_advance_pfn(oldpte, i)); + } } #endif -- cgit v1.2.3 From 16540dae959d862909716d5c854e9b3aee285609 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Thu, 30 May 2024 10:14:27 -0700 Subject: mm/hugetlb: mm/memory_hotplug: use a folio in scan_movable_pages() By using a folio in scan_movable_pages() we convert the last user of the page-based hugetlb information macro functions to the folio version. After this conversion, we can safely remove the page-based definitions from include/linux/hugetlb.h. Link: https://lkml.kernel.org/r/20240530171427.242018-1-sidhartha.kumar@oracle.com Signed-off-by: Sidhartha Kumar Acked-by: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Cc: Oscar Salvador Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 15a58f69782c..279aca379b95 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -616,9 +616,7 @@ static __always_inline \ bool folio_test_hugetlb_##flname(struct folio *folio) \ { void *private = &folio->private; \ return test_bit(HPG_##flname, private); \ - } \ -static inline int HPage##uname(struct page *page) \ - { return test_bit(HPG_##flname, &(page->private)); } + } #define SETHPAGEFLAG(uname, flname) \ static __always_inline \ @@ -637,8 +635,6 @@ void folio_clear_hugetlb_##flname(struct folio *folio) \ #define TESTHPAGEFLAG(uname, flname) \ static inline bool \ folio_test_hugetlb_##flname(struct folio *folio) \ - { return 0; } \ -static inline int HPage##uname(struct page *page) \ { return 0; } #define SETHPAGEFLAG(uname, flname) \ -- cgit v1.2.3 From fefc6e6631ff43427e81f08c8e49f7787ff0213a Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Tue, 28 May 2024 09:40:50 -0700 Subject: memcg: rearrange fields of mem_cgroup_per_node Kernel test robot reported [1] performance regression for will-it-scale test suite's page_fault2 test case for the commit 70a64b7919cb ("memcg: dynamically allocate lruvec_stats"). After inspection it seems like the commit has unintentionally introduced false cache sharing. After the commit the fields of mem_cgroup_per_node which get read on the performance critical path share the cacheline with the fields which get updated often on LRU page allocations or deallocations. This has caused contention on that cacheline and the workloads which manipulates a lot of LRU pages are regressed as reported by the test report. The solution is to rearrange the fields of mem_cgroup_per_node such that the false sharing is eliminated. Let's move all the read only pointers at the start of the struct, followed by memcg-v1 only fields and at the end fields which get updated often. Experiment setup: Ran fallocate1, fallocate2, page_fault1, page_fault2 and page_fault3 from the will-it-scale test suite inside a three level memcg with /tmp mounted as tmpfs on two different machines, one a single numa node and the other one, two node machine. $ ./[testcase]_processes -t $NR_CPUS -s 50 Results for single node, 52 CPU machine: Testcase base with-patch fallocate1 1031081 1431291 (38.80 %) fallocate2 1029993 1421421 (38.00 %) page_fault1 2269440 3405788 (50.07 %) page_fault2 2375799 3572868 (50.30 %) page_fault3 28641143 28673950 ( 0.11 %) Results for dual node, 80 CPU machine: Testcase base with-patch fallocate1 2976288 3641185 (22.33 %) fallocate2 2979366 3638181 (22.11 %) page_fault1 6221790 7748245 (24.53 %) page_fault2 6482854 7847698 (21.05 %) page_fault3 28804324 28991870 ( 0.65 %) Link: https://lkml.kernel.org/r/20240528164050.2625718-1-shakeel.butt@linux.dev Fixes: 70a64b7919cb ("memcg: dynamically allocate lruvec_stats") Signed-off-by: Shakeel Butt Reported-by: kernel test robot Reviewed-by: Yosry Ahmed Reviewed-by: Roman Gushchin Cc: Feng Tang Cc: "Huang, Ying" Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Yin Fengwei Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3d1599146afe..7403dd5926eb 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -96,23 +96,29 @@ struct mem_cgroup_reclaim_iter { * per-node information in memory controller. */ struct mem_cgroup_per_node { - struct lruvec lruvec; + /* Keep the read-only fields at the start */ + struct mem_cgroup *memcg; /* Back pointer, we cannot */ + /* use container_of */ struct lruvec_stats_percpu __percpu *lruvec_stats_percpu; struct lruvec_stats *lruvec_stats; - - unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; - - struct mem_cgroup_reclaim_iter iter; - struct shrinker_info __rcu *shrinker_info; + /* + * Memcg-v1 only stuff in middle as buffer between read mostly fields + * and update often fields to avoid false sharing. Once v1 stuff is + * moved in a separate struct, an explicit padding is needed. + */ + struct rb_node tree_node; /* RB tree node */ unsigned long usage_in_excess;/* Set to the value by which */ /* the soft limit is exceeded*/ bool on_tree; - struct mem_cgroup *memcg; /* Back pointer, we cannot */ - /* use container_of */ + + /* Fields which get updated often at the end. */ + struct lruvec lruvec; + unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; + struct mem_cgroup_reclaim_iter iter; }; struct mem_cgroup_threshold { -- cgit v1.2.3 From 21664442be1bf79094dd9d21b8833acbfbd80ea7 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 28 May 2024 16:43:13 +0200 Subject: percpu: add __this_cpu_try_cmpxchg() Add __this_cpu_try_cmpxchg() version of the percpu op. Link: https://lkml.kernel.org/r/20240528144345.5980-1-ubizjak@gmail.com Signed-off-by: Uros Bizjak Reviewed-by: Uladzislau Rezki (Sony) Acked-by: Dennis Zhou Cc: Christoph Hellwig Cc: Lorenzo Stoakes Cc: Tejun Heo Cc: Christoph Lameter Signed-off-by: Andrew Morton --- include/linux/percpu-defs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index ec3573119923..8efce7414fad 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -475,6 +475,12 @@ do { \ raw_cpu_cmpxchg(pcp, oval, nval); \ }) +#define __this_cpu_try_cmpxchg(pcp, ovalp, nval) \ +({ \ + __this_cpu_preempt_check("try_cmpxchg"); \ + raw_cpu_try_cmpxchg(pcp, ovalp, nval); \ +}) + #define __this_cpu_sub(pcp, val) __this_cpu_add(pcp, -(typeof(pcp))(val)) #define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1) #define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1) -- cgit v1.2.3 From 4b98995530b77a97912230d8e1564ba7738db19c Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 11 Jun 2024 18:11:07 +0800 Subject: mm: shmem: add multi-size THP sysfs interface for anonymous shmem To support the use of mTHP with anonymous shmem, add a new sysfs interface 'shmem_enabled' in the '/sys/kernel/mm/transparent_hugepage/hugepages-kB/' directory for each mTHP to control whether shmem is enabled for that mTHP, with a value similar to the top level 'shmem_enabled', which can be set to: "always", "inherit (to inherit the top level setting)", "within_size", "advise", "never". An 'inherit' option is added to ensure compatibility with these global settings, and the options 'force' and 'deny' are dropped, which are rather testing artifacts from the old ages. By default, PMD-sized hugepages have enabled="inherit" and all other hugepage sizes have enabled="never" for '/sys/kernel/mm/transparent_hugepage/hugepages-xxkB/shmem_enabled'. In addition, if top level value is 'force', then only PMD-sized hugepages have enabled="inherit", otherwise configuration will be failed and vice versa. That means now we will avoid using non-PMD sized THP to override the global huge allocation. [baolin.wang@linux.alibaba.com: fix transhuge.rst indentation] Link: https://lkml.kernel.org/r/b189d815-998b-4dfd-ba89-218ff51313f8@linux.alibaba.com [akpm@linux-foundation.org: reflow transhuge.rst addition to 80 cols] [baolin.wang@linux.alibaba.com: move huge_shmem_orders_lock under CONFIG_SYSFS] Link: https://lkml.kernel.org/r/eb34da66-7f12-44f3-a39e-2bcc90c33354@linux.alibaba.com [akpm@linux-foundation.org: huge_memory.c needs mm_types.h] Link: https://lkml.kernel.org/r/ffddfa8b3cb4266ff963099ab78cfd7184c57ac7.1718090413.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Cc: Barry Song Cc: Daniel Gomez Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Kefeng Wang Cc: Lance Yang Cc: Pankaj Raghav Cc: Ryan Roberts Cc: Yang Shi Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 088d66a54643..6a1edd752968 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -6,6 +6,7 @@ #include #include /* only for vma_is_dax() */ +#include vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf); int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, @@ -63,6 +64,7 @@ ssize_t single_hugepage_flag_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf, enum transparent_hugepage_flag flag); extern struct kobj_attribute shmem_enabled_attr; +extern struct kobj_attribute thpsize_shmem_enabled_attr; /* * Mask of all large folio orders supported for anonymous THP; all orders up to @@ -265,6 +267,14 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma, return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders); } +struct thpsize { + struct kobject kobj; + struct list_head node; + int order; +}; + +#define to_thpsize(kobj) container_of(kobj, struct thpsize, kobj) + enum mthp_stat_item { MTHP_STAT_ANON_FAULT_ALLOC, MTHP_STAT_ANON_FAULT_FALLBACK, -- cgit v1.2.3 From e7a2ab7b3bb5d87f99f2ea3d4481d52fc5ceb52d Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 11 Jun 2024 18:11:08 +0800 Subject: mm: shmem: add mTHP support for anonymous shmem Commit 19eaf44954df adds multi-size THP (mTHP) for anonymous pages, that can allow THP to be configured through the sysfs interface located at '/sys/kernel/mm/transparent_hugepage/hugepage-XXkb/enabled'. However, the anonymous shmem will ignore the anonymous mTHP rule configured through the sysfs interface, and can only use the PMD-mapped THP, that is not reasonable. Users expect to apply the mTHP rule for all anonymous pages, including the anonymous shmem, in order to enjoy the benefits of mTHP. For example, lower latency than PMD-mapped THP, smaller memory bloat than PMD-mapped THP, contiguous PTEs on ARM architecture to reduce TLB miss etc. In addition, the mTHP interfaces can be extended to support all shmem/tmpfs scenarios in the future, especially for the shmem mmap() case. The primary strategy is similar to supporting anonymous mTHP. Introduce a new interface '/mm/transparent_hugepage/hugepage-XXkb/shmem_enabled', which can have almost the same values as the top-level '/sys/kernel/mm/transparent_hugepage/shmem_enabled', with adding a new additional "inherit" option and dropping the testing options 'force' and 'deny'. By default all sizes will be set to "never" except PMD size, which is set to "inherit". This ensures backward compatibility with the anonymous shmem enabled of the top level, meanwhile also allows independent control of anonymous shmem enabled for each mTHP. Link: https://lkml.kernel.org/r/65796c1e72e51e15f3410195b5c2d5b6c160d411.1718090413.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Cc: Barry Song Cc: Daniel Gomez Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Kefeng Wang Cc: Lance Yang Cc: Pankaj Raghav Cc: Ryan Roberts Cc: Yang Shi Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 6a1edd752968..53b7a137f460 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -560,6 +560,16 @@ static inline bool thp_migration_supported(void) { return false; } + +static inline int highest_order(unsigned long orders) +{ + return 0; +} + +static inline int next_order(unsigned long *orders, int prev) +{ + return 0; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline int split_folio_to_list_to_order(struct folio *folio, -- cgit v1.2.3 From 66f44583f9b617d74ffa2487e75a9c3adf344ddb Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Tue, 11 Jun 2024 18:11:10 +0800 Subject: mm: shmem: add mTHP counters for anonymous shmem Add mTHP counters for anonymous shmem. [baolin.wang@linux.alibaba.com: update Documentation/admin-guide/mm/transhuge.rst] Link: https://lkml.kernel.org/r/d86e2e7f-4141-432b-b2ba-c6691f36ef0b@linux.alibaba.com Link: https://lkml.kernel.org/r/4fd9e467d49ae4a747e428bcd821c7d13125ae67.1718090413.git.baolin.wang@linux.alibaba.com Signed-off-by: Baolin Wang Reviewed-by: Lance Yang Cc: Barry Song Cc: Daniel Gomez Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Hugh Dickins Cc: Kefeng Wang Cc: Pankaj Raghav Cc: Ryan Roberts Cc: Yang Shi Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 53b7a137f460..7ad41de5eaea 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -281,6 +281,9 @@ enum mthp_stat_item { MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, MTHP_STAT_SWPOUT, MTHP_STAT_SWPOUT_FALLBACK, + MTHP_STAT_FILE_ALLOC, + MTHP_STAT_FILE_FALLBACK, + MTHP_STAT_FILE_FALLBACK_CHARGE, __MTHP_STAT_COUNT }; -- cgit v1.2.3 From cdd9a571b7d8465353fe2e2d8d1edd8a58d82021 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 7 Jun 2024 14:23:56 +0200 Subject: fs/proc: move page_mapcount() to fs/proc/internal.h ... and rename it to folio_precise_page_mapcount(). fs/proc is the last remaining user, and that should stay that way. While at it, cleanup kpagecount_read() a bit: there are still some legacy leftovers -- when the interface was introduced it returned the page refcount, but was changed briefly afterwards to return the page mapcount. Further, some simple folio conversion. Once we stop using the per-page mapcounts of large folios, all folio_precise_page_mapcount() users will have to implement an alternative way to achieve what they are trying to achieve, possibly in a less precise way. [dan.carpenter@linaro.org: fix uninitialized variable in pagemap_pmd_range()] Link: https://lkml.kernel.org/r/9d6eaba7-92f8-4a70-8765-38a519680a87@moroto.mountain Link: https://lkml.kernel.org/r/20240607122357.115423-6-david@redhat.com Signed-off-by: David Hildenbrand Signed-off-by: Dan Carpenter Cc: Alexey Dobriyan Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Lance Yang Cc: Oscar Salvador Signed-off-by: Andrew Morton --- include/linux/mm.h | 27 +-------------------------- 1 file changed, 1 insertion(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index f4bf94ca99e3..7f864d5f52b7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1202,8 +1202,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) /* * How many times the entire folio is mapped as a single unit (eg by a * PMD or PUD entry). This is probably not what you want, except for - * debugging purposes - it does not include PTE-mapped sub-pages; look - * at folio_mapcount() or page_mapcount() instead. + * debugging purposes or implementation of other core folio_*() primitives. */ static inline int folio_entire_mapcount(const struct folio *folio) { @@ -1221,30 +1220,6 @@ static inline void page_mapcount_reset(struct page *page) atomic_set(&(page)->_mapcount, -1); } -/** - * page_mapcount() - Number of times this precise page is mapped. - * @page: The page. - * - * The number of times this page is mapped. If this page is part of - * a large folio, it includes the number of times this page is mapped - * as part of that folio. - * - * Will report 0 for pages which cannot be mapped into userspace, eg - * slab, page tables and similar. - */ -static inline int page_mapcount(struct page *page) -{ - int mapcount = atomic_read(&page->_mapcount) + 1; - - /* Handle page_has_type() pages */ - if (mapcount < PAGE_MAPCOUNT_RESERVE + 1) - mapcount = 0; - if (unlikely(PageCompound(page))) - mapcount += folio_entire_mapcount(page_folio(page)); - - return mapcount; -} - static inline int folio_large_mapcount(const struct folio *folio) { VM_WARN_ON_FOLIO(!folio_test_large(folio), folio); -- cgit v1.2.3 From 7a581204b1fa4fed233ed3b7ffcf6847cc383dbc Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 7 Jun 2024 10:37:10 +0200 Subject: mm/highmem: reimplement totalhigh_pages() by walking zones Patch series "mm/highmem: don't track highmem pages manually". Let's remove highmem special-casing from adjust_managed_page_count(), to result in less confusion why memblock manually adjusts totalram_pages, and __free_pages_core() only adjusts the zone's managed pages -- what about the highmem pages that adjust_managed_page_count() updates? Now, we only maintain totalram_pages and a zone's managed pages independent of highmem support. We can derive the number of highmem pages simply by looking at the relevant zone's managed pages. I don't think there is any particular fast path that needs a maximum-efficient totalhigh_pages() implementation. Note that highmem memory is currently initialized using free_highmem_page()->free_reserved_page(), not __free_pages_core(). In the future we might want to also use __free_pages_core() to initialize highmem memory, to make that less special, and consider moving totalram_pages updates into __free_pages_core() [1], so we can just use adjust_managed_page_count() in there as well. Booting a simple kernel in QEMU reveals no highmem accounting change: Before: Memory: 3095448K/3145208K available (14802K kernel code, 2073K rwdata, 5000K rodata, 740K init, 556K bss, 49760K reserved, 0K cma-reserved, 2244488K highmem) After: Memory: 3095276K/3145208K available (14802K kernel code, 2073K rwdata, 5000K rodata, 740K init, 556K bss, 49932K reserved, 0K cma-reserved, 2244488K highmem) [1] https://lkml.kernel.org/r/20240601133402.2675-1-richard.weiyang@gmail.com This patch (of 2): Can we get rid of the highmem ifdef in adjust_managed_page_count()? Likely yes: we don't have that many totalhigh_pages() users, and they all don't seem to be very performance critical. So let's implement totalhigh_pages() like nr_free_highpages(), collecting information from all zones. This is now similar to what we do in si_meminfo_node() to collect the per-node highmem page count. In the common case (single node, 3-4 zones), we really shouldn't care. We could optimize a bit further (only walk ZONE_HIGHMEM and ZONE_MOVABLE if required), but there doesn't seem a real need for that. [david@redhat.com: fix build bot complaint] Link: https://lkml.kernel.org/r/b57e5bc4-eb72-40e3-add4-57dfa6e03df6@redhat.com Link: https://lkml.kernel.org/r/20240607083711.62833-1-david@redhat.com Link: https://lkml.kernel.org/r/20240607083711.62833-2-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Wei Yang Reviewed-by: Oscar Salvador Signed-off-by: Andrew Morton --- include/linux/highmem-internal.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index a3028e400a9c..65f865fbbac0 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -132,7 +132,7 @@ static inline void __kunmap_atomic(const void *addr) } unsigned int __nr_free_highpages(void); -extern atomic_long_t _totalhigh_pages; +unsigned long __totalhigh_pages(void); static inline unsigned int nr_free_highpages(void) { @@ -141,12 +141,7 @@ static inline unsigned int nr_free_highpages(void) static inline unsigned long totalhigh_pages(void) { - return (unsigned long)atomic_long_read(&_totalhigh_pages); -} - -static inline void totalhigh_pages_add(long count) -{ - atomic_long_add(count, &_totalhigh_pages); + return __totalhigh_pages(); } static inline bool is_kmap_addr(const void *x) -- cgit v1.2.3 From 90b8fab5cdc441735d388e286c715da7c85b24f1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 7 Jun 2024 10:37:11 +0200 Subject: mm/highmem: make nr_free_highpages() return "unsigned long" It looks rather weird that totalhigh_pages() returns an "unsigned long" but nr_free_highpages() returns an "unsigned int". Let's return an "unsigned long" from nr_free_highpages() to be consistent. While at it, use a plain "0" instead of a "0UL" in the !CONFIG_HIGHMEM totalhigh_pages() implementation, to make these look alike as well. Link: https://lkml.kernel.org/r/20240607083711.62833-3-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Wei Yang Signed-off-by: Andrew Morton --- include/linux/highmem-internal.h | 8 ++++---- include/linux/highmem.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 65f865fbbac0..dd100e849f5e 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -131,10 +131,10 @@ static inline void __kunmap_atomic(const void *addr) preempt_enable(); } -unsigned int __nr_free_highpages(void); +unsigned long __nr_free_highpages(void); unsigned long __totalhigh_pages(void); -static inline unsigned int nr_free_highpages(void) +static inline unsigned long nr_free_highpages(void) { return __nr_free_highpages(); } @@ -234,8 +234,8 @@ static inline void __kunmap_atomic(const void *addr) preempt_enable(); } -static inline unsigned int nr_free_highpages(void) { return 0; } -static inline unsigned long totalhigh_pages(void) { return 0UL; } +static inline unsigned long nr_free_highpages(void) { return 0; } +static inline unsigned long totalhigh_pages(void) { return 0; } static inline bool is_kmap_addr(const void *x) { diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 00341b56d291..fa6891e06316 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -179,7 +179,7 @@ static inline void *kmap_local_folio(struct folio *folio, size_t offset); static inline void *kmap_atomic(struct page *page); /* Highmem related interfaces for management code */ -static inline unsigned int nr_free_highpages(void); +static inline unsigned long nr_free_highpages(void); static inline unsigned long totalhigh_pages(void); #ifndef ARCH_HAS_FLUSH_ANON_PAGE -- cgit v1.2.3 From 29e847d2ade3cdff36afe095fdbeb9b5f71a197a Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Fri, 14 Jun 2024 09:51:37 +0800 Subject: mm/rmap: integrate PMD-mapped folio splitting into pagewalk loop In preparation for supporting try_to_unmap_one() to unmap PMD-mapped folios, start the pagewalk first, then call split_huge_pmd_address() to split the folio. Link: https://lkml.kernel.org/r/20240614015138.31461-3-ioworker0@gmail.com Signed-off-by: Lance Yang Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Suggested-by: Baolin Wang Acked-by: Zi Yan Cc: Bang Li Cc: Barry Song Cc: Fangrui Song Cc: Jeff Xie Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Muchun Song Cc: Peter Xu Cc: Ryan Roberts Cc: SeongJae Park Cc: Yang Shi Cc: Yin Fengwei Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 6 ++++++ include/linux/rmap.h | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 7ad41de5eaea..9f720b0731c4 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -428,6 +428,9 @@ static inline bool thp_migration_supported(void) return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); } +void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, + pmd_t *pmd, bool freeze, struct folio *folio); + #else /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline bool folio_test_pmd_mappable(struct folio *folio) @@ -490,6 +493,9 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze, struct folio *folio) {} static inline void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, bool freeze, struct folio *folio) {} +static inline void split_huge_pmd_locked(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmd, + bool freeze, struct folio *folio) {} #define split_huge_pud(__vma, __pmd, __address) \ do { } while (0) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bb53e5920b88..bf46787c8eba 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -703,6 +703,30 @@ static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw) spin_unlock(pvmw->ptl); } +/** + * page_vma_mapped_walk_restart - Restart the page table walk. + * @pvmw: Pointer to struct page_vma_mapped_walk. + * + * It restarts the page table walk when changes occur in the page + * table, such as splitting a PMD. Ensures that the PTL held during + * the previous walk is released and resets the state to allow for + * a new walk starting at the current address stored in pvmw->address. + */ +static inline void +page_vma_mapped_walk_restart(struct page_vma_mapped_walk *pvmw) +{ + WARN_ON_ONCE(!pvmw->pmd && !pvmw->pte); + + if (likely(pvmw->ptl)) + spin_unlock(pvmw->ptl); + else + WARN_ON_ONCE(1); + + pvmw->ptl = NULL; + pvmw->pmd = NULL; + pvmw->pte = NULL; +} + bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw); /* -- cgit v1.2.3 From 735ecdfaf4e802209caf34b7ac45adf448c54ccc Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Fri, 14 Jun 2024 09:51:38 +0800 Subject: mm/vmscan: avoid split lazyfree THP during shrink_folio_list() When the user no longer requires the pages, they would use madvise(MADV_FREE) to mark the pages as lazy free. Subsequently, they typically would not re-write to that memory again. During memory reclaim, if we detect that the large folio and its PMD are both still marked as clean and there are no unexpected references (such as GUP), so we can just discard the memory lazily, improving the efficiency of memory reclamation in this case. On an Intel i5 CPU, reclaiming 1GiB of lazyfree THPs using mem_cgroup_force_empty() results in the following runtimes in seconds (shorter is better): -------------------------------------------- | Old | New | Change | -------------------------------------------- | 0.683426 | 0.049197 | -92.80% | -------------------------------------------- [ioworker0@gmail.com: minor changes per David] Link: https://lkml.kernel.org/r/20240622100057.3352-1-ioworker0@gmail.com Link: https://lkml.kernel.org/r/20240614015138.31461-4-ioworker0@gmail.com Signed-off-by: Lance Yang Suggested-by: Zi Yan Suggested-by: David Hildenbrand Cc: Bang Li Cc: Baolin Wang Cc: Barry Song Cc: Fangrui Song Cc: Jeff Xie Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Cc: Muchun Song Cc: Peter Xu Cc: Ryan Roberts Cc: SeongJae Park Cc: Yang Shi Cc: Yin Fengwei Cc: Zach O'Keefe Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9f720b0731c4..212cca384d7e 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -430,6 +430,8 @@ static inline bool thp_migration_supported(void) void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, bool freeze, struct folio *folio); +bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmdp, struct folio *folio); #else /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -497,6 +499,13 @@ static inline void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, bool freeze, struct folio *folio) {} +static inline bool unmap_huge_pmd_locked(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp, + struct folio *folio) +{ + return false; +} + #define split_huge_pud(__vma, __pmd, __address) \ do { } while (0) -- cgit v1.2.3 From 2b33a97c94bc44468fc1d54b745269c0cf0b7bb2 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 11 Jun 2024 02:45:14 +0000 Subject: mm: zswap: rename is_zswap_enabled() to zswap_is_enabled() In preparation for introducing a similar function, rename is_zswap_enabled() to use zswap_* prefix like other zswap functions. Link: https://lkml.kernel.org/r/20240611024516.1375191-1-yosryahmed@google.com Signed-off-by: Yosry Ahmed Reviewed-by: Barry Song Reviewed-by: Nhat Pham Cc: Chengming Zhou Cc: Chris Li Cc: David Hildenbrand Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/zswap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/zswap.h b/include/linux/zswap.h index 2a85b941db97..ce5e7bfe8f1e 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -35,7 +35,7 @@ void zswap_swapoff(int type); void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg); void zswap_lruvec_state_init(struct lruvec *lruvec); void zswap_folio_swapin(struct folio *folio); -bool is_zswap_enabled(void); +bool zswap_is_enabled(void); #else struct zswap_lruvec_state {}; @@ -60,7 +60,7 @@ static inline void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg) {} static inline void zswap_lruvec_state_init(struct lruvec *lruvec) {} static inline void zswap_folio_swapin(struct folio *folio) {} -static inline bool is_zswap_enabled(void) +static inline bool zswap_is_enabled(void) { return false; } -- cgit v1.2.3 From 2d4d2b1cfb85cc07f6d5619acb882d8b11e55cf4 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 11 Jun 2024 02:45:15 +0000 Subject: mm: zswap: add zswap_never_enabled() Add zswap_never_enabled() to skip the xarray lookup in zswap_load() if zswap was never enabled on the system. It is implemented using static branches for efficiency, as enabling zswap should be a rare event. This could shave some cycles off zswap_load() when CONFIG_ZSWAP is used but zswap is never enabled. However, the real motivation behind this patch is two-fold: - Incoming large folio swapin work will need to fallback to order-0 folios if zswap was ever enabled, because any part of the folio could be in zswap, until proper handling of large folios with zswap is added. - A warning and recovery attempt will be added in a following change in case the above was not done incorrectly. Zswap will fail the read if the folio is large and it was ever enabled. Expose zswap_never_enabled() in the header for the swapin work to use it later. [yosryahmed@google.com: expose zswap_never_enabled() in the header] Link: https://lkml.kernel.org/r/Zmjf0Dr8s9xSW41X@google.com Link: https://lkml.kernel.org/r/20240611024516.1375191-2-yosryahmed@google.com Signed-off-by: Yosry Ahmed Reviewed-by: Nhat Pham Cc: Barry Song Cc: Chengming Zhou Cc: Chris Li Cc: David Hildenbrand Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/zswap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/zswap.h b/include/linux/zswap.h index ce5e7bfe8f1e..bf83ae5e285d 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -36,6 +36,7 @@ void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg); void zswap_lruvec_state_init(struct lruvec *lruvec); void zswap_folio_swapin(struct folio *folio); bool zswap_is_enabled(void); +bool zswap_never_enabled(void); #else struct zswap_lruvec_state {}; @@ -65,6 +66,11 @@ static inline bool zswap_is_enabled(void) return false; } +static inline bool zswap_never_enabled(void) +{ + return false; +} + #endif #endif /* _LINUX_ZSWAP_H */ -- cgit v1.2.3 From 15995a35247442aefa0ffe36a6dad51cb46b0918 Mon Sep 17 00:00:00 2001 From: Sourav Panda Date: Wed, 5 Jun 2024 22:27:51 +0000 Subject: mm: report per-page metadata information Today, we do not have any observability of per-page metadata and how much it takes away from the machine capacity. Thus, we want to describe the amount of memory that is going towards per-page metadata, which can vary depending on build configuration, machine architecture, and system use. This patch adds 2 fields to /proc/vmstat that can used as shown below: Accounting per-page metadata allocated by boot-allocator: /proc/vmstat:nr_memmap_boot * PAGE_SIZE Accounting per-page metadata allocated by buddy-allocator: /proc/vmstat:nr_memmap * PAGE_SIZE Accounting total Perpage metadata allocated on the machine: (/proc/vmstat:nr_memmap_boot + /proc/vmstat:nr_memmap) * PAGE_SIZE Utility for userspace: Observability: Describe the amount of memory overhead that is going to per-page metadata on the system at any given time since this overhead is not currently observable. Debugging: Tracking the changes or absolute value in struct pages can help detect anomalies as they can be correlated with other metrics in the machine (e.g., memtotal, number of huge pages, etc). page_ext overheads: Some kernel features such as page_owner page_table_check that use page_ext can be optionally enabled via kernel parameters. Having the total per-page metadata information helps users precisely measure impact. Furthermore, page-metadata metrics will reflect the amount of struct pages reliquished (or overhead reduced) when hugetlbfs pages are reserved which will vary depending on whether hugetlb vmemmap optimization is enabled or not. For background and results see: lore.kernel.org/all/20240220214558.3377482-1-souravpanda@google.com Link: https://lkml.kernel.org/r/20240605222751.1406125-1-souravpanda@google.com Signed-off-by: Sourav Panda Acked-by: David Rientjes Reviewed-by: Pasha Tatashin Cc: Alexey Dobriyan Cc: Bjorn Helgaas Cc: Chen Linxuan Cc: David Hildenbrand Cc: Greg Kroah-Hartman Cc: Ivan Babrou Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kefeng Wang Cc: Kirill A. Shutemov Cc: Liam R. Howlett Cc: Mike Kravetz Cc: Mike Rapoport (IBM) Cc: Muchun Song Cc: "Rafael J. Wysocki" Cc: Randy Dunlap Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Tomas Mudrunka Cc: Vlastimil Babka Cc: Wei Xu Cc: Yang Yang Cc: Yosry Ahmed Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 2 ++ include/linux/vmstat.h | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 586a8f0104d7..cb7f265c2b96 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -220,6 +220,8 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, + NR_MEMMAP, /* page metadata allocated through buddy allocator */ + NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */ NR_VM_NODE_STAT_ITEMS }; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 735eae6e272c..16b0cfa80502 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -624,4 +624,8 @@ static inline void lruvec_stat_sub_folio(struct folio *folio, { lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } + +void __meminit mod_node_early_perpage_metadata(int nid, long delta); +void __meminit store_early_perpage_metadata(void); + #endif /* _LINUX_VMSTAT_H */ -- cgit v1.2.3 From 47179fe03588caa13a9bae642b058901709ddc55 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Wed, 12 Jun 2024 09:24:08 +0000 Subject: mm/hugetlb_cgroup: prepare cftypes based on template Unlike other cgroup subsystems, the hugetlb cgroup does not provide a static array of cftype that explicitly displays the properties, handling functions, etc., of each file. Instead, it dynamically creates the attribute of cftypes based on the hstate during the startup procedure. This reduces the readability of the code. To fix this issue, introduce two templates of cftypes, and rebuild the attributes according to the hstate to make it ready to be added to cgroup framework. Link: https://lkml.kernel.org/r/20240612092409.2027592-3-xiujianfeng@huawei.com Signed-off-by: Xiu Jianfeng Cc: Muchun Song Cc: Oscar Salvador Cc: kernel test robot From: Xiu Jianfeng Subject: mm/hugetlb_cgroup: register lockdep key for cftype Date: Tue, 18 Jun 2024 07:19:22 +0000 When CONFIG_DEBUG_LOCK_ALLOC is enabled, the following commands can trigger a bug, mount -t cgroup2 none /sys/fs/cgroup cd /sys/fs/cgroup echo "+hugetlb" > cgroup.subtree_control The log is as below: BUG: key ffff8880046d88d8 has not been registered! ------------[ cut here ]------------ DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 3 PID: 226 at kernel/locking/lockdep.c:4945 lockdep_init_map_type+0x185/0x220 Modules linked in: CPU: 3 PID: 226 Comm: bash Not tainted 6.10.0-rc4-next-20240617-g76db4c64526c #544 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:lockdep_init_map_type+0x185/0x220 Code: 00 85 c0 0f 84 6c ff ff ff 8b 3d 6a d1 85 01 85 ff 0f 85 5e ff ff ff 48 c7 c6 21 99 4a 82 48 c7 c7 60 29 49 82 e8 3b 2e f5 RSP: 0018:ffffc9000083fc30 EFLAGS: 00000282 RAX: 0000000000000000 RBX: ffffffff828dd820 RCX: 0000000000000027 RDX: ffff88803cd9cac8 RSI: 0000000000000001 RDI: ffff88803cd9cac0 RBP: ffff88800674fbb0 R08: ffffffff828ce248 R09: 00000000ffffefff R10: ffffffff8285e260 R11: ffffffff828b8eb8 R12: ffff8880046d88d8 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8880067281c0 FS: 00007f68601ea740(0000) GS:ffff88803cd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005614f3ebc740 CR3: 000000000773a000 CR4: 00000000000006f0 Call Trace: ? __warn+0x77/0xd0 ? lockdep_init_map_type+0x185/0x220 ? report_bug+0x189/0x1a0 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? lockdep_init_map_type+0x185/0x220 __kernfs_create_file+0x79/0x100 cgroup_addrm_files+0x163/0x380 ? find_held_lock+0x2b/0x80 ? find_held_lock+0x2b/0x80 ? find_held_lock+0x2b/0x80 css_populate_dir+0x73/0x180 cgroup_apply_control_enable+0x12f/0x3a0 cgroup_subtree_control_write+0x30b/0x440 kernfs_fop_write_iter+0x13a/0x1f0 vfs_write+0x341/0x450 ksys_write+0x64/0xe0 do_syscall_64+0x4b/0x110 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f68602d9833 Code: 8b 15 61 26 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 64 8b 04 25 18 00 00 00 85 c0 75 14 b8 01 00 00 00 08 RSP: 002b:00007fff9bbdf8e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000009 RCX: 00007f68602d9833 RDX: 0000000000000009 RSI: 00005614f3ebc740 RDI: 0000000000000001 RBP: 00005614f3ebc740 R08: 000000000000000a R09: 0000000000000008 R10: 00005614f3db6ba0 R11: 0000000000000246 R12: 0000000000000009 R13: 00007f68603bd6a0 R14: 0000000000000009 R15: 00007f68603b8880 For lockdep, there is a sanity check in lockdep_init_map_type(), the lock-class key must either have been allocated statically or must have been registered as a dynamic key. However the commit e18df2889ff9 ("mm/hugetlb_cgroup: prepare cftypes based on template") has changed the cftypes from static allocated objects to dynamic allocated objects, so the cft->lockdep_key must be registered proactively. [xiujianfeng@huawei.com: fix BUG()] Link: https://lkml.kernel.org/r/20240619015527.2212698-1-xiujianfeng@huawei.com Link: https://lkml.kernel.org/r/20240618071922.2127289-1-xiujianfeng@huawei.com Link: https://lore.kernel.org/all/602186b3-5ce3-41b3-90a3-134792cc2a48@samsung.com/ Fixes: e18df2889ff9 ("mm/hugetlb_cgroup: prepare cftypes based on template") Signed-off-by: Xiu Jianfeng Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202406181046.8d8b2492-oliver.sang@intel.com Tested-by: Marek Szyprowski Tested-by: SeongJae Park Closes: https://lore.kernel.org/20240618233608.400367-1-sj@kernel.org Cc: Muchun Song Cc: Oscar Salvador Signed-off-by: Andrew Morton --- include/linux/cgroup-defs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ea48c861cd36..dc151cb0ef09 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -676,9 +676,7 @@ struct cftype { __poll_t (*poll)(struct kernfs_open_file *of, struct poll_table_struct *pt); -#ifdef CONFIG_DEBUG_LOCK_ALLOC struct lock_class_key lockdep_key; -#endif }; /* -- cgit v1.2.3 From b79d715c43354010775862cfcd2d01cb04d0de47 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Wed, 12 Jun 2024 09:24:09 +0000 Subject: mm/hugetlb_cgroup: switch to the new cftypes The previous patch has already reconstructed the cftype attributes based on the templates and saved them in dfl_cftypes and legacy_cftypes. then remove the old procedure and switch to the new cftypes. Link: https://lkml.kernel.org/r/20240612092409.2027592-4-xiujianfeng@huawei.com Signed-off-by: Xiu Jianfeng Cc: Muchun Song Cc: Oscar Salvador Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 279aca379b95..a951c0d06061 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -686,11 +686,6 @@ struct hstate { unsigned int nr_huge_pages_node[MAX_NUMNODES]; unsigned int free_huge_pages_node[MAX_NUMNODES]; unsigned int surplus_huge_pages_node[MAX_NUMNODES]; -#ifdef CONFIG_CGROUP_HUGETLB - /* cgroup control files */ - struct cftype cgroup_files_dfl[8]; - struct cftype cgroup_files_legacy[10]; -#endif char name[HSTATE_NAME_LEN]; }; -- cgit v1.2.3 From ceb32d6aa93ce3282a724f3a0828b4b84d5035f1 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 12 Jun 2024 15:18:24 +0800 Subject: mm/memory-failure: remove MF_MSG_SLAB Since commit 46df8e73a4a3 ("mm: free up PG_slab"), MF_MSG_SLAB becomes unused. Remove it. No functional change intended. Link: https://lkml.kernel.org/r/20240612071835.157004-3-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Cc: Borislav Petkov (AMD) Cc: David Hildenbrand Cc: kernel test robot Cc: Naoya Horiguchi Cc: Tony Luck Signed-off-by: Andrew Morton --- include/linux/mm.h | 1 - include/ras/ras_event.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7f864d5f52b7..8a38adaf5480 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4067,7 +4067,6 @@ enum mf_result { enum mf_action_page_type { MF_MSG_KERNEL, MF_MSG_KERNEL_HIGH_ORDER, - MF_MSG_SLAB, MF_MSG_DIFFERENT_COMPOUND, MF_MSG_HUGE, MF_MSG_FREE_HUGE, diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index cf7f19b7ce64..9bc707fe8819 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -356,7 +356,6 @@ TRACE_EVENT(aer_event, #define MF_PAGE_TYPE \ EM ( MF_MSG_KERNEL, "reserved kernel page" ) \ EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \ - EM ( MF_MSG_SLAB, "kernel slab page" ) \ EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \ EM ( MF_MSG_HUGE, "huge page" ) \ EM ( MF_MSG_FREE_HUGE, "free huge page" ) \ -- cgit v1.2.3 From 3a78f77fd1fb82c32cb7971a8a97c5cbc83ab69e Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Wed, 12 Jun 2024 15:18:32 +0800 Subject: mm/memory-failure: move some function declarations into internal.h There are some functions only used inside mm. Move them into internal.h. No functional change intended. Link: https://lkml.kernel.org/r/20240612071835.157004-11-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202405251049.hxjwX7zO-lkp@intel.com/ Cc: Borislav Petkov (AMD) Cc: David Hildenbrand Cc: Naoya Horiguchi Cc: Tony Luck Signed-off-by: Andrew Morton --- include/linux/mm.h | 7 ------- include/linux/page-flags.h | 5 ----- include/linux/rmap.h | 2 -- 3 files changed, 14 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8a38adaf5480..51fe207026f8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4000,7 +4000,6 @@ extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); void num_poisoned_pages_inc(unsigned long pfn); void num_poisoned_pages_sub(unsigned long pfn, long i); -struct task_struct *task_early_kill(struct task_struct *tsk, int force_early); #else static inline void memory_failure_queue(unsigned long pfn, int flags) { @@ -4021,12 +4020,6 @@ static inline void num_poisoned_pages_sub(unsigned long pfn, long i) } #endif -#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_KSM) -void add_to_kill_ksm(struct task_struct *tsk, struct page *p, - struct vm_area_struct *vma, struct list_head *to_kill, - unsigned long ksm_addr); -#endif - #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) extern void memblk_nr_poison_inc(unsigned long pfn); extern void memblk_nr_poison_sub(unsigned long pfn, long i); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 813eea2efe26..b041bc058fb1 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -616,11 +616,6 @@ PAGEFLAG_FALSE(Uncached, uncached) PAGEFLAG(HWPoison, hwpoison, PF_ANY) TESTSCFLAG(HWPoison, hwpoison, PF_ANY) #define __PG_HWPOISON (1UL << PG_hwpoison) -#define MAGIC_HWPOISON 0x48575053U /* HWPS */ -extern void SetPageHWPoisonTakenOff(struct page *page); -extern void ClearPageHWPoisonTakenOff(struct page *page); -extern bool take_page_off_buddy(struct page *page); -extern bool put_page_back_buddy(struct page *page); #else PAGEFLAG_FALSE(HWPoison, hwpoison) #define __PG_HWPOISON 0 diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bf46787c8eba..694d3d886245 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -747,8 +747,6 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff, void remove_migration_ptes(struct folio *src, struct folio *dst, bool locked); -unsigned long page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); - /* * rmap_walk_control: To control rmap traversing for specific needs * -- cgit v1.2.3 From e36287c6e12d00f2087dc0c4899d8a9c54e22e1c Mon Sep 17 00:00:00 2001 From: Hyeongtak Ji Date: Fri, 14 Jun 2024 12:00:05 +0900 Subject: mm/damon/sysfs-schemes: add target_nid on sysfs-schemes This patch adds target_nid under /sys/kernel/mm/damon/admin/kdamonds//contexts//schemes// The 'target_nid' can be used as the destination node for DAMOS actions such as DAMOS_MIGRATE_{HOT,COLD} in the follow up patches. [sj@kernel.org: document target_nid file] Link: https://lkml.kernel.org/r/20240618213630.84846-3-sj@kernel.org Link: https://lkml.kernel.org/r/20240614030010.751-4-honggyu.kim@sk.com Signed-off-by: Hyeongtak Ji Signed-off-by: Honggyu Kim Signed-off-by: SeongJae Park Cc: Gregory Price Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Masami Hiramatsu (Google) Cc: Mathieu Desnoyers Cc: Rakie Kim Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/damon.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index f7da65e1ac04..21d6b69a015c 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -374,6 +374,7 @@ struct damos_access_pattern { * @apply_interval_us: The time between applying the @action. * @quota: Control the aggressiveness of this scheme. * @wmarks: Watermarks for automated (in)activation of this scheme. + * @target_nid: Destination node if @action is "migrate_{hot,cold}". * @filters: Additional set of &struct damos_filter for &action. * @stat: Statistics of this scheme. * @list: List head for siblings. @@ -389,6 +390,10 @@ struct damos_access_pattern { * monitoring context are inactive, DAMON stops monitoring either, and just * repeatedly checks the watermarks. * + * @target_nid is used to set the migration target node for migrate_hot or + * migrate_cold actions, which means it's only meaningful when @action is either + * "migrate_hot" or "migrate_cold". + * * Before applying the &action to a memory region, &struct damon_operations * implementation could check pages of the region and skip &action to respect * &filters @@ -410,6 +415,9 @@ struct damos { /* public: */ struct damos_quota quota; struct damos_watermarks wmarks; + union { + int target_nid; + }; struct list_head filters; struct damos_stat stat; struct list_head list; @@ -726,7 +734,8 @@ struct damos *damon_new_scheme(struct damos_access_pattern *pattern, enum damos_action action, unsigned long apply_interval_us, struct damos_quota *quota, - struct damos_watermarks *wmarks); + struct damos_watermarks *wmarks, + int target_nid); void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); void damon_destroy_scheme(struct damos *s); -- cgit v1.2.3 From ced816a76b8fd1288d48523cc48ed308964d12ed Mon Sep 17 00:00:00 2001 From: Honggyu Kim Date: Fri, 14 Jun 2024 12:00:06 +0900 Subject: mm/migrate: add MR_DAMON to migrate_reason The current patch series introduces DAMON based migration across NUMA nodes so it'd be better to have a new migrate_reason in trace events. Link: https://lkml.kernel.org/r/20240614030010.751-5-honggyu.kim@sk.com Signed-off-by: Honggyu Kim Reviewed-by: SeongJae Park Signed-off-by: SeongJae Park Cc: Gregory Price Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Hyeongtak Ji Cc: Masami Hiramatsu (Google) Cc: Mathieu Desnoyers Cc: Rakie Kim Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/migrate_mode.h | 1 + include/trace/events/migrate.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h index 9fb482bb7323..265c4328b36a 100644 --- a/include/linux/migrate_mode.h +++ b/include/linux/migrate_mode.h @@ -24,6 +24,7 @@ enum migrate_reason { MR_CONTIG_RANGE, MR_LONGTERM_PIN, MR_DEMOTION, + MR_DAMON, MR_TYPES }; diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h index 0190ef725b43..cd01dd7b3640 100644 --- a/include/trace/events/migrate.h +++ b/include/trace/events/migrate.h @@ -22,7 +22,8 @@ EM( MR_NUMA_MISPLACED, "numa_misplaced") \ EM( MR_CONTIG_RANGE, "contig_range") \ EM( MR_LONGTERM_PIN, "longterm_pin") \ - EMe(MR_DEMOTION, "demotion") + EM( MR_DEMOTION, "demotion") \ + EMe(MR_DAMON, "damon") /* * First define the enums in the above macros to be exported to userspace -- cgit v1.2.3 From b51820ebea656be3b48bb16dcdc5ad3f203c4fd7 Mon Sep 17 00:00:00 2001 From: Honggyu Kim Date: Fri, 14 Jun 2024 12:00:07 +0900 Subject: mm/damon/paddr: introduce DAMOS_MIGRATE_COLD action for demotion This patch introduces DAMOS_MIGRATE_COLD action, which is similar to DAMOS_PAGEOUT, but migrate folios to the given 'target_nid' in the sysfs instead of swapping them out. The 'target_nid' sysfs knob informs the migration target node ID. Here is one of the example usage of this 'migrate_cold' action. $ cd /sys/kernel/mm/damon/admin/kdamonds/ $ cat contexts//schemes//action migrate_cold $ echo 2 > contexts//schemes//target_nid $ echo commit > state $ numactl -p 0 ./hot_cold 500M 600M & $ numastat -c -p hot_cold Per-node process memory usage (in MBs) PID Node 0 Node 1 Node 2 Total -------------- ------ ------ ------ ----- 701 (hot_cold) 501 0 601 1101 Since there are some common routines with pageout, many functions have similar logics between pageout and migrate cold. damon_pa_migrate_folio_list() is a minimized version of shrink_folio_list(). Link: https://lkml.kernel.org/r/20240614030010.751-6-honggyu.kim@sk.com Signed-off-by: Honggyu Kim Signed-off-by: Hyeongtak Ji Signed-off-by: SeongJae Park Cc: Gregory Price Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Masami Hiramatsu (Google) Cc: Mathieu Desnoyers Cc: Rakie Kim Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 21d6b69a015c..56714b6eb0d7 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -105,6 +105,7 @@ struct damon_target { * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. * @DAMOS_LRU_PRIO: Prioritize the region on its LRU lists. * @DAMOS_LRU_DEPRIO: Deprioritize the region on its LRU lists. + * @DAMOS_MIGRATE_COLD: Migrate the regions prioritizing colder regions. * @DAMOS_STAT: Do nothing but count the stat. * @NR_DAMOS_ACTIONS: Total number of DAMOS actions * @@ -122,6 +123,7 @@ enum damos_action { DAMOS_NOHUGEPAGE, DAMOS_LRU_PRIO, DAMOS_LRU_DEPRIO, + DAMOS_MIGRATE_COLD, DAMOS_STAT, /* Do nothing but only record the stat */ NR_DAMOS_ACTIONS, }; -- cgit v1.2.3 From b696722d784fb3610183281d2fd514b0efe97e3b Mon Sep 17 00:00:00 2001 From: Hyeongtak Ji Date: Fri, 14 Jun 2024 12:00:08 +0900 Subject: mm/damon/paddr: introduce DAMOS_MIGRATE_HOT action for promotion This patch introduces DAMOS_MIGRATE_HOT action, which is similar to DAMOS_MIGRATE_COLD, but proritizes hot pages. It migrates pages inside the given region to the 'target_nid' NUMA node in the sysfs. Here is one of the example usage of this 'migrate_hot' action. $ cd /sys/kernel/mm/damon/admin/kdamonds/ $ cat contexts//schemes//action migrate_hot $ echo 0 > contexts//schemes//target_nid $ echo commit > state $ numactl -p 2 ./hot_cold 500M 600M & $ numastat -c -p hot_cold Per-node process memory usage (in MBs) PID Node 0 Node 1 Node 2 Total -------------- ------ ------ ------ ----- 701 (hot_cold) 501 0 601 1101 Link: https://lkml.kernel.org/r/20240614030010.751-7-honggyu.kim@sk.com Signed-off-by: Hyeongtak Ji Signed-off-by: Honggyu Kim Signed-off-by: SeongJae Park Cc: Gregory Price Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Masami Hiramatsu (Google) Cc: Mathieu Desnoyers Cc: Rakie Kim Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton --- include/linux/damon.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 56714b6eb0d7..3d62d98d6359 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -105,6 +105,7 @@ struct damon_target { * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. * @DAMOS_LRU_PRIO: Prioritize the region on its LRU lists. * @DAMOS_LRU_DEPRIO: Deprioritize the region on its LRU lists. + * @DAMOS_MIGRATE_HOT: Migrate the regions prioritizing warmer regions. * @DAMOS_MIGRATE_COLD: Migrate the regions prioritizing colder regions. * @DAMOS_STAT: Do nothing but count the stat. * @NR_DAMOS_ACTIONS: Total number of DAMOS actions @@ -123,6 +124,7 @@ enum damos_action { DAMOS_NOHUGEPAGE, DAMOS_LRU_PRIO, DAMOS_LRU_DEPRIO, + DAMOS_MIGRATE_HOT, DAMOS_MIGRATE_COLD, DAMOS_STAT, /* Do nothing but only record the stat */ NR_DAMOS_ACTIONS, -- cgit v1.2.3 From 3ad1dce6c30175dfd3da29d76853a69affbf7489 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 18 Jun 2024 11:17:58 -0700 Subject: mm/damon/core: implement DAMOS quota goals online commit function Patch series "mm/damon: introduce DAMON parameters online commit function". DAMON context struct (damon_ctx) contains user requests (parameters), internal status, and operation results. For flexible usages, DAMON API users are encouraged to manually manipulate the struct. That works well for simple use cases. However, it has turned out that it is not that simple at least for online parameters udpate. It is easy to forget properly maintaining internal status and operation results. Also, such manual manipulation for online tuning is implemented multiple times on DAMON API users including DAMON sysfs interface, DAMON_RECLAIM and DAMON_LRU_SORT. As a result, we have multiple sources of bugs for same problem. Actually we found and fixed a few bugs from online parameter updating of DAMON API users. Implement a function for online DAMON parameters update in core layer, and replace DAMON API users' manual manipulation code for the use case. The core layer function could still have bugs, but this change reduces the source of bugs for the problem to one place. This patch (of 12): Implement functions for supporting online DAMOS quota goals parameters update. The function receives two DAMOS quota structs. One is the struct that currently being used by a kdamond and therefore to be updated. The other one contains the parameters to be applied to the first one. The function applies the new parameters to the destination struct while keeping/updating the internal status. The function should be called from parameters-update safe place, like DAMON callbacks. Link: https://lkml.kernel.org/r/20240618181809.82078-1-sj@kernel.org Link: https://lkml.kernel.org/r/20240618181809.82078-2-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index 3d62d98d6359..ce12c9f1b4e4 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -742,6 +742,7 @@ struct damos *damon_new_scheme(struct damos_access_pattern *pattern, int target_nid); void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); void damon_destroy_scheme(struct damos *s); +int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src); struct damon_target *damon_new_target(void); void damon_add_target(struct damon_ctx *ctx, struct damon_target *t); -- cgit v1.2.3 From 9cb3d0b9dfce6a3258d91e6d69e418d0b4cce46a Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 18 Jun 2024 11:17:59 -0700 Subject: mm/damon/core: implement DAMON context commit function Implement functions for supporting online DAMON context level parameters update. The function receives two DAMON context structs. One is the struct that currently being used by a kdamond and therefore to be updated. The other one contains the parameters to be applied to the first one. The function applies the new parameters to the destination struct while keeping/updating the internal status and operation results. The function should be called from DAMON context-update-safe place, like DAMON callbacks. Link: https://lkml.kernel.org/r/20240618181809.82078-3-sj@kernel.org Signed-off-by: SeongJae Park Signed-off-by: Andrew Morton --- include/linux/damon.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/damon.h b/include/linux/damon.h index ce12c9f1b4e4..27c546bfc6d4 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -756,6 +756,7 @@ void damon_destroy_ctx(struct damon_ctx *ctx); int damon_set_attrs(struct damon_ctx *ctx, struct damon_attrs *attrs); void damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, ssize_t nr_schemes); +int damon_commit_ctx(struct damon_ctx *old_ctx, struct damon_ctx *new_ctx); int damon_nr_running_ctxs(void); bool damon_is_registered_ops(enum damon_ops_id id); int damon_register_ops(struct damon_operations *ops); -- cgit v1.2.3 From 6d21dde7adc0a2deb9e0c6b06f42be3f88ca180d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 29 May 2024 13:18:59 +0200 Subject: mm: update _mapcount and page_type documentation Patch series "mm: page_type, zsmalloc and page_mapcount_reset()", v2. Wanting to remove the remaining abuser of _mapcount/page_type along with page_mapcount_reset(), I stumbled over zsmalloc, which is yet to be converted away from "struct page" [1]. Unfortunately, we cannot stop using the page_type field in zsmalloc code completely for its own purposes. All other fields in "struct page" are used one way or the other. Could we simply store a 2-byte offset value at the beginning of each page? Likely, but that will require a bit more work; and once we have memdesc we might want to move the offset in there (struct zsalloc?) again. ... but we can limit the abuse to 16 bit, glue it to a page type that must be set, and document it. page_has_type() will always successfully indicate such zsmalloc pages, and such zsmalloc pages only. We lose zsmalloc support for PAGE_SIZE > 64KB, which should be tolerable. We could use more bits from the page type, but 16 bit sounds like a good idea for now. So clarify the _mapcount/page_type documentation, use a proper page_type for zsmalloc, and remove page_mapcount_reset(). [1] https://lore.kernel.org/all/20231130101242.2590384-1-42.hyeyoo@gmail.com/ This patch (of 6): Let's make it clearer that _mapcount must no longer be used for own purposes, and how _mapcount and page_type behaves nowadays (also in the context of hugetlb folios, which are typed folios that will be mapped to user space). Move the documentation regarding "-1" over from page_mapcount_reset(), which we will remove next. Move "page_type" before "mapcount", to make it clearer what typed folios are. Link: https://lkml.kernel.org/r/20240529111904.2069608-1-david@redhat.com Link: https://lkml.kernel.org/r/20240529111904.2069608-2-david@redhat.com Signed-off-by: David Hildenbrand Tested-by: Sergey Senozhatsky [zram/zsmalloc workloads] Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport (IBM) Cc: Minchan Kim Signed-off-by: Andrew Morton --- include/linux/mm.h | 5 ----- include/linux/mm_types.h | 28 +++++++++++++++++----------- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 51fe207026f8..61a7f680f0ba 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1210,11 +1210,6 @@ static inline int folio_entire_mapcount(const struct folio *folio) return atomic_read(&folio->_entire_mapcount) + 1; } -/* - * The atomic page->_mapcount, starts from -1: so that transitions - * both from it and to it can be tracked, using atomic_inc_and_test - * and atomic_add_negative(-1). - */ static inline void page_mapcount_reset(struct page *page) { atomic_set(&(page)->_mapcount, -1); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index af3a0256fa93..278eade2ad4c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -46,9 +46,7 @@ struct mem_cgroup; * which is guaranteed to be aligned. If you use the same storage as * page->mapping, you must restore it to NULL before freeing the page. * - * If your page will not be mapped to userspace, you can also use the four - * bytes in the mapcount union, but you must call page_mapcount_reset() - * before freeing it. + * The mapcount field must not be used for own purposes. * * If you want to use the refcount field, it must be used in such a way * that other CPUs temporarily incrementing and then decrementing the @@ -152,18 +150,26 @@ struct page { union { /* This union is 4 bytes in size. */ /* - * If the page can be mapped to userspace, encodes the number - * of times this page is referenced by a page table. + * For head pages of typed folios, the value stored here + * allows for determining what this page is used for. The + * tail pages of typed folios will not store a type + * (page_type == _mapcount == -1). + * + * See page-flags.h for a list of page types which are currently + * stored here. */ - atomic_t _mapcount; + unsigned int page_type; /* - * If the page is neither PageSlab nor mappable to userspace, - * the value stored here may help determine what this page - * is used for. See page-flags.h for a list of page types - * which are currently stored here. + * For pages that are part of non-typed folios for which mappings + * are tracked via the RMAP, encodes the number of times this page + * is directly referenced by a page table. + * + * Note that the mapcount is always initialized to -1, so that + * transitions both from it and to it can be tracked, using + * atomic_inc_and_test() and atomic_add_negative(-1). */ - unsigned int page_type; + atomic_t _mapcount; }; /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */ -- cgit v1.2.3 From 8db00ad5646171880239b7f10e333278f63d8fcf Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 29 May 2024 13:19:00 +0200 Subject: mm: allow reuse of the lower 16 bit of the page type with an actual type As long as the owner sets a page type first, we can allow reuse of the lower 16 bit: sufficient to store an offset into a 64 KiB page, which is the maximum base page size in *common* configurations (ignoring the 256 KiB variant). Restrict it to the head page. We'll use that for zsmalloc next, to set a proper type while still reusing that field to store information (offset into a base page) that cannot go elsewhere for now. Let's reserve the lower 16 bit for that purpose and for catching mapcount underflows, and let's reduce PAGE_TYPE_BASE to a single bit. Note that we will still have to overflow the mapcount quite a lot until we would actually indicate a valid page type. Start handing out the type bits from highest to lowest, to make it clearer how many bits for types we have left. Out of 15 bit we can use for types, we currently use 6. If we run out of bits before we have better typing (e.g., memdesc), we can always investigate storing a value instead [1]. [1] https://lore.kernel.org/all/00ba1dff-7c05-46e8-b0d9-a78ac1cfc198@redhat.com/ [akpm@linux-foundation.org: fix PG_hugetlb typo, per David] Link: https://lkml.kernel.org/r/20240529111904.2069608-3-david@redhat.com Signed-off-by: David Hildenbrand Tested-by: Sergey Senozhatsky [zram/zsmalloc workloads] Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport (IBM) Cc: Minchan Kim Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 5 +++++ include/linux/page-flags.h | 25 +++++++++++++++---------- 2 files changed, 20 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 278eade2ad4c..ef09c4eef6d3 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -157,6 +157,11 @@ struct page { * * See page-flags.h for a list of page types which are currently * stored here. + * + * Owners of typed folios may reuse the lower 16 bit of the + * head page page_type field after setting the page type, + * but must reset these 16 bit to -1 before clearing the + * page type. */ unsigned int page_type; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b041bc058fb1..d221a6a14764 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -941,16 +941,21 @@ PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned) */ enum pagetype { - PG_buddy = 0x00000080, - PG_offline = 0x00000100, - PG_table = 0x00000200, - PG_guard = 0x00000400, - PG_hugetlb = 0x00000800, - PG_slab = 0x00001000, - - PAGE_TYPE_BASE = 0xf0000000, - /* Reserve 0x0000007f to catch underflows of _mapcount */ - PAGE_MAPCOUNT_RESERVE = -128, + PG_buddy = 0x40000000, + PG_offline = 0x20000000, + PG_table = 0x10000000, + PG_guard = 0x08000000, + PG_hugetlb = 0x04000000, + PG_slab = 0x02000000, + + PAGE_TYPE_BASE = 0x80000000, + + /* + * Reserve 0xffff0000 - 0xfffffffe to catch _mapcount underflows and + * allow owners that set a type to reuse the lower 16 bit for their own + * purposes. + */ + PAGE_MAPCOUNT_RESERVE = ~0x0000ffff, }; #define PageType(page, flag) \ -- cgit v1.2.3 From 43d746dc49bb4c82034fce01a92fe67344d664cf Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 29 May 2024 13:19:01 +0200 Subject: mm/zsmalloc: use a proper page type Let's clean it up: use a proper page type and store our data (offset into a page) in the lower 16 bit as documented. We won't be able to support 256 KiB base pages, which is acceptable. Teach Kconfig to handle that cleanly using a new CONFIG_HAVE_ZSMALLOC. Based on this, we should do a proper "struct zsdesc" conversion, as proposed in [1]. This removes the last _mapcount/page_type offender. [1] https://lore.kernel.org/all/20231130101242.2590384-1-42.hyeyoo@gmail.com/ Link: https://lkml.kernel.org/r/20240529111904.2069608-4-david@redhat.com Signed-off-by: David Hildenbrand Tested-by: Sergey Senozhatsky [zram/zsmalloc workloads] Reviewed-by: Sergey Senozhatsky Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport (IBM) Cc: Minchan Kim Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index d221a6a14764..0f7c7320391e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -947,6 +947,7 @@ enum pagetype { PG_guard = 0x08000000, PG_hugetlb = 0x04000000, PG_slab = 0x02000000, + PG_zsmalloc = 0x01000000, PAGE_TYPE_BASE = 0x80000000, @@ -1071,6 +1072,8 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb) FOLIO_TEST_FLAG_FALSE(hugetlb) #endif +PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc) + /** * PageHuge - Determine if the page belongs to hugetlbfs * @page: The page to test. -- cgit v1.2.3 From 11d5401b011e3557894d824dac210f0b18cc3911 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 29 May 2024 13:19:04 +0200 Subject: mm/mm_init: initialize page->_mapcount directly in __init_single_page() Let's simply reinitialize the page->_mapcount directly. We can now get rid of page_mapcount_reset(). Link: https://lkml.kernel.org/r/20240529111904.2069608-7-david@redhat.com Signed-off-by: David Hildenbrand Tested-by: Sergey Senozhatsky [zram/zsmalloc workloads] Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Matthew Wilcox (Oracle) Cc: Mike Rapoport (IBM) Cc: Minchan Kim Signed-off-by: Andrew Morton --- include/linux/mm.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 61a7f680f0ba..4f75fee497f1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1210,11 +1210,6 @@ static inline int folio_entire_mapcount(const struct folio *folio) return atomic_read(&folio->_entire_mapcount) + 1; } -static inline void page_mapcount_reset(struct page *page) -{ - atomic_set(&(page)->_mapcount, -1); -} - static inline int folio_large_mapcount(const struct folio *folio) { VM_WARN_ON_FOLIO(!folio_test_large(folio), folio); -- cgit v1.2.3 From 2669324b81e5f67d409d3ad988da77c7c2a09045 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 4 Jun 2024 19:48:20 +0800 Subject: mm: remove page_maybe_dma_pinned() After the last user of page_maybe_dma_pinned() is converted to folio_maybe_dma_pinned(), remove page_maybe_dma_pinned() and update the document and comment. [wangkefeng.wang@huawei.com: fix pin_user_pages.rst underlining] Link: https://lkml.kernel.org/r/61b256c7-4989-44ec-83db-f34a1bd4be2d@huawei.com Link: https://lkml.kernel.org/r/20240604114822.2089819-3-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: David Hildenbrand Cc: Daniel Vetter Cc: Helge Deller Cc: Jonathan Corbet Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 4f75fee497f1..cb4aa725252e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1918,8 +1918,8 @@ static inline struct folio *pfn_folio(unsigned long pfn) * * For more information, please see Documentation/core-api/pin_user_pages.rst. * - * Return: True, if it is likely that the page has been "dma-pinned". - * False, if the page is definitely not dma-pinned. + * Return: True, if it is likely that the folio has been "dma-pinned". + * False, if the folio is definitely not dma-pinned. */ static inline bool folio_maybe_dma_pinned(struct folio *folio) { @@ -1938,11 +1938,6 @@ static inline bool folio_maybe_dma_pinned(struct folio *folio) GUP_PIN_COUNTING_BIAS; } -static inline bool page_maybe_dma_pinned(struct page *page) -{ - return folio_maybe_dma_pinned(page_folio(page)); -} - /* * This should most likely only be called during fork() to see whether we * should break the cow immediately for an anon page on the src mm. -- cgit v1.2.3 From a929e0d10f3db1a53668f6b9845db27d7fb63759 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 4 Jun 2024 19:48:22 +0800 Subject: mm: remove page_mkclean() There are no more users of page_mkclean(), remove it and update the document and comment. Link: https://lkml.kernel.org/r/20240604114822.2089819-5-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Acked-by: David Hildenbrand Cc: Daniel Vetter Cc: Helge Deller Cc: Jonathan Corbet Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 +- include/linux/rmap.h | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index cb4aa725252e..101945baffc7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1577,7 +1577,7 @@ static inline void put_page(struct page *page) * issue. * * Locking: the lockless algorithm described in folio_try_get_rcu() - * provides safe operation for get_user_pages(), page_mkclean() and + * provides safe operation for get_user_pages(), folio_mkclean() and * other calls that race to set up page table entries. */ #define GUP_PIN_COUNTING_BIAS (1U << 10) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 694d3d886245..980fa5d75d69 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -802,8 +802,4 @@ static inline int folio_mkclean(struct folio *folio) } #endif /* CONFIG_MMU */ -static inline int page_mkclean(struct page *page) -{ - return folio_mkclean(page_folio(page)); -} #endif /* _LINUX_RMAP_H */ -- cgit v1.2.3 From 503b158fc30f203a1854c87183ca3467c6466001 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 7 Jun 2024 11:09:37 +0200 Subject: mm/memory_hotplug: initialize memmap of !ZONE_DEVICE with PageOffline() instead of PageReserved() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We currently initialize the memmap such that PG_reserved is set and the refcount of the page is 1. In virtio-mem code, we have to manually clear that PG_reserved flag to make memory offlining with partially hotplugged memory blocks possible: has_unmovable_pages() would otherwise bail out on such pages. We want to avoid PG_reserved where possible and move to typed pages instead. Further, we want to further enlighten memory offlining code about PG_offline: offline pages in an online memory section. One example is handling managed page count adjustments in a cleaner way during memory offlining. So let's initialize the pages with PG_offline instead of PG_reserved. generic_online_page()->__free_pages_core() will now clear that flag before handing that memory to the buddy. Note that the page refcount is still 1 and would forbid offlining of such memory except when special care is take during GOING_OFFLINE as currently only implemented by virtio-mem. With this change, we can now get non-PageReserved() pages in the XEN balloon list. From what I can tell, that can already happen via decrease_reservation(), so that should be fine. HV-balloon should not really observe a change: partial online memory blocks still cannot get surprise-offlined, because the refcount of these PageOffline() pages is 1. Update virtio-mem, HV-balloon and XEN-balloon code to be aware that hotplugged pages are now PageOffline() instead of PageReserved() before they are handed over to the buddy. We'll leave the ZONE_DEVICE case alone for now. Note that self-hosted vmemmap pages will no longer be marked as reserved. This matches ordinary vmemmap pages allocated from the buddy during memory hotplug. Now, really only vmemmap pages allocated from memblock during early boot will be marked reserved. Existing PageReserved() checks seem to be handling all relevant cases correctly even after this change. Link: https://lkml.kernel.org/r/20240607090939.89524-3-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Oscar Salvador [generic memory-hotplug bits] Cc: Alexander Potapenko Cc: Dexuan Cui Cc: Dmitry Vyukov Cc: Eugenio Pérez Cc: Haiyang Zhang Cc: Jason Wang Cc: Juergen Gross Cc: "K. Y. Srinivasan" Cc: Marco Elver Cc: Michael S. Tsirkin Cc: Mike Rapoport (IBM) Cc: Oleksandr Tyshchenko Cc: Stefano Stabellini Cc: Wei Liu Cc: Xuan Zhuo Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 0f7c7320391e..b23772b08cc1 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -30,16 +30,11 @@ * - Pages falling into physical memory gaps - not IORESOURCE_SYSRAM. Trying * to read/write these pages might end badly. Don't touch! * - The zero page(s) - * - Pages not added to the page allocator when onlining a section because - * they were excluded via the online_page_callback() or because they are - * PG_hwpoison. * - Pages allocated in the context of kexec/kdump (loaded kernel image, * control pages, vmcoreinfo) * - MMIO/DMA pages. Some architectures don't allow to ioremap pages that are * not marked PG_reserved (as they might be in use by somebody else who does * not respect the caching strategy). - * - Pages part of an offline section (struct pages of offline sections should - * not be trusted as they will be initialized when first onlined). * - MCA pages on ia64 * - Pages holding CPU notes for POWER Firmware Assisted Dump * - Device memory (e.g. PMEM, DAX, HMM) @@ -1020,6 +1015,10 @@ PAGE_TYPE_OPS(Buddy, buddy, buddy) * The content of these pages is effectively stale. Such pages should not * be touched (read/write/dump/save) except by their owner. * + * When a memory block gets onlined, all pages are initialized with a + * refcount of 1 and PageOffline(). generic_online_page() will + * take care of clearing PageOffline(). + * * If a driver wants to allow to offline unmovable PageOffline() pages without * putting them back to the buddy, it can do so via the memory notifier by * decrementing the reference count in MEM_GOING_OFFLINE and incrementing the @@ -1027,8 +1026,7 @@ PAGE_TYPE_OPS(Buddy, buddy, buddy) * pages (now with a reference count of zero) are treated like free pages, * allowing the containing memory block to get offlined. A driver that * relies on this feature is aware that re-onlining the memory block will - * require to re-set the pages PageOffline() and not giving them to the - * buddy via online_page_callback_t. + * require not giving them to the buddy via generic_online_page(). * * There are drivers that mark a page PageOffline() and expect there won't be * any further access to page content. PFN walkers that read content of random -- cgit v1.2.3 From 50625744220c101705a989d7c57a6c16e945f3b1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 7 Jun 2024 11:09:38 +0200 Subject: mm/memory_hotplug: skip adjust_managed_page_count() for PageOffline() pages when offlining MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We currently have a hack for virtio-mem in place to handle memory offlining with PageOffline pages for which we already adjusted the managed page count. Let's enlighten memory offlining code so we can get rid of that hack, and document the situation. Link: https://lkml.kernel.org/r/20240607090939.89524-4-david@redhat.com Signed-off-by: David Hildenbrand Acked-by: Oscar Salvador Cc: Alexander Potapenko Cc: Dexuan Cui Cc: Dmitry Vyukov Cc: Eugenio Pérez Cc: Haiyang Zhang Cc: Jason Wang Cc: Juergen Gross Cc: "K. Y. Srinivasan" Cc: Marco Elver Cc: Michael S. Tsirkin Cc: Mike Rapoport (IBM) Cc: Oleksandr Tyshchenko Cc: Stefano Stabellini Cc: Wei Liu Cc: Xuan Zhuo Signed-off-by: Andrew Morton --- include/linux/memory_hotplug.h | 4 ++-- include/linux/page-flags.h | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 7a9ff464608d..ebe876930e78 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -175,8 +175,8 @@ extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages, extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages); extern int online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone, struct memory_group *group); -extern void __offline_isolated_pages(unsigned long start_pfn, - unsigned long end_pfn); +extern unsigned long __offline_isolated_pages(unsigned long start_pfn, + unsigned long end_pfn); typedef void (*online_page_callback_t)(struct page *page, unsigned int order); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index b23772b08cc1..1d441908b726 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -1023,11 +1023,15 @@ PAGE_TYPE_OPS(Buddy, buddy, buddy) * putting them back to the buddy, it can do so via the memory notifier by * decrementing the reference count in MEM_GOING_OFFLINE and incrementing the * reference count in MEM_CANCEL_OFFLINE. When offlining, the PageOffline() - * pages (now with a reference count of zero) are treated like free pages, - * allowing the containing memory block to get offlined. A driver that + * pages (now with a reference count of zero) are treated like free (unmanaged) + * pages, allowing the containing memory block to get offlined. A driver that * relies on this feature is aware that re-onlining the memory block will * require not giving them to the buddy via generic_online_page(). * + * Memory offlining code will not adjust the managed page count for any + * PageOffline() pages, treating them like they were never exposed to the + * buddy using generic_online_page(). + * * There are drivers that mark a page PageOffline() and expect there won't be * any further access to page content. PFN walkers that read content of random * pages should check PageOffline() and synchronize with such drivers using -- cgit v1.2.3 From 15bde4abab734c687c1f81704886aba3a70c268e Mon Sep 17 00:00:00 2001 From: Barry Song Date: Tue, 18 Jun 2024 11:11:35 +1200 Subject: mm: extend rmap flags arguments for folio_add_new_anon_rmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm: clarify folio_add_new_anon_rmap() and __folio_add_anon_rmap()", v2. This patchset is preparatory work for mTHP swapin. folio_add_new_anon_rmap() assumes that new anon rmaps are always exclusive. However, this assumption doesn’t hold true for cases like do_swap_page(), where a new anon might be added to the swapcache and is not necessarily exclusive. The patchset extends the rmap flags to allow folio_add_new_anon_rmap() to handle both exclusive and non-exclusive new anon folios. The do_swap_page() function is updated to use this extended API with rmap flags. Consequently, all new anon folios now consistently use folio_add_new_anon_rmap(). The special case for !folio_test_anon() in __folio_add_anon_rmap() can be safely removed. In conclusion, new anon folios always use folio_add_new_anon_rmap(), regardless of exclusivity. Old anon folios continue to use __folio_add_anon_rmap() via folio_add_anon_rmap_pmd() and folio_add_anon_rmap_ptes(). This patch (of 3): In the case of a swap-in, a new anonymous folio is not necessarily exclusive. This patch updates the rmap flags to allow a new anonymous folio to be treated as either exclusive or non-exclusive. To maintain the existing behavior, we always use EXCLUSIVE as the default setting. [akpm@linux-foundation.org: cleanup and constifications per David and akpm] [v-songbaohua@oppo.com: fix missing doc for flags of folio_add_new_anon_rmap()] Link: https://lkml.kernel.org/r/20240619210641.62542-1-21cnbao@gmail.com [v-songbaohua@oppo.com: enhance doc for extend rmap flags arguments for folio_add_new_anon_rmap] Link: https://lkml.kernel.org/r/20240622030256.43775-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20240617231137.80726-1-21cnbao@gmail.com Link: https://lkml.kernel.org/r/20240617231137.80726-2-21cnbao@gmail.com Signed-off-by: Barry Song Suggested-by: David Hildenbrand Tested-by: Shuai Yuan Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Chris Li Cc: "Huang, Ying" Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Ryan Roberts Cc: Suren Baghdasaryan Cc: Yang Shi Cc: Yosry Ahmed Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/rmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 980fa5d75d69..0978c64f49d8 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -244,7 +244,7 @@ void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages, void folio_add_anon_rmap_pmd(struct folio *, struct page *, struct vm_area_struct *, unsigned long address, rmap_t flags); void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *, - unsigned long address); + unsigned long address, rmap_t flags); void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages, struct vm_area_struct *); #define folio_add_file_rmap_pte(folio, page, vma) \ -- cgit v1.2.3 From 78fefd04c123493bbf28434768fa577b2153c79b Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 18 Jun 2024 17:12:39 +0800 Subject: mm: memory: convert clear_huge_page() to folio_zero_user() Patch series "mm: improve clear and copy user folio", v2. Some folio conversions. An improvement is to move address alignment into the caller as it is only needed if we don't know which address will be accessed when clearing/copying user folios. This patch (of 4): Replace clear_huge_page() with folio_zero_user(), and take a folio instead of a page. Directly get number of pages by folio_nr_pages() to remove pages_per_huge_page argument, furthermore, move the address alignment from folio_zero_user() to the callers since the alignment is only needed when we don't know which address will be accessed. Link: https://lkml.kernel.org/r/20240618091242.2140164-1-wangkefeng.wang@huawei.com Link: https://lkml.kernel.org/r/20240618091242.2140164-2-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/mm.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 101945baffc7..e2140ea6ae98 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4067,9 +4067,7 @@ enum mf_action_page_type { }; #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) -extern void clear_huge_page(struct page *page, - unsigned long addr_hint, - unsigned int pages_per_huge_page); +void folio_zero_user(struct folio *folio, unsigned long addr_hint); int copy_user_large_folio(struct folio *dst, struct folio *src, unsigned long addr_hint, struct vm_area_struct *vma); -- cgit v1.2.3 From dc9e6f7053dd540db2c9fba30c31a07de5edab01 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 31 May 2024 14:56:16 +0200 Subject: mm: read page_type using READ_ONCE KCSAN complains about possible data races: while we check for a page_type -- for example for sanity checks -- we might concurrently modify the mapcount that overlays page_type. Let's use READ_ONCE to avoid load tearing (shouldn't make a difference) and to make KCSAN happy. Likely, we might also want to use WRITE_ONCE for the writer side of page_type, if KCSAN ever complains about that. But we'll not mess with that for now. Note: nothing should really be broken besides wrong KCSAN complaints. The sanity check that triggers this was added in commit 68f0320824fa ("mm/rmap: convert folio_add_file_rmap_range() into folio_add_file_rmap_[pte|ptes|pmd]()"). Even before that similar races likely where possible, ever since we added page_type in commit 6e292b9be7f4 ("mm: split page_type out from _mapcount"). Link: https://lkml.kernel.org/r/20240531125616.2850153-1-david@redhat.com Signed-off-by: David Hildenbrand Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202405281431.c46a3be9-lkp@intel.com Reviewed-by: Oscar Salvador Reviewed-by: Miaohe Lin Cc: Matthew Wilcox Signed-off-by: Andrew Morton --- include/linux/page-flags.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1d441908b726..5769fe6e4950 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -955,9 +955,9 @@ enum pagetype { }; #define PageType(page, flag) \ - ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) + ((READ_ONCE(page->page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) #define folio_test_type(folio, flag) \ - ((folio->page.page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) + ((READ_ONCE(folio->page.page_type) & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) static inline int page_type_has_type(unsigned int page_type) { @@ -966,7 +966,7 @@ static inline int page_type_has_type(unsigned int page_type) static inline int page_has_type(const struct page *page) { - return page_type_has_type(page->page_type); + return page_type_has_type(READ_ONCE(page->page_type)); } #define FOLIO_TYPE_OPS(lname, fname) \ -- cgit v1.2.3 From 6b1709d4b7fce27c6c79fc78c17c458f9848a4d8 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 21 Jun 2024 13:34:53 +0200 Subject: kmsan: expose kmsan_get_metadata() Each s390 CPU has lowcore pages associated with it. Each CPU sees its own lowcore at virtual address 0 through a hardware mechanism called prefixing. Additionally, all lowcores are mapped to non-0 virtual addresses stored in the lowcore_ptr[] array. When lowcore is accessed through virtual address 0, one needs to resolve metadata for lowcore_ptr[raw_smp_processor_id()]. Expose kmsan_get_metadata() to make it possible to do this from the arch code. Link: https://lkml.kernel.org/r/20240621113706.315500-10-iii@linux.ibm.com Signed-off-by: Ilya Leoshkevich Reviewed-by: Alexander Potapenko Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Christoph Lameter Cc: David Rientjes Cc: Dmitry Vyukov Cc: Heiko Carstens Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Cc: Marco Elver Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Pekka Enberg Cc: Roman Gushchin Cc: Steven Rostedt (Google) Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/kmsan.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h index e0c23a32cdf0..fe6c2212bdb1 100644 --- a/include/linux/kmsan.h +++ b/include/linux/kmsan.h @@ -230,6 +230,15 @@ void kmsan_handle_urb(const struct urb *urb, bool is_out); */ void kmsan_unpoison_entry_regs(const struct pt_regs *regs); +/** + * kmsan_get_metadata() - Return a pointer to KMSAN shadow or origins. + * @addr: kernel address. + * @is_origin: whether to return origins or shadow. + * + * Return NULL if metadata cannot be found. + */ +void *kmsan_get_metadata(void *addr, bool is_origin); + #else static inline void kmsan_init_shadow(void) -- cgit v1.2.3 From ec3e837d8fd96c68599b2861dd412094b7bc335c Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 21 Jun 2024 13:34:55 +0200 Subject: kmsan: allow disabling KMSAN checks for the current task Like for KASAN, it's useful to temporarily disable KMSAN checks around, e.g., redzone accesses. Introduce kmsan_disable_current() and kmsan_enable_current(), which are similar to their KASAN counterparts. Make them reentrant in order to handle memory allocations in interrupt context. Repurpose the allow_reporting field for this. Link: https://lkml.kernel.org/r/20240621113706.315500-12-iii@linux.ibm.com Signed-off-by: Ilya Leoshkevich Reviewed-by: Alexander Potapenko Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Christoph Lameter Cc: David Rientjes Cc: Dmitry Vyukov Cc: Heiko Carstens Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Cc: Marco Elver Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Pekka Enberg Cc: Roman Gushchin Cc: Steven Rostedt (Google) Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/kmsan.h | 24 ++++++++++++++++++++++++ include/linux/kmsan_types.h | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h index fe6c2212bdb1..14b5ea6d3a43 100644 --- a/include/linux/kmsan.h +++ b/include/linux/kmsan.h @@ -239,6 +239,22 @@ void kmsan_unpoison_entry_regs(const struct pt_regs *regs); */ void *kmsan_get_metadata(void *addr, bool is_origin); +/** + * kmsan_enable_current(): Enable KMSAN for the current task. + * + * Each kmsan_enable_current() current call must be preceded by a + * kmsan_disable_current() call. These call pairs may be nested. + */ +void kmsan_enable_current(void); + +/** + * kmsan_disable_current(): Disable KMSAN for the current task. + * + * Each kmsan_disable_current() current call must be followed by a + * kmsan_enable_current() call. These call pairs may be nested. + */ +void kmsan_disable_current(void); + #else static inline void kmsan_init_shadow(void) @@ -338,6 +354,14 @@ static inline void kmsan_unpoison_entry_regs(const struct pt_regs *regs) { } +static inline void kmsan_enable_current(void) +{ +} + +static inline void kmsan_disable_current(void) +{ +} + #endif #endif /* _LINUX_KMSAN_H */ diff --git a/include/linux/kmsan_types.h b/include/linux/kmsan_types.h index 929287981afe..dfc59918b3c0 100644 --- a/include/linux/kmsan_types.h +++ b/include/linux/kmsan_types.h @@ -31,7 +31,7 @@ struct kmsan_context_state { struct kmsan_ctx { struct kmsan_context_state cstate; int kmsan_in_runtime; - bool allow_reporting; + unsigned int depth; }; #endif /* _LINUX_KMSAN_TYPES_H */ -- cgit v1.2.3 From 1fdb3c7006d9914e4b070f7eee98dfbdf743ee16 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 21 Jun 2024 13:34:56 +0200 Subject: kmsan: introduce memset_no_sanitize_memory() Add a wrapper for memset() that prevents unpoisoning. This is useful for filling memory allocator redzones. Link: https://lkml.kernel.org/r/20240621113706.315500-13-iii@linux.ibm.com Signed-off-by: Ilya Leoshkevich Reviewed-by: Alexander Potapenko Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Christoph Lameter Cc: David Rientjes Cc: Dmitry Vyukov Cc: Heiko Carstens Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Cc: Marco Elver Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Pekka Enberg Cc: Roman Gushchin Cc: Steven Rostedt (Google) Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/kmsan.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h index 14b5ea6d3a43..7109644f4c19 100644 --- a/include/linux/kmsan.h +++ b/include/linux/kmsan.h @@ -255,6 +255,19 @@ void kmsan_enable_current(void); */ void kmsan_disable_current(void); +/** + * memset_no_sanitize_memory(): Fill memory without KMSAN instrumentation. + * @s: address of kernel memory to fill. + * @c: constant byte to fill the memory with. + * @n: number of bytes to fill. + * + * This is like memset(), but without KMSAN instrumentation. + */ +static inline void *memset_no_sanitize_memory(void *s, int c, size_t n) +{ + return __memset(s, c, n); +} + #else static inline void kmsan_init_shadow(void) @@ -362,6 +375,11 @@ static inline void kmsan_disable_current(void) { } +static inline void *memset_no_sanitize_memory(void *s, int c, size_t n) +{ + return memset(s, c, n); +} + #endif #endif /* _LINUX_KMSAN_H */ -- cgit v1.2.3 From e6553e2f79b2673b239c3dd47a88451119eb82d9 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 21 Jun 2024 13:35:00 +0200 Subject: kmsan: expose KMSAN_WARN_ON() KMSAN_WARN_ON() is required for implementing s390-specific KMSAN functions, but right now it's available only to the KMSAN internal functions. Expose it to subsystems through . Link: https://lkml.kernel.org/r/20240621113706.315500-17-iii@linux.ibm.com Signed-off-by: Ilya Leoshkevich Reviewed-by: Alexander Potapenko Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Christoph Lameter Cc: David Rientjes Cc: Dmitry Vyukov Cc: Heiko Carstens Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Joonsoo Kim Cc: Cc: Marco Elver Cc: Mark Rutland Cc: Masami Hiramatsu (Google) Cc: Pekka Enberg Cc: Roman Gushchin Cc: Steven Rostedt (Google) Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/kmsan.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/kmsan.h b/include/linux/kmsan.h index 7109644f4c19..2b1432cc16d5 100644 --- a/include/linux/kmsan.h +++ b/include/linux/kmsan.h @@ -268,6 +268,29 @@ static inline void *memset_no_sanitize_memory(void *s, int c, size_t n) return __memset(s, c, n); } +extern bool kmsan_enabled; +extern int panic_on_kmsan; + +/* + * KMSAN performs a lot of consistency checks that are currently enabled by + * default. BUG_ON is normally discouraged in the kernel, unless used for + * debugging, but KMSAN itself is a debugging tool, so it makes little sense to + * recover if something goes wrong. + */ +#define KMSAN_WARN_ON(cond) \ + ({ \ + const bool __cond = WARN_ON(cond); \ + if (unlikely(__cond)) { \ + WRITE_ONCE(kmsan_enabled, false); \ + if (panic_on_kmsan) { \ + /* Can't call panic() here because */ \ + /* of uaccess checks. */ \ + BUG(); \ + } \ + } \ + __cond; \ + }) + #else static inline void kmsan_init_shadow(void) @@ -380,6 +403,8 @@ static inline void *memset_no_sanitize_memory(void *s, int c, size_t n) return memset(s, c, n); } +#define KMSAN_WARN_ON WARN_ON + #endif #endif /* _LINUX_KMSAN_H */ -- cgit v1.2.3 From ee86814b0562f18255b55c5e6a01a022895994cf Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 20 Jun 2024 23:29:35 +0200 Subject: mm/migrate: move NUMA hinting fault folio isolation + checks under PTL Currently we always take a folio reference even if migration will not even be tried or isolation failed, requiring us to grab+drop an additional reference. Further, we end up calling folio_likely_mapped_shared() while the folio might have already been unmapped, because after we dropped the PTL, that can easily happen. We want to stop touching mapcounts and friends from such context, and only call folio_likely_mapped_shared() while the folio is still mapped: mapcount information is pretty much stale and unreliable otherwise. So let's move checks into numamigrate_isolate_folio(), rename that function to migrate_misplaced_folio_prepare(), and call that function from callsites where we call migrate_misplaced_folio(), but still with the PTL held. We can now stop taking temporary folio references, and really only take a reference if folio isolation succeeded. Doing the folio_likely_mapped_shared() + folio isolation under PT lock is now similar to how we handle MADV_PAGEOUT. While at it, combine the folio_is_file_lru() checks. [david@redhat.com: fix list_del() corruption] Link: https://lkml.kernel.org/r/8f85c31a-e603-4578-bf49-136dae0d4b69@redhat.com Link: https://lkml.kernel.org/r/20240626191129.658CFC32782@smtp.kernel.org Link: https://lkml.kernel.org/r/20240620212935.656243-3-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Baolin Wang Reviewed-by: Zi Yan Tested-by: Donet Tom Signed-off-by: Andrew Morton --- include/linux/migrate.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 517f70b70620..af2579ae93f2 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -140,9 +140,16 @@ const struct movable_operations *page_movable_ops(struct page *page) } #ifdef CONFIG_NUMA_BALANCING +int migrate_misplaced_folio_prepare(struct folio *folio, + struct vm_area_struct *vma, int node); int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node); #else +static inline int migrate_misplaced_folio_prepare(struct folio *folio, + struct vm_area_struct *vma, int node) +{ + return -EAGAIN; /* can't migrate now */ +} static inline int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int node) { -- cgit v1.2.3 From bb82ac31ddcedd6a1e6ee30b7afec7acdef811e1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Jun 2024 12:18:55 +0200 Subject: readahead: drop index argument of page_cache_async_readahead() The index argument of page_cache_async_readahead() is just folio->index so there's no point in passing is separately. Drop it. Link: https://lkml.kernel.org/r/20240625101909.12234-5-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Josef Bacik Tested-by: Zhang Peng Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e37e16ebff7a..eef99375dcf1 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1293,8 +1293,7 @@ void page_cache_sync_readahead(struct address_space *mapping, * @mapping: address_space which holds the pagecache and I/O vectors * @ra: file_ra_state which holds the readahead state * @file: Used by the filesystem for authentication. - * @folio: The folio at @index which triggered the readahead call. - * @index: Index of first page to be read. + * @folio: The folio which triggered the readahead call. * @req_count: Total number of pages being read by the caller. * * page_cache_async_readahead() should be called when a page is used which @@ -1305,9 +1304,9 @@ void page_cache_sync_readahead(struct address_space *mapping, static inline void page_cache_async_readahead(struct address_space *mapping, struct file_ra_state *ra, struct file *file, - struct folio *folio, pgoff_t index, unsigned long req_count) + struct folio *folio, unsigned long req_count) { - DEFINE_READAHEAD(ractl, file, ra, mapping, index); + DEFINE_READAHEAD(ractl, file, ra, mapping, folio->index); page_cache_async_ra(&ractl, folio, req_count); } -- cgit v1.2.3 From af48f95492dc1af36d9636a750ec492035c0ed7d Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Mon, 1 Jul 2024 15:40:48 +0300 Subject: RDMA/core: Introduce "name_assign_type" for an IB device The name_assign_type indicates how the name is provided. Currently these types are supported: - RDMA_NAME_ASSIGN_TYPE_UNKNOWN: Unknown or not set; - RDMA_NAME_ASSIGN_TYPE_USER: Name is provided by the user; The user-created sub device, rxe and siw device has this type. When filling nl device info, it is set in the new attribute RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE. User-space tools like udev "rdma_rename" could check this attribute to determine if this device needs to be renamed or not. Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/522591bef9a369cc8e5dcb77787e017bffee37fe.1719837610.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- include/rdma/ib_verbs.h | 8 ++++++++ include/uapi/rdma/rdma_netlink.h | 9 +++++++++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index e09d4f09b602..6c5712ae559d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2794,6 +2794,8 @@ struct ib_device { enum rdma_nl_dev_type type; struct ib_device *parent; struct list_head subdev_list; + + enum rdma_nl_name_assign_type name_assign_type; }; static inline void *rdma_zalloc_obj(struct ib_device *dev, size_t size, @@ -4867,4 +4869,10 @@ int ib_add_sub_device(struct ib_device *parent, * Return 0 on success, an error code otherwise */ int ib_del_sub_device_and_put(struct ib_device *sub); + +static inline void ib_mark_name_assigned_by_user(struct ib_device *ibdev) +{ + ibdev->name_assign_type = RDMA_NAME_ASSIGN_TYPE_USER; +} + #endif /* IB_VERBS_H */ diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 4b69242d7848..2f37568f5556 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -572,6 +572,8 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_PARENT_NAME, /* string */ + RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE, /* u8 */ + /* * Always the end */ @@ -615,4 +617,11 @@ enum rdma_nl_counter_mask { enum rdma_nl_dev_type { RDMA_DEVICE_TYPE_SMI = 1, }; + +/* RDMA device name assignment types */ +enum rdma_nl_name_assign_type { + RDMA_NAME_ASSIGN_TYPE_UNKNOWN = 0, + RDMA_NAME_ASSIGN_TYPE_USER = 1, /* Provided by user-space */ +}; + #endif /* _UAPI_RDMA_NETLINK_H */ -- cgit v1.2.3 From fa2690af573dfefb47ba6eef888797a64b6b5f3c Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 25 Jun 2024 13:53:50 -0700 Subject: mm: page_ref: remove folio_try_get_rcu() The below bug was reported on a non-SMP kernel: [ 275.267158][ T4335] ------------[ cut here ]------------ [ 275.267949][ T4335] kernel BUG at include/linux/page_ref.h:275! [ 275.268526][ T4335] invalid opcode: 0000 [#1] KASAN PTI [ 275.269001][ T4335] CPU: 0 PID: 4335 Comm: trinity-c3 Not tainted 6.7.0-rc4-00061-gefa7df3e3bb5 #1 [ 275.269787][ T4335] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 [ 275.270679][ T4335] RIP: 0010:try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.272813][ T4335] RSP: 0018:ffffc90005dcf650 EFLAGS: 00010202 [ 275.273346][ T4335] RAX: 0000000000000246 RBX: ffffea00066e0000 RCX: 0000000000000000 [ 275.274032][ T4335] RDX: fffff94000cdc007 RSI: 0000000000000004 RDI: ffffea00066e0034 [ 275.274719][ T4335] RBP: ffffea00066e0000 R08: 0000000000000000 R09: fffff94000cdc006 [ 275.275404][ T4335] R10: ffffea00066e0037 R11: 0000000000000000 R12: 0000000000000136 [ 275.276106][ T4335] R13: ffffea00066e0034 R14: dffffc0000000000 R15: ffffea00066e0008 [ 275.276790][ T4335] FS: 00007fa2f9b61740(0000) GS:ffffffff89d0d000(0000) knlGS:0000000000000000 [ 275.277570][ T4335] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 275.278143][ T4335] CR2: 00007fa2f6c00000 CR3: 0000000134b04000 CR4: 00000000000406f0 [ 275.278833][ T4335] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 275.279521][ T4335] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 275.280201][ T4335] Call Trace: [ 275.280499][ T4335] [ 275.280751][ T4335] ? die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434 arch/x86/kernel/dumpstack.c:447) [ 275.281087][ T4335] ? do_trap (arch/x86/kernel/traps.c:112 arch/x86/kernel/traps.c:153) [ 275.281463][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.281884][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.282300][ T4335] ? do_error_trap (arch/x86/kernel/traps.c:174) [ 275.282711][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.283129][ T4335] ? handle_invalid_op (arch/x86/kernel/traps.c:212) [ 275.283561][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.283990][ T4335] ? exc_invalid_op (arch/x86/kernel/traps.c:264) [ 275.284415][ T4335] ? asm_exc_invalid_op (arch/x86/include/asm/idtentry.h:568) [ 275.284859][ T4335] ? try_get_folio (include/linux/page_ref.h:275 (discriminator 3) mm/gup.c:79 (discriminator 3)) [ 275.285278][ T4335] try_grab_folio (mm/gup.c:148) [ 275.285684][ T4335] __get_user_pages (mm/gup.c:1297 (discriminator 1)) [ 275.286111][ T4335] ? __pfx___get_user_pages (mm/gup.c:1188) [ 275.286579][ T4335] ? __pfx_validate_chain (kernel/locking/lockdep.c:3825) [ 275.287034][ T4335] ? mark_lock (kernel/locking/lockdep.c:4656 (discriminator 1)) [ 275.287416][ T4335] __gup_longterm_locked (mm/gup.c:1509 mm/gup.c:2209) [ 275.288192][ T4335] ? __pfx___gup_longterm_locked (mm/gup.c:2204) [ 275.288697][ T4335] ? __pfx_lock_acquire (kernel/locking/lockdep.c:5722) [ 275.289135][ T4335] ? __pfx___might_resched (kernel/sched/core.c:10106) [ 275.289595][ T4335] pin_user_pages_remote (mm/gup.c:3350) [ 275.290041][ T4335] ? __pfx_pin_user_pages_remote (mm/gup.c:3350) [ 275.290545][ T4335] ? find_held_lock (kernel/locking/lockdep.c:5244 (discriminator 1)) [ 275.290961][ T4335] ? mm_access (kernel/fork.c:1573) [ 275.291353][ T4335] process_vm_rw_single_vec+0x142/0x360 [ 275.291900][ T4335] ? __pfx_process_vm_rw_single_vec+0x10/0x10 [ 275.292471][ T4335] ? mm_access (kernel/fork.c:1573) [ 275.292859][ T4335] process_vm_rw_core+0x272/0x4e0 [ 275.293384][ T4335] ? hlock_class (arch/x86/include/asm/bitops.h:227 arch/x86/include/asm/bitops.h:239 include/asm-generic/bitops/instrumented-non-atomic.h:142 kernel/locking/lockdep.c:228) [ 275.293780][ T4335] ? __pfx_process_vm_rw_core+0x10/0x10 [ 275.294350][ T4335] process_vm_rw (mm/process_vm_access.c:284) [ 275.294748][ T4335] ? __pfx_process_vm_rw (mm/process_vm_access.c:259) [ 275.295197][ T4335] ? __task_pid_nr_ns (include/linux/rcupdate.h:306 (discriminator 1) include/linux/rcupdate.h:780 (discriminator 1) kernel/pid.c:504 (discriminator 1)) [ 275.295634][ T4335] __x64_sys_process_vm_readv (mm/process_vm_access.c:291) [ 275.296139][ T4335] ? syscall_enter_from_user_mode (kernel/entry/common.c:94 kernel/entry/common.c:112) [ 275.296642][ T4335] do_syscall_64 (arch/x86/entry/common.c:51 (discriminator 1) arch/x86/entry/common.c:82 (discriminator 1)) [ 275.297032][ T4335] ? __task_pid_nr_ns (include/linux/rcupdate.h:306 (discriminator 1) include/linux/rcupdate.h:780 (discriminator 1) kernel/pid.c:504 (discriminator 1)) [ 275.297470][ T4335] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4300 kernel/locking/lockdep.c:4359) [ 275.297988][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97) [ 275.298389][ T4335] ? lockdep_hardirqs_on_prepare (kernel/locking/lockdep.c:4300 kernel/locking/lockdep.c:4359) [ 275.298906][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97) [ 275.299304][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97) [ 275.299703][ T4335] ? do_syscall_64 (arch/x86/include/asm/cpufeature.h:171 arch/x86/entry/common.c:97) [ 275.300115][ T4335] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) This BUG is the VM_BUG_ON(!in_atomic() && !irqs_disabled()) assertion in folio_ref_try_add_rcu() for non-SMP kernel. The process_vm_readv() calls GUP to pin the THP. An optimization for pinning THP instroduced by commit 57edfcfd3419 ("mm/gup: accelerate thp gup even for "pages != NULL"") calls try_grab_folio() to pin the THP, but try_grab_folio() is supposed to be called in atomic context for non-SMP kernel, for example, irq disabled or preemption disabled, due to the optimization introduced by commit e286781d5f2e ("mm: speculative page references"). The commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries") is not actually the root cause although it was bisected to. It just makes the problem exposed more likely. The follow up discussion suggested the optimization for non-SMP kernel may be out-dated and not worth it anymore [1]. So removing the optimization to silence the BUG. However calling try_grab_folio() in GUP slow path actually is unnecessary, so the following patch will clean this up. [1] https://lore.kernel.org/linux-mm/821cf1d6-92b9-4ac4-bacc-d8f2364ac14f@paulmck-laptop/ Link: https://lkml.kernel.org/r/20240625205350.1777481-1-yang@os.amperecomputing.com Fixes: 57edfcfd3419 ("mm/gup: accelerate thp gup even for "pages != NULL"") Signed-off-by: Yang Shi Reported-by: kernel test robot Tested-by: Oliver Sang Acked-by: Peter Xu Acked-by: David Hildenbrand Cc: Christoph Lameter Cc: Matthew Wilcox (Oracle) Cc: Paul E. McKenney Cc: Rik van Riel Cc: Vivek Kasireddy Cc: [6.6+] Signed-off-by: Andrew Morton --- include/linux/page_ref.h | 49 ++---------------------------------------------- 1 file changed, 2 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 1acf5bac7f50..490d0ad6e56d 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -258,54 +258,9 @@ static inline bool folio_try_get(struct folio *folio) return folio_ref_add_unless(folio, 1, 0); } -static inline bool folio_ref_try_add_rcu(struct folio *folio, int count) -{ -#ifdef CONFIG_TINY_RCU - /* - * The caller guarantees the folio will not be freed from interrupt - * context, so (on !SMP) we only need preemption to be disabled - * and TINY_RCU does that for us. - */ -# ifdef CONFIG_PREEMPT_COUNT - VM_BUG_ON(!in_atomic() && !irqs_disabled()); -# endif - VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio); - folio_ref_add(folio, count); -#else - if (unlikely(!folio_ref_add_unless(folio, count, 0))) { - /* Either the folio has been freed, or will be freed. */ - return false; - } -#endif - return true; -} - -/** - * folio_try_get_rcu - Attempt to increase the refcount on a folio. - * @folio: The folio. - * - * This is a version of folio_try_get() optimised for non-SMP kernels. - * If you are still holding the rcu_read_lock() after looking up the - * page and know that the page cannot have its refcount decreased to - * zero in interrupt context, you can use this instead of folio_try_get(). - * - * Example users include get_user_pages_fast() (as pages are not unmapped - * from interrupt context) and the page cache lookups (as pages are not - * truncated from interrupt context). We also know that pages are not - * frozen in interrupt context for the purposes of splitting or migration. - * - * You can also use this function if you're holding a lock that prevents - * pages being frozen & removed; eg the i_pages lock for the page cache - * or the mmap_lock or page table lock for page tables. In this case, - * it will always succeed, and you could have used a plain folio_get(), - * but it's sometimes more convenient to have a common function called - * from both locked and RCU-protected contexts. - * - * Return: True if the reference count was successfully incremented. - */ -static inline bool folio_try_get_rcu(struct folio *folio) +static inline bool folio_ref_try_add(struct folio *folio, int count) { - return folio_ref_try_add_rcu(folio, 1); + return folio_ref_add_unless(folio, count, 0); } static inline int page_ref_freeze(struct page *page, int count) -- cgit v1.2.3 From 82f0b6f041fad768c28b4ad05a683065412c226e Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 25 Jun 2024 20:16:39 -0400 Subject: mm: prevent derefencing NULL ptr in pfn_section_valid() Commit 5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing memory_section->usage") changed pfn_section_valid() to add a READ_ONCE() call around "ms->usage" to fix a race with section_deactivate() where ms->usage can be cleared. The READ_ONCE() call, by itself, is not enough to prevent NULL pointer dereference. We need to check its value before dereferencing it. Link: https://lkml.kernel.org/r/20240626001639.1350646-1-longman@redhat.com Fixes: 5ec8e8ea8b77 ("mm/sparsemem: fix race in accessing memory_section->usage") Signed-off-by: Waiman Long Cc: Charan Teja Kalla Signed-off-by: Andrew Morton --- include/linux/mmzone.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 586a8f0104d7..1dc6248feb83 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1979,8 +1979,9 @@ static inline int subsection_map_index(unsigned long pfn) static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) { int idx = subsection_map_index(pfn); + struct mem_section_usage *usage = READ_ONCE(ms->usage); - return test_bit(idx, READ_ONCE(ms->usage)->subsection_map); + return usage ? test_bit(idx, usage->subsection_map) : 0; } #else static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn) -- cgit v1.2.3 From 099d90642a711caae377f53309abfe27e8724a8b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 27 Jun 2024 10:39:49 +1000 Subject: mm/filemap: make MAX_PAGECACHE_ORDER acceptable to xarray Patch series "mm/filemap: Limit page cache size to that supported by xarray", v2. Currently, xarray can't support arbitrary page cache size. More details can be found from the WARN_ON() statement in xas_split_alloc(). In our test whose code is attached below, we hit the WARN_ON() on ARM64 system where the base page size is 64KB and huge page size is 512MB. The issue was reported long time ago and some discussions on it can be found here [1]. [1] https://www.spinics.net/lists/linux-xfs/msg75404.html In order to fix the issue, we need to adjust MAX_PAGECACHE_ORDER to one supported by xarray and avoid PMD-sized page cache if needed. The code changes are suggested by David Hildenbrand. PATCH[1] adjusts MAX_PAGECACHE_ORDER to that supported by xarray PATCH[2-3] avoids PMD-sized page cache in the synchronous readahead path PATCH[4] avoids PMD-sized page cache for shmem files if needed Test program ============ # cat test.c #define _GNU_SOURCE #include #include #include #include #include #include #include #include #define TEST_XFS_FILENAME "/tmp/data" #define TEST_SHMEM_FILENAME "/dev/shm/data" #define TEST_MEM_SIZE 0x20000000 int main(int argc, char **argv) { const char *filename; int fd = 0; void *buf = (void *)-1, *p; int pgsize = getpagesize(); int ret; if (pgsize != 0x10000) { fprintf(stderr, "64KB base page size is required\n"); return -EPERM; } system("echo force > /sys/kernel/mm/transparent_hugepage/shmem_enabled"); system("rm -fr /tmp/data"); system("rm -fr /dev/shm/data"); system("echo 1 > /proc/sys/vm/drop_caches"); /* Open xfs or shmem file */ filename = TEST_XFS_FILENAME; if (argc > 1 && !strcmp(argv[1], "shmem")) filename = TEST_SHMEM_FILENAME; fd = open(filename, O_CREAT | O_RDWR | O_TRUNC); if (fd < 0) { fprintf(stderr, "Unable to open <%s>\n", filename); return -EIO; } /* Extend file size */ ret = ftruncate(fd, TEST_MEM_SIZE); if (ret) { fprintf(stderr, "Error %d to ftruncate()\n", ret); goto cleanup; } /* Create VMA */ buf = mmap(NULL, TEST_MEM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (buf == (void *)-1) { fprintf(stderr, "Unable to mmap <%s>\n", filename); goto cleanup; } fprintf(stdout, "mapped buffer at 0x%p\n", buf); ret = madvise(buf, TEST_MEM_SIZE, MADV_HUGEPAGE); if (ret) { fprintf(stderr, "Unable to madvise(MADV_HUGEPAGE)\n"); goto cleanup; } /* Populate VMA */ ret = madvise(buf, TEST_MEM_SIZE, MADV_POPULATE_WRITE); if (ret) { fprintf(stderr, "Error %d to madvise(MADV_POPULATE_WRITE)\n", ret); goto cleanup; } /* Punch the file to enforce xarray split */ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, TEST_MEM_SIZE - pgsize, pgsize); if (ret) fprintf(stderr, "Error %d to fallocate()\n", ret); cleanup: if (buf != (void *)-1) munmap(buf, TEST_MEM_SIZE); if (fd > 0) close(fd); return 0; } # gcc test.c -o test # cat /proc/1/smaps | grep KernelPageSize | head -n 1 KernelPageSize: 64 kB # ./test shmem : ------------[ cut here ]------------ WARNING: CPU: 17 PID: 5253 at lib/xarray.c:1025 xas_split_alloc+0xf8/0x128 Modules linked in: nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib \ nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct \ nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 \ ip_set nf_tables rfkill nfnetlink vfat fat virtio_balloon \ drm fuse xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64 \ virtio_net sha1_ce net_failover failover virtio_console virtio_blk \ dimlib virtio_mmio CPU: 17 PID: 5253 Comm: test Kdump: loaded Tainted: G W 6.10.0-rc5-gavin+ #12 Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-1.el9 05/24/2024 pstate: 83400005 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) pc : xas_split_alloc+0xf8/0x128 lr : split_huge_page_to_list_to_order+0x1c4/0x720 sp : ffff80008a92f5b0 x29: ffff80008a92f5b0 x28: ffff80008a92f610 x27: ffff80008a92f728 x26: 0000000000000cc0 x25: 000000000000000d x24: ffff0000cf00c858 x23: ffff80008a92f610 x22: ffffffdfc0600000 x21: 0000000000000000 x20: 0000000000000000 x19: ffffffdfc0600000 x18: 0000000000000000 x17: 0000000000000000 x16: 0000018000000000 x15: 3374004000000000 x14: 0000e00000000000 x13: 0000000000002000 x12: 0000000000000020 x11: 3374000000000000 x10: 3374e1c0ffff6000 x9 : ffffb463a84c681c x8 : 0000000000000003 x7 : 0000000000000000 x6 : ffff00011c976ce0 x5 : ffffb463aa47e378 x4 : 0000000000000000 x3 : 0000000000000cc0 x2 : 000000000000000d x1 : 000000000000000c x0 : 0000000000000000 Call trace: xas_split_alloc+0xf8/0x128 split_huge_page_to_list_to_order+0x1c4/0x720 truncate_inode_partial_folio+0xdc/0x160 shmem_undo_range+0x2bc/0x6a8 shmem_fallocate+0x134/0x430 vfs_fallocate+0x124/0x2e8 ksys_fallocate+0x4c/0xa0 __arm64_sys_fallocate+0x24/0x38 invoke_syscall.constprop.0+0x7c/0xd8 do_el0_svc+0xb4/0xd0 el0_svc+0x44/0x1d8 el0t_64_sync_handler+0x134/0x150 el0t_64_sync+0x17c/0x180 This patch (of 4): The largest page cache order can be HPAGE_PMD_ORDER (13) on ARM64 with 64KB base page size. The xarray entry with this order can't be split as the following error messages indicate. ------------[ cut here ]------------ WARNING: CPU: 35 PID: 7484 at lib/xarray.c:1025 xas_split_alloc+0xf8/0x128 Modules linked in: nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib \ nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct \ nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 \ ip_set rfkill nf_tables nfnetlink vfat fat virtio_balloon drm \ fuse xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64 \ sha1_ce virtio_net net_failover virtio_console virtio_blk failover \ dimlib virtio_mmio CPU: 35 PID: 7484 Comm: test Kdump: loaded Tainted: G W 6.10.0-rc5-gavin+ #9 Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-1.el9 05/24/2024 pstate: 83400005 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) pc : xas_split_alloc+0xf8/0x128 lr : split_huge_page_to_list_to_order+0x1c4/0x720 sp : ffff800087a4f6c0 x29: ffff800087a4f6c0 x28: ffff800087a4f720 x27: 000000001fffffff x26: 0000000000000c40 x25: 000000000000000d x24: ffff00010625b858 x23: ffff800087a4f720 x22: ffffffdfc0780000 x21: 0000000000000000 x20: 0000000000000000 x19: ffffffdfc0780000 x18: 000000001ff40000 x17: 00000000ffffffff x16: 0000018000000000 x15: 51ec004000000000 x14: 0000e00000000000 x13: 0000000000002000 x12: 0000000000000020 x11: 51ec000000000000 x10: 51ece1c0ffff8000 x9 : ffffbeb961a44d28 x8 : 0000000000000003 x7 : ffffffdfc0456420 x6 : ffff0000e1aa6eb8 x5 : 20bf08b4fe778fca x4 : ffffffdfc0456420 x3 : 0000000000000c40 x2 : 000000000000000d x1 : 000000000000000c x0 : 0000000000000000 Call trace: xas_split_alloc+0xf8/0x128 split_huge_page_to_list_to_order+0x1c4/0x720 truncate_inode_partial_folio+0xdc/0x160 truncate_inode_pages_range+0x1b4/0x4a8 truncate_pagecache_range+0x84/0xa0 xfs_flush_unmap_range+0x70/0x90 [xfs] xfs_file_fallocate+0xfc/0x4d8 [xfs] vfs_fallocate+0x124/0x2e8 ksys_fallocate+0x4c/0xa0 __arm64_sys_fallocate+0x24/0x38 invoke_syscall.constprop.0+0x7c/0xd8 do_el0_svc+0xb4/0xd0 el0_svc+0x44/0x1d8 el0t_64_sync_handler+0x134/0x150 el0t_64_sync+0x17c/0x180 Fix it by decreasing MAX_PAGECACHE_ORDER to the largest supported order by xarray. For this specific case, MAX_PAGECACHE_ORDER is dropped from 13 to 11 when CONFIG_BASE_SMALL is disabled. Link: https://lkml.kernel.org/r/20240627003953.1262512-1-gshan@redhat.com Link: https://lkml.kernel.org/r/20240627003953.1262512-2-gshan@redhat.com Fixes: 793917d997df ("mm/readahead: Add large folio readahead") Signed-off-by: Gavin Shan Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Darrick J. Wong Cc: Don Dutile Cc: Hugh Dickins Cc: Linus Torvalds Cc: Matthew Wilcox (Oracle) Cc: Ryan Roberts Cc: William Kucharski Cc: Zhenyu Zhang Cc: [5.18+] Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 59f1df0cde5a..a0a026d2d244 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -354,11 +354,18 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) * a good order (that's 1MB if you're using 4kB pages) */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER +#define PREFERRED_MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER #else -#define MAX_PAGECACHE_ORDER 8 +#define PREFERRED_MAX_PAGECACHE_ORDER 8 #endif +/* + * xas_split_alloc() does not support arbitrary orders. This implies no + * 512MB THP on ARM64 with 64KB base page size. + */ +#define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1) +#define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER) + /** * mapping_set_large_folios() - Indicate the file supports large folios. * @mapping: The file. -- cgit v1.2.3 From 5a4d8944d6b1e1aaaa83ea42c116b520b4ed0394 Mon Sep 17 00:00:00 2001 From: Nhat Pham Date: Thu, 27 Jun 2024 13:17:37 -0700 Subject: cachestat: do not flush stats in recency check syzbot detects that cachestat() is flushing stats, which can sleep, in its RCU read section (see [1]). This is done in the workingset_test_recent() step (which checks if the folio's eviction is recent). Move the stat flushing step to before the RCU read section of cachestat, and skip stat flushing during the recency check. [1]: https://lore.kernel.org/cgroups/000000000000f71227061bdf97e0@google.com/ Link: https://lkml.kernel.org/r/20240627201737.3506959-1-nphamcs@gmail.com Fixes: b00684722262 ("mm: workingset: move the stats flush into workingset_test_recent()") Signed-off-by: Nhat Pham Reported-by: syzbot+b7f13b2d0cc156edf61a@syzkaller.appspotmail.com Closes: https://lore.kernel.org/cgroups/000000000000f71227061bdf97e0@google.com/ Debugged-by: Johannes Weiner Suggested-by: Johannes Weiner Acked-by: Johannes Weiner Acked-by: Shakeel Butt Cc: Al Viro Cc: David Hildenbrand Cc: "Huang, Ying" Cc: Kairui Song Cc: Matthew Wilcox (Oracle) Cc: Ryan Roberts Cc: Yosry Ahmed Cc: [6.8+] Signed-off-by: Andrew Morton --- include/linux/swap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index bd450023b9a4..e685e93ba354 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -354,7 +354,8 @@ static inline swp_entry_t page_swap_entry(struct page *page) } /* linux/mm/workingset.c */ -bool workingset_test_recent(void *shadow, bool file, bool *workingset); +bool workingset_test_recent(void *shadow, bool file, bool *workingset, + bool flush); void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages); void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg); void workingset_refault(struct folio *folio, void *shadow); -- cgit v1.2.3 From bd225530a4c717714722c3731442b78954c765b3 Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Thu, 27 Jun 2024 16:27:05 -0600 Subject: mm/hugetlb_vmemmap: fix race with speculative PFN walkers While investigating HVO for THPs [1], it turns out that speculative PFN walkers like compaction can race with vmemmap modifications, e.g., CPU 1 (vmemmap modifier) CPU 2 (speculative PFN walker) ------------------------------- ------------------------------ Allocates an LRU folio page1 Sees page1 Frees page1 Allocates a hugeTLB folio page2 (page1 being a tail of page2) Updates vmemmap mapping page1 get_page_unless_zero(page1) Even though page1->_refcount is zero after HVO, get_page_unless_zero() can still try to modify this read-only field, resulting in a crash. An independent report [2] confirmed this race. There are two discussed approaches to fix this race: 1. Make RO vmemmap RW so that get_page_unless_zero() can fail without triggering a PF. 2. Use RCU to make sure get_page_unless_zero() either sees zero page->_refcount through the old vmemmap or non-zero page->_refcount through the new one. The second approach is preferred here because: 1. It can prevent illegal modifications to struct page[] that has been HVO'ed; 2. It can be generalized, in a way similar to ZERO_PAGE(), to fix similar races in other places, e.g., arch_remove_memory() on x86 [3], which frees vmemmap mapping offlined struct page[]. While adding synchronize_rcu(), the goal is to be surgical, rather than optimized. Specifically, calls to synchronize_rcu() on the error handling paths can be coalesced, but it is not done for the sake of Simplicity: noticeably, this fix removes ~50% more lines than it adds. According to the hugetlb_optimize_vmemmap section in Documentation/admin-guide/sysctl/vm.rst, enabling HVO makes allocating or freeing hugeTLB pages "~2x slower than before". Having synchronize_rcu() on top makes those operations even worse, and this also affects the user interface /proc/sys/vm/nr_overcommit_hugepages. This is *very* hard to trigger: 1. Most hugeTLB use cases I know of are static, i.e., reserved at boot time, because allocating at runtime is not reliable at all. 2. On top of that, someone has to be very unlucky to get tripped over above, because the race window is so small -- I wasn't able to trigger it with a stress testing that does nothing but that (with THPs though). [1] https://lore.kernel.org/20240229183436.4110845-4-yuzhao@google.com/ [2] https://lore.kernel.org/917FFC7F-0615-44DD-90EE-9F85F8EA9974@linux.dev/ [3] https://lore.kernel.org/be130a96-a27e-4240-ad78-776802f57cad@redhat.com/ Link: https://lkml.kernel.org/r/20240627222705.2974207-1-yuzhao@google.com Signed-off-by: Yu Zhao Acked-by: Muchun Song Cc: David Hildenbrand Cc: Frank van der Linden Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Yang Shi Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/page_ref.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h index 490d0ad6e56d..8c236c651d1d 100644 --- a/include/linux/page_ref.h +++ b/include/linux/page_ref.h @@ -230,7 +230,13 @@ static inline int folio_ref_dec_return(struct folio *folio) static inline bool page_ref_add_unless(struct page *page, int nr, int u) { - bool ret = atomic_add_unless(&page->_refcount, nr, u); + bool ret = false; + + rcu_read_lock(); + /* avoid writing to the vmemmap area being remapped */ + if (!page_is_fake_head(page) && page_ref_count(page) != u) + ret = atomic_add_unless(&page->_refcount, nr, u); + rcu_read_unlock(); if (page_ref_tracepoint_active(page_ref_mod_unless)) __page_ref_mod_unless(page, nr, ret); -- cgit v1.2.3 From 011f583781fa46699f1d4c4e9c39ad68f05ced2d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 24 Jun 2024 11:39:32 +0200 Subject: genirq/irq_sim: add an extended irq_sim initializer Currently users of the interrupt simulator don't have any way of being notified about interrupts from the simulated domain being requested or released. This causes a problem for one of the users - the GPIO simulator - which is unable to lock the pins as interrupts. Define a structure containing callbacks to be executed on various irq_sim-related events (for now: irq request and release) and provide an extended function for creating simulated interrupt domains that takes it and a pointer to custom user data (to be passed to said callbacks) as arguments. Reviewed-by: Linus Walleij Acked-by: Linus Walleij Link: https://lore.kernel.org/r/20240624093934.17089-2-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- include/linux/irq_sim.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/irq_sim.h b/include/linux/irq_sim.h index ab831e5ae748..89b4d8ff274b 100644 --- a/include/linux/irq_sim.h +++ b/include/linux/irq_sim.h @@ -16,11 +16,28 @@ * requested like normal irqs and enqueued from process context. */ +struct irq_sim_ops { + int (*irq_sim_irq_requested)(struct irq_domain *domain, + irq_hw_number_t hwirq, void *data); + void (*irq_sim_irq_released)(struct irq_domain *domain, + irq_hw_number_t hwirq, void *data); +}; + struct irq_domain *irq_domain_create_sim(struct fwnode_handle *fwnode, unsigned int num_irqs); struct irq_domain *devm_irq_domain_create_sim(struct device *dev, struct fwnode_handle *fwnode, unsigned int num_irqs); +struct irq_domain *irq_domain_create_sim_full(struct fwnode_handle *fwnode, + unsigned int num_irqs, + const struct irq_sim_ops *ops, + void *data); +struct irq_domain * +devm_irq_domain_create_sim_full(struct device *dev, + struct fwnode_handle *fwnode, + unsigned int num_irqs, + const struct irq_sim_ops *ops, + void *data); void irq_domain_remove_sim(struct irq_domain *domain); #endif /* _LINUX_IRQ_SIM_H */ -- cgit v1.2.3 From 162e06871e6dcde861ef608e0c00a8b6a2d35d43 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Thu, 4 Jul 2024 11:45:15 +0530 Subject: block: t10-pi: Return correct ref tag when queue has no integrity profile Commit c6e56cf6b2e7 ("block: move integrity information into queue_limits") changed the ref tag calculation logic. It would break if there is no integrity profile. This in turn causes read/write failures for such cases. Fixes: c6e56cf6b2e7 ("block: move integrity information into queue_limits") Signed-off-by: Anuj Gupta Link: https://lore.kernel.org/r/20240704061515.282343-1-joshi.k@samsung.com Signed-off-by: Jens Axboe --- include/linux/t10-pi.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 1773610010eb..2c59fe3efcd4 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -39,8 +39,11 @@ struct t10_pi_tuple { static inline u32 t10_pi_ref_tag(struct request *rq) { - unsigned int shift = rq->q->limits.integrity.interval_exp; + unsigned int shift = ilog2(queue_logical_block_size(rq->q)); + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + rq->q->limits.integrity.interval_exp) + shift = rq->q->limits.integrity.interval_exp; return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT) & 0xffffffff; } @@ -61,8 +64,11 @@ static inline u64 lower_48_bits(u64 n) static inline u64 ext_pi_ref_tag(struct request *rq) { - unsigned int shift = rq->q->limits.integrity.interval_exp; + unsigned int shift = ilog2(queue_logical_block_size(rq->q)); + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + rq->q->limits.integrity.interval_exp) + shift = rq->q->limits.integrity.interval_exp; return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT)); } -- cgit v1.2.3 From c10bc5614ce0027fa2282ad11827629d81957a3a Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 3 Jul 2024 20:44:18 +0200 Subject: ata,scsi: Remove wrappers ata_sas_tport_{add,delete}() The ata_sas_tport_add() and ata_sas_tport_delete() wrappers only exist in order to export the internal libata functions which they wrap. Remove the wrappers and instead export the libata functions directly. Reviewed-by: Hannes Reinecke Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240703184418.723066-12-cassel@kernel.org Signed-off-by: Niklas Cassel --- include/linux/libata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 7d3bd7c9664a..586f0116d1d7 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1250,8 +1250,8 @@ extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); extern void ata_port_probe(struct ata_port *ap); extern void ata_port_free(struct ata_port *ap); -extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap); -extern void ata_sas_tport_delete(struct ata_port *ap); +extern int ata_tport_add(struct device *parent, struct ata_port *ap); +extern void ata_tport_delete(struct ata_port *ap); int ata_sas_device_configure(struct scsi_device *sdev, struct queue_limits *lim, struct ata_port *ap); extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); -- cgit v1.2.3 From 2199d6ff565d66ccf0ff1ea1c6466c379e7cbb58 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 3 Jul 2024 20:44:19 +0200 Subject: ata: libata: Remove unused function declaration for ata_scsi_detect() Remove unused function declaration for ata_scsi_detect(). Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240703184418.723066-13-cassel@kernel.org Signed-off-by: Niklas Cassel --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 586f0116d1d7..580971e11804 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1082,7 +1082,6 @@ extern int ata_host_activate(struct ata_host *host, int irq, const struct scsi_host_template *sht); extern void ata_host_detach(struct ata_host *host); extern void ata_host_init(struct ata_host *, struct device *, struct ata_port_operations *); -extern int ata_scsi_detect(struct scsi_host_template *sht); extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd, void __user *arg); #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 23262cce529e9f437ce2fa8fe4c2fa5b2b182318 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 3 Jul 2024 20:44:20 +0200 Subject: ata: libata-core: Remove support for decreasing the number of ports Commit f31871951b38 ("libata: separate out ata_host_alloc() and ata_host_register()") added ata_host_alloc(), where the API allowed a LLD to overallocate the number of ports supplied to ata_host_alloc(), as long as the LLD decreased host->n_ports before calling ata_host_register(). However, this functionally has never ever been used by a single LLD. Because of the current API design, the assignment of ap->print_id is deferred until registration time, which is bad, because that means that the ata_port_*() print functions cannot be used by a LLD until after registration time, which means that a LLD is forced to use a print function that is non-port specific, even for a port specific error. Remove the support for decreasing the number of ports, such that it will be possible to assign ap->print_id earlier. Reviewed-by: Damien Le Moal Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240703184418.723066-14-cassel@kernel.org Signed-off-by: Niklas Cassel --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 580971e11804..b7c5d3f33368 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1069,7 +1069,7 @@ extern int sata_std_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline); extern void ata_std_postreset(struct ata_link *link, unsigned int *classes); -extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports); +extern struct ata_host *ata_host_alloc(struct device *dev, int n_ports); extern struct ata_host *ata_host_alloc_pinfo(struct device *dev, const struct ata_port_info * const * ppi, int n_ports); extern void ata_host_get(struct ata_host *host); -- cgit v1.2.3 From 1dd63a6b573ffb989ca618e60b6469f885454606 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 3 Jul 2024 20:44:22 +0200 Subject: ata: libata-core: Remove local_port_no struct member ap->local_port_no is simply ap->port_no + 1. Since ap->local_port_no can be derived from ap->port_no, there is no need for the ap->local_port_no struct member, so remove ap->local_port_no. Reviewed-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240703184418.723066-16-cassel@kernel.org Signed-off-by: Niklas Cassel --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index b7c5d3f33368..84a7bfbac9fa 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -814,7 +814,6 @@ struct ata_port { /* Flags that change dynamically, protected by ap->lock */ unsigned int pflags; /* ATA_PFLAG_xxx */ unsigned int print_id; /* user visible unique port ID */ - unsigned int local_port_no; /* host local port num */ unsigned int port_no; /* 0 based port no. inside the host */ #ifdef CONFIG_ATA_SFF -- cgit v1.2.3 From 0d3603acffe294ffacaf75b18b46f182b68a50df Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Wed, 3 Jul 2024 20:44:25 +0200 Subject: ata,scsi: Remove wrapper ata_sas_port_alloc() The ata_sas_port_alloc() wrapper mainly exists in order to export the internal libata function which it wraps. The secondary reason is that it initializes some ata_port struct members. However, ata_sas_port_alloc() is only used in a single location, sas_ata_init(), which already performs some ata_port struct member initialization, so it does not make sense to spread this initialization out over two separate locations. Thus, remove the wrapper and instead export the libata function directly, and move the libsas specific ata_port initialization to sas_ata_init(), which already does some ata_port initialization. Reviewed-by: Hannes Reinecke Reviewed-by: John Garry Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240703184418.723066-19-cassel@kernel.org Signed-off-by: Niklas Cassel --- include/linux/libata.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 84a7bfbac9fa..17394098bee9 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1244,9 +1244,8 @@ extern int sata_link_debounce(struct ata_link *link, extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy, bool spm_wakeup); extern int ata_slave_link_init(struct ata_port *ap); -extern struct ata_port *ata_sas_port_alloc(struct ata_host *, - struct ata_port_info *, struct Scsi_Host *); extern void ata_port_probe(struct ata_port *ap); +extern struct ata_port *ata_port_alloc(struct ata_host *host); extern void ata_port_free(struct ata_port *ap); extern int ata_tport_add(struct device *parent, struct ata_port *ap); extern void ata_tport_delete(struct ata_port *ap); -- cgit v1.2.3 From a1944676767e855869b6af8e1c7e185372feaf31 Mon Sep 17 00:00:00 2001 From: Gerhard Engleder Date: Sun, 30 Jun 2024 21:47:39 +0200 Subject: misc: keba: Add basic KEBA CP500 system FPGA support The KEBA CP500 system FPGA is a PCIe device, which consists of multiple IP cores. Every IP core has its own auxiliary driver. The cp500 driver registers an auxiliary device for each device and the corresponding drivers are loaded by the Linux driver infrastructure. Currently 3 variants of this device exists. Every variant has its own PCI device ID, which is used to determine the list of available IP cores. In this first version only the auxiliary device for the I2C controller is registered. Besides the auxiliary device registration some other basic functions of the FPGA are implemented; e.g, FPGA version sysfs file, keep FPGA configuration on reset sysfs file, error message for errors on the internal AXI bus of the FPGA. Signed-off-by: Gerhard Engleder Link: https://lore.kernel.org/r/20240630194740.7137-2-gerhard@engleder-embedded.com Signed-off-by: Greg Kroah-Hartman --- include/linux/misc/keba.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/misc/keba.h (limited to 'include') diff --git a/include/linux/misc/keba.h b/include/linux/misc/keba.h new file mode 100644 index 000000000000..323b31a847c5 --- /dev/null +++ b/include/linux/misc/keba.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2024, KEBA Industrial Automation Gmbh */ + +#ifndef _LINUX_MISC_KEBA_H +#define _LINUX_MISC_KEBA_H + +#include + +struct i2c_board_info; + +/** + * struct keba_i2c_auxdev - KEBA I2C auxiliary device + * @auxdev: auxiliary device object + * @io: address range of I2C controller IO memory + * @info_size: number of I2C devices to be probed + * @info: I2C devices to be probed + */ +struct keba_i2c_auxdev { + struct auxiliary_device auxdev; + struct resource io; + int info_size; + struct i2c_board_info *info; +}; + +#endif /* _LINUX_MISC_KEBA_H */ -- cgit v1.2.3 From 37797c605da33445adc112561695f70bfaa11133 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Mon, 1 Jul 2024 13:30:34 +0200 Subject: soc: fsl: cpm1: qmc: Introduce functions to get a channel from a phandle list qmc_chan_get_byphandle() and the resource managed version retrieve a channel from a simple phandle. Extend the API and introduce qmc_chan_get_byphandles_index() and the resource managed version in order to retrieve a channel from a phandle list using the provided index to identify the phandle in the list. Also update qmc_chan_get_byphandle() and the resource managed version to use qmc_chan_get_byphandles_index() and so avoid code duplication. Signed-off-by: Herve Codina Link: https://patch.msgid.link/20240701113038.55144-8-herve.codina@bootlin.com Signed-off-by: Mark Brown --- include/soc/fsl/qe/qmc.h | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/soc/fsl/qe/qmc.h b/include/soc/fsl/qe/qmc.h index 2a333fc1ea81..0fa7205145ce 100644 --- a/include/soc/fsl/qe/qmc.h +++ b/include/soc/fsl/qe/qmc.h @@ -16,11 +16,30 @@ struct device_node; struct device; struct qmc_chan; -struct qmc_chan *qmc_chan_get_byphandle(struct device_node *np, const char *phandle_name); +struct qmc_chan *qmc_chan_get_byphandles_index(struct device_node *np, + const char *phandles_name, + int index); +struct qmc_chan *devm_qmc_chan_get_byphandles_index(struct device *dev, + struct device_node *np, + const char *phandles_name, + int index); + +static inline struct qmc_chan *qmc_chan_get_byphandle(struct device_node *np, + const char *phandle_name) +{ + return qmc_chan_get_byphandles_index(np, phandle_name, 0); +} + +static inline struct qmc_chan *devm_qmc_chan_get_byphandle(struct device *dev, + struct device_node *np, + const char *phandle_name) +{ + return devm_qmc_chan_get_byphandles_index(dev, np, phandle_name, 0); +} + struct qmc_chan *qmc_chan_get_bychild(struct device_node *np); void qmc_chan_put(struct qmc_chan *chan); -struct qmc_chan *devm_qmc_chan_get_byphandle(struct device *dev, struct device_node *np, - const char *phandle_name); + struct qmc_chan *devm_qmc_chan_get_bychild(struct device *dev, struct device_node *np); enum qmc_mode { -- cgit v1.2.3 From af8432b2e41abc0a20bdc01a3b144ea7b2f1ee09 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Mon, 1 Jul 2024 13:30:35 +0200 Subject: soc: fsl: cpm1: qmc: Introduce qmc_chan_count_phandles() No function in the QMC API is available to get the number of phandles present in a phandle list. Fill this lack introducing qmc_chan_count_phandles(). Signed-off-by: Herve Codina Link: https://patch.msgid.link/20240701113038.55144-9-herve.codina@bootlin.com Signed-off-by: Mark Brown --- include/soc/fsl/qe/qmc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/soc/fsl/qe/qmc.h b/include/soc/fsl/qe/qmc.h index 0fa7205145ce..294e42ea8d4c 100644 --- a/include/soc/fsl/qe/qmc.h +++ b/include/soc/fsl/qe/qmc.h @@ -16,6 +16,8 @@ struct device_node; struct device; struct qmc_chan; +int qmc_chan_count_phandles(struct device_node *np, const char *phandles_name); + struct qmc_chan *qmc_chan_get_byphandles_index(struct device_node *np, const char *phandles_name, int index); -- cgit v1.2.3 From 4acab508eb03dc1827db6005764de29299e07569 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 3 Jul 2024 10:31:41 +0200 Subject: thermal: core: constify 'type' in devm_thermal_of_cooling_device_register() The 'type' string passed to thermal_of_cooling_device_register() is a 'const char *', so do the same in the devm interface. Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20240703083141.96013-1-krzysztof.kozlowski@linaro.org Signed-off-by: Rafael J. Wysocki --- include/linux/thermal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index f1155c0439c4..f732dab20368 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -261,7 +261,7 @@ thermal_of_cooling_device_register(struct device_node *np, const char *, void *, struct thermal_cooling_device * devm_thermal_of_cooling_device_register(struct device *dev, struct device_node *np, - char *type, void *devdata, + const char *type, void *devdata, const struct thermal_cooling_device_ops *ops); void thermal_cooling_device_update(struct thermal_cooling_device *); void thermal_cooling_device_unregister(struct thermal_cooling_device *); @@ -305,7 +305,7 @@ thermal_of_cooling_device_register(struct device_node *np, static inline struct thermal_cooling_device * devm_thermal_of_cooling_device_register(struct device *dev, struct device_node *np, - char *type, void *devdata, + const char *type, void *devdata, const struct thermal_cooling_device_ops *ops) { return ERR_PTR(-ENODEV); -- cgit v1.2.3 From 14678219cf4093e897ab353fd78eab7994d1be7d Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:35 +0800 Subject: iommu: Introduce domain attachment handle Currently, when attaching a domain to a device or its PASID, domain is stored within the iommu group. It could be retrieved for use during the window between attachment and detachment. With new features introduced, there's a need to store more information than just a domain pointer. This information essentially represents the association between a domain and a device. For example, the SVA code already has a custom struct iommu_sva which represents a bond between sva domain and a PASID of a device. Looking forward, the IOMMUFD needs a place to store the iommufd_device pointer in the core, so that the device object ID could be quickly retrieved in the critical fault handling path. Introduce domain attachment handle that explicitly represents the attachment relationship between a domain and a device or its PASID. Co-developed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240702063444.105814-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- include/linux/iommu.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 17b3f36ad843..afc5af0069bb 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -989,12 +989,22 @@ struct iommu_fwspec { /* ATS is supported */ #define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0) +/* + * An iommu attach handle represents a relationship between an iommu domain + * and a PASID or RID of a device. It is allocated and managed by the component + * that manages the domain and is stored in the iommu group during the time the + * domain is attached. + */ +struct iommu_attach_handle { + struct iommu_domain *domain; +}; + /** * struct iommu_sva - handle to a device-mm bond */ struct iommu_sva { + struct iommu_attach_handle handle; struct device *dev; - struct iommu_domain *domain; struct list_head handle_item; refcount_t users; }; @@ -1052,7 +1062,8 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner); void iommu_device_release_dma_owner(struct device *dev); int iommu_attach_device_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); + struct device *dev, ioasid_t pasid, + struct iommu_attach_handle *handle); void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); struct iommu_domain * @@ -1388,7 +1399,8 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner) } static inline int iommu_attach_device_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid) + struct device *dev, ioasid_t pasid, + struct iommu_attach_handle *handle) { return -ENODEV; } -- cgit v1.2.3 From 3e7f57d1ef3f5fbed58974fae38d35e430f57d35 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:36 +0800 Subject: iommu: Remove sva handle list The struct sva_iommu represents an association between an SVA domain and a PASID of a device. It's stored in the iommu group's pasid array and also tracked by a list in the per-mm data structure. Removes duplicate tracking of sva_iommu by eliminating the list. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240702063444.105814-3-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index afc5af0069bb..87ebcc29020e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1005,14 +1005,12 @@ struct iommu_attach_handle { struct iommu_sva { struct iommu_attach_handle handle; struct device *dev; - struct list_head handle_item; refcount_t users; }; struct iommu_mm_data { u32 pasid; struct list_head sva_domains; - struct list_head sva_handles; }; int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, -- cgit v1.2.3 From 06cdcc32d65759d42c6340700796e2906045b6a5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:37 +0800 Subject: iommu: Add attach handle to struct iopf_group Previously, the domain that a page fault targets is stored in an iopf_group, which represents a minimal set of page faults. With the introduction of attach handle, replace the domain with the handle so that the fault handler can obtain more information as needed when handling the faults. iommu_report_device_fault() is currently used for SVA page faults, which handles the page fault in an internal cycle. The domain is retrieved with iommu_get_domain_for_dev_pasid() if the pasid in the fault message is valid. This doesn't work in IOMMUFD case, where if the pasid table of a device is wholly managed by user space, there is no domain attached to the PASID of the device, and all page faults are forwarded through a NESTING domain attaching to RID. Add a static flag in iommu ops, which indicates if the IOMMU driver supports user-managed PASID tables. In the iopf deliver path, if no attach handle found for the iopf PASID, roll back to RID domain when the IOMMU driver supports this capability. iommu_get_domain_for_dev_pasid() is no longer used and can be removed. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240702063444.105814-4-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- include/linux/iommu.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 87ebcc29020e..910aec80886e 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -127,7 +127,7 @@ struct iopf_group { /* list node for iommu_fault_param::faults */ struct list_head pending_node; struct work_struct work; - struct iommu_domain *domain; + struct iommu_attach_handle *attach_handle; /* The device's fault data parameter. */ struct iommu_fault_param *fault_param; }; @@ -547,6 +547,10 @@ static inline int __iommu_copy_struct_from_user_array( * @default_domain: If not NULL this will always be set as the default domain. * This should be an IDENTITY/BLOCKED/PLATFORM domain. * Do not use in new drivers. + * @user_pasid_table: IOMMU driver supports user-managed PASID table. There is + * no user domain for each PASID and the I/O page faults are + * forwarded through the user domain attached to the device + * RID. */ struct iommu_ops { bool (*capable)(struct device *dev, enum iommu_cap); @@ -590,6 +594,7 @@ struct iommu_ops { struct iommu_domain *blocked_domain; struct iommu_domain *release_domain; struct iommu_domain *default_domain; + u8 user_pasid_table:1; }; /** @@ -1064,9 +1069,6 @@ int iommu_attach_device_pasid(struct iommu_domain *domain, struct iommu_attach_handle *handle); void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); -struct iommu_domain * -iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, - unsigned int type); ioasid_t iommu_alloc_global_pasid(struct device *dev); void iommu_free_global_pasid(ioasid_t pasid); #else /* CONFIG_IOMMU_API */ @@ -1408,13 +1410,6 @@ static inline void iommu_detach_device_pasid(struct iommu_domain *domain, { } -static inline struct iommu_domain * -iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, - unsigned int type) -{ - return NULL; -} - static inline ioasid_t iommu_alloc_global_pasid(struct device *dev) { return IOMMU_PASID_INVALID; -- cgit v1.2.3 From a27bf2743cb80d3b36b5b43e8e2e702412c41668 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Jun 2024 16:55:35 +0800 Subject: iommu: Add iommu_paging_domain_alloc() interface Commit <17de3f5fdd35> ("iommu: Retire bus ops") removes iommu ops from bus. The iommu subsystem no longer relies on bus for operations. So the bus parameter in iommu_domain_alloc() is no longer relevant. Add a new interface named iommu_paging_domain_alloc(), which explicitly indicates the allocation of a paging domain for DMA managed by a kernel driver. The new interface takes a device pointer as its parameter, that better aligns with the current iommu subsystem. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20240610085555.88197-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- include/linux/iommu.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 17b3f36ad843..58ea0935d355 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -780,6 +780,7 @@ extern bool iommu_present(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool iommu_group_has_isolated_msi(struct iommu_group *group); extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus); +struct iommu_domain *iommu_paging_domain_alloc(struct device *dev); extern void iommu_domain_free(struct iommu_domain *domain); extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); @@ -1086,6 +1087,11 @@ static inline struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus return NULL; } +static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) +{ + return ERR_PTR(-ENODEV); +} + static inline void iommu_domain_free(struct iommu_domain *domain) { } -- cgit v1.2.3 From 3f7c320916282c26812d70cfe8830abb9e4dc696 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 2 Jul 2024 12:40:48 +0100 Subject: iommu: Resolve fwspec ops automatically There's no real need for callers to resolve ops from a fwnode in order to then pass both to iommu_fwspec_init() - it's simpler and more sensible for that to resolve the ops itself. This in turn means we can centralise the notion of checking for a present driver, and enforce that fwspecs aren't allocated unless and until we know they will be usable. Also use this opportunity to modernise with some "new" helpers that arrived shortly after this code was first written; the generic fwnode_handle_get() clears up that ugly get/put mismatch, while of_fwnode_handle() can now abstract those open-coded dereferences. Tested-by: Jean-Philippe Brucker Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/0e2727adeb8cd73274425322f2f793561bdc927e.1719919669.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- include/acpi/acpi_bus.h | 3 +-- include/linux/iommu.h | 13 ++----------- 2 files changed, 3 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 1a4dfd7a1c4a..9d815837e297 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -736,8 +736,7 @@ struct iommu_ops; bool acpi_dma_supported(const struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); int acpi_iommu_fwspec_init(struct device *dev, u32 id, - struct fwnode_handle *fwnode, - const struct iommu_ops *ops); + struct fwnode_handle *fwnode); int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map); int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 17b3f36ad843..81893aad9ee4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1005,11 +1005,9 @@ struct iommu_mm_data { struct list_head sva_handles; }; -int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, - const struct iommu_ops *ops); +int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode); void iommu_fwspec_free(struct device *dev); int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids); -const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { @@ -1315,8 +1313,7 @@ static inline void iommu_device_unlink(struct device *dev, struct device *link) } static inline int iommu_fwspec_init(struct device *dev, - struct fwnode_handle *iommu_fwnode, - const struct iommu_ops *ops) + struct fwnode_handle *iommu_fwnode) { return -ENODEV; } @@ -1331,12 +1328,6 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, return -ENODEV; } -static inline -const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) -{ - return NULL; -} - static inline int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { -- cgit v1.2.3 From 3e36c15fc1cce65cccc93ed16f86d8ff9d2f9992 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 2 Jul 2024 12:40:51 +0100 Subject: iommu: Remove iommu_fwspec ops The ops in iommu_fwspec are only needed for the early configuration and probe process, and by now are easy enough to derive on-demand in those couple of places which need them, so remove the redundant stored copy. Tested-by: Jean-Philippe Brucker Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/55c1410b2cd09531eab4f8e2f18f92a0faa0ea75.1719919669.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 81893aad9ee4..11ae1750cb1d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -968,7 +968,6 @@ extern struct iommu_group *generic_single_device_group(struct device *dev); /** * struct iommu_fwspec - per-device IOMMU instance data - * @ops: ops for this device's IOMMU * @iommu_fwnode: firmware handle for this device's IOMMU * @flags: IOMMU_FWSPEC_* flags * @num_ids: number of associated device IDs @@ -979,7 +978,6 @@ extern struct iommu_group *generic_single_device_group(struct device *dev); * consumers. */ struct iommu_fwspec { - const struct iommu_ops *ops; struct fwnode_handle *iommu_fwnode; u32 flags; unsigned int num_ids; -- cgit v1.2.3 From 7640f1a44eba0cc1a41b312080c236f5c668cd50 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 3 Jul 2024 13:06:08 +0300 Subject: printk: Add match_devname_and_update_preferred_console() Let's add match_devname_and_update_preferred_console() for driver subsystems to call during init when the console is ready, and it's character device name is known. For now, we use it only for the serial layer to allow console=DEVNAME:0.0 style hardware based addressing for consoles. The earlier attempt on doing this caused a regression with the kernel command line console order as it added calling __add_preferred_console() again later on during init. A better approach was suggested by Petr where we add the deferred console to the console_cmdline[] and update it later on when the console is ready. Suggested-by: Petr Mladek Co-developed-by: Petr Mladek Signed-off-by: Tony Lindgren Reviewed-by: Petr Mladek Link: https://lore.kernel.org/r/20240703100615.118762-2-tony.lindgren@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/printk.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/printk.h b/include/linux/printk.h index 65c5184470f1..7239976698e4 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -60,6 +60,10 @@ static inline const char *printk_skip_headers(const char *buffer) #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT #define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET +int match_devname_and_update_preferred_console(const char *match, + const char *name, + const short idx); + extern int console_printk[]; #define console_loglevel (console_printk[0]) -- cgit v1.2.3 From 608f6976c309793ceea37292c54b057dab091944 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 26 Jun 2024 07:35:37 -0700 Subject: perf/x86/intel: Support new data source for Lunar Lake A new PEBS data source format is introduced for the p-core of Lunar Lake. The data source field is extended to 8 bits with new encodings. A new layout is introduced into the union intel_x86_pebs_dse. Introduce the lnl_latency_data() to parse the new format. Enlarge the pebs_data_source[] accordingly to include new encodings. Only the mem load and the mem store events can generate the data source. Introduce INTEL_HYBRID_LDLAT_CONSTRAINT and INTEL_HYBRID_STLAT_CONSTRAINT to mark them. Add two new bits for the new cache-related data src, L2_MHB and MSC. The L2_MHB is short for L2 Miss Handling Buffer, which is similar to LFB (Line Fill Buffer), but to track the L2 Cache misses. The MSC stands for the memory-side cache. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Reviewed-by: Ian Rogers Link: https://lkml.kernel.org/r/20240626143545.480761-6-kan.liang@linux.intel.com --- include/uapi/linux/perf_event.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 3a64499b0f5d..4842c36fdf80 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1349,12 +1349,14 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0x7 available */ +#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ +/* 0x7 available */ #define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ +#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ #define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ #define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ -- cgit v1.2.3 From 473b2cf9c4d1711146da1d8574c61e92c6672892 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Thu, 6 Jun 2024 12:56:35 +0530 Subject: PCI: endpoint: Introduce 'epc_deinit' event and notify the EPF drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As like the 'epc_init' event, that is used to signal the EPF drivers about the EPC initialization, let's introduce 'epc_deinit' event that is used to signal EPC deinitialization. The EPC deinitialization applies only when any sort of fundamental reset is supported by the endpoint controller as per the PCIe spec. Reference: PCIe r6.0, sec 4.2.5.9.1 and 6.6.1. Currently, some EPC drivers like pcie-qcom-ep and pcie-tegra194 support PERST# as the fundamental reset. So the 'deinit' event will be notified to the EPF drivers when PERST# assert happens in the above mentioned EPC drivers. The EPF drivers, on receiving the event through the epc_deinit() callback should reset the EPF state machine and also cleanup any configuration that got affected by the fundamental reset like BAR, DMA etc... This change also warrants skipping the cleanups in unbind() if already done in epc_deinit(). Link: https://lore.kernel.org/r/20240606-pci-deinit-v1-2-4395534520dc@linaro.org Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Reviewed-by: Niklas Cassel Reviewed-by: Siddharth Vadapalli Reviewed-by: Frank Li --- include/linux/pci-epc.h | 13 +++++++++++++ include/linux/pci-epf.h | 2 ++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 11115cd0fe5b..85bdf2adb760 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -197,6 +197,8 @@ struct pci_epc_features { #define to_pci_epc(device) container_of((device), struct pci_epc, dev) +#ifdef CONFIG_PCI_ENDPOINT + #define pci_epc_create(dev, ops) \ __pci_epc_create((dev), (ops), THIS_MODULE) #define devm_pci_epc_create(dev, ops) \ @@ -226,6 +228,7 @@ void pci_epc_linkup(struct pci_epc *epc); void pci_epc_linkdown(struct pci_epc *epc); void pci_epc_init_notify(struct pci_epc *epc); void pci_epc_notify_pending_init(struct pci_epc *epc, struct pci_epf *epf); +void pci_epc_deinit_notify(struct pci_epc *epc); void pci_epc_bus_master_enable_notify(struct pci_epc *epc); void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf, enum pci_epc_interface_type type); @@ -272,4 +275,14 @@ void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, phys_addr_t *phys_addr, size_t size); void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr, void __iomem *virt_addr, size_t size); + +#else +static inline void pci_epc_init_notify(struct pci_epc *epc) +{ +} + +static inline void pci_epc_deinit_notify(struct pci_epc *epc) +{ +} +#endif /* CONFIG_PCI_ENDPOINT */ #endif /* __LINUX_PCI_EPC_H */ diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index dc759eb7157c..0639d4dc8986 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -71,12 +71,14 @@ struct pci_epf_ops { /** * struct pci_epc_event_ops - Callbacks for capturing the EPC events * @epc_init: Callback for the EPC initialization complete event + * @epc_deinit: Callback for the EPC deinitialization event * @link_up: Callback for the EPC link up event * @link_down: Callback for the EPC link down event * @bus_master_enable: Callback for the EPC Bus Master Enable event */ struct pci_epc_event_ops { int (*epc_init)(struct pci_epf *epf); + void (*epc_deinit)(struct pci_epf *epf); int (*link_up)(struct pci_epf *epf); int (*link_down)(struct pci_epf *epf); int (*bus_master_enable)(struct pci_epf *epf); -- cgit v1.2.3 From e269d79c7d35aa3808b1f3c1737d63dab504ddc8 Mon Sep 17 00:00:00 2001 From: Denis Arefev Date: Thu, 13 Jun 2024 12:54:48 +0300 Subject: net: missing check virtio Two missing check in virtio_net_hdr_to_skb() allowed syzbot to crash kernels again 1. After the skb_segment function the buffer may become non-linear (nr_frags != 0), but since the SKBTX_SHARED_FRAG flag is not set anywhere the __skb_linearize function will not be executed, then the buffer will remain non-linear. Then the condition (offset >= skb_headlen(skb)) becomes true, which causes WARN_ON_ONCE in skb_checksum_help. 2. The struct sk_buff and struct virtio_net_hdr members must be mathematically related. (gso_size) must be greater than (needed) otherwise WARN_ON_ONCE. (remainder) must be greater than (needed) otherwise WARN_ON_ONCE. (remainder) may be 0 if division is without remainder. offset+2 (4191) > skb_headlen() (1116) WARNING: CPU: 1 PID: 5084 at net/core/dev.c:3303 skb_checksum_help+0x5e2/0x740 net/core/dev.c:3303 Modules linked in: CPU: 1 PID: 5084 Comm: syz-executor336 Not tainted 6.7.0-rc3-syzkaller-00014-gdf60cee26a2e #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 11/10/2023 RIP: 0010:skb_checksum_help+0x5e2/0x740 net/core/dev.c:3303 Code: 89 e8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 52 01 00 00 44 89 e2 2b 53 74 4c 89 ee 48 c7 c7 40 57 e9 8b e8 af 8f dd f8 90 <0f> 0b 90 90 e9 87 fe ff ff e8 40 0f 6e f9 e9 4b fa ff ff 48 89 ef RSP: 0018:ffffc90003a9f338 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff888025125780 RCX: ffffffff814db209 RDX: ffff888015393b80 RSI: ffffffff814db216 RDI: 0000000000000001 RBP: ffff8880251257f4 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 000000000000045c R13: 000000000000105f R14: ffff8880251257f0 R15: 000000000000105d FS: 0000555555c24380(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000002000f000 CR3: 0000000023151000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ip_do_fragment+0xa1b/0x18b0 net/ipv4/ip_output.c:777 ip_fragment.constprop.0+0x161/0x230 net/ipv4/ip_output.c:584 ip_finish_output_gso net/ipv4/ip_output.c:286 [inline] __ip_finish_output net/ipv4/ip_output.c:308 [inline] __ip_finish_output+0x49c/0x650 net/ipv4/ip_output.c:295 ip_finish_output+0x31/0x310 net/ipv4/ip_output.c:323 NF_HOOK_COND include/linux/netfilter.h:303 [inline] ip_output+0x13b/0x2a0 net/ipv4/ip_output.c:433 dst_output include/net/dst.h:451 [inline] ip_local_out+0xaf/0x1a0 net/ipv4/ip_output.c:129 iptunnel_xmit+0x5b4/0x9b0 net/ipv4/ip_tunnel_core.c:82 ipip6_tunnel_xmit net/ipv6/sit.c:1034 [inline] sit_tunnel_xmit+0xed2/0x28f0 net/ipv6/sit.c:1076 __netdev_start_xmit include/linux/netdevice.h:4940 [inline] netdev_start_xmit include/linux/netdevice.h:4954 [inline] xmit_one net/core/dev.c:3545 [inline] dev_hard_start_xmit+0x13d/0x6d0 net/core/dev.c:3561 __dev_queue_xmit+0x7c1/0x3d60 net/core/dev.c:4346 dev_queue_xmit include/linux/netdevice.h:3134 [inline] packet_xmit+0x257/0x380 net/packet/af_packet.c:276 packet_snd net/packet/af_packet.c:3087 [inline] packet_sendmsg+0x24ca/0x5240 net/packet/af_packet.c:3119 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 __sys_sendto+0x255/0x340 net/socket.c:2190 __do_sys_sendto net/socket.c:2202 [inline] __se_sys_sendto net/socket.c:2198 [inline] __x64_sys_sendto+0xe0/0x1b0 net/socket.c:2198 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x40/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Found by Linux Verification Center (linuxtesting.org) with Syzkaller Fixes: 0f6925b3e8da ("virtio_net: Do not pull payload in skb->head") Signed-off-by: Denis Arefev Message-Id: <20240613095448.27118-1-arefev@swemel.ru> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_net.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 4dfa9b69ca8d..d1d7825318c3 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -56,6 +56,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, unsigned int thlen = 0; unsigned int p_off = 0; unsigned int ip_proto; + u64 ret, remainder, gso_size; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -98,6 +99,16 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); + if (hdr->gso_size) { + gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); + ret = div64_u64_rem(skb->len, gso_size, &remainder); + if (!(ret && (hdr->gso_size > needed) && + ((remainder > needed) || (remainder == 0)))) { + return -EINVAL; + } + skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG; + } + if (!pskb_may_pull(skb, needed)) return -EINVAL; -- cgit v1.2.3 From a1cacb8a8e70c38ec0c78910c668abda99fcb780 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 24 Jun 2024 17:19:56 +0200 Subject: backlight: Add BACKLIGHT_POWER_ constants for power states Duplicate FB_BLANK_ constants as BACKLIGHT_POWER__ constants in the backlight header file. Allows backlight drivers to avoid including the fbdev header file and removes a compile-time dependency between the two subsystems. The new BACKLIGHT_POWER_ constants have the same values as their FB_BLANK_ counterparts. Hence UAPI and internal semantics do not change. The backlight drivers can be converted one by one. Each instance of FB_BLANK_UNBLANK becomes BACKLIGHT_POWER_ON, each of FB_BLANK_POWERDOWN becomes BACKLIGHT_POWER_OFF, and FB_BLANK_NORMAL becomes BACKLIGHT_POWER_REDUCED. Backlight code or drivers do not use FB_BLANK_VSYNC_SUSPEND and FB_BLANK_HSYNC_SUSPEND, so no new constants for these are being added. The semantics of FB_BLANK_NORMAL appear inconsistent. In fbdev, NORMAL means display off with sync enabled. In backlight code, this translates to turn the backlight off, but some drivers interpret it as backlight on. So we keep the current code as is, but mark BACKLIGHT_POWER_REDUCED as deprecated. Drivers should be fixed and the constant removed. This affects ams369fg06 and a few DRM panel drivers. v2: - rename BL_CORE_ power constants to BACKLIGHT_POWER_ (Sam) - fix documentation Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Thompson Link: https://lore.kernel.org/r/20240624152033.25016-2-tzimmermann@suse.de Signed-off-by: Lee Jones --- include/linux/backlight.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/backlight.h b/include/linux/backlight.h index 19a1c0e22629..ea9c1bc8148e 100644 --- a/include/linux/backlight.h +++ b/include/linux/backlight.h @@ -209,15 +209,19 @@ struct backlight_properties { * attribute: /sys/class/backlight//bl_power * When the power property is updated update_status() is called. * - * The possible values are: (0: full on, 1 to 3: power saving - * modes; 4: full off), see FB_BLANK_XXX. + * The possible values are: (0: full on, 4: full off), see + * BACKLIGHT_POWER constants. * - * When the backlight device is enabled @power is set - * to FB_BLANK_UNBLANK. When the backlight device is disabled - * @power is set to FB_BLANK_POWERDOWN. + * When the backlight device is enabled, @power is set to + * BACKLIGHT_POWER_ON. When the backlight device is disabled, + * @power is set to BACKLIGHT_POWER_OFF. */ int power; +#define BACKLIGHT_POWER_ON (0) +#define BACKLIGHT_POWER_OFF (4) +#define BACKLIGHT_POWER_REDUCED (1) // deprecated; don't use in new code + /** * @type: The type of backlight supported. * @@ -346,7 +350,7 @@ static inline int backlight_enable(struct backlight_device *bd) if (!bd) return 0; - bd->props.power = FB_BLANK_UNBLANK; + bd->props.power = BACKLIGHT_POWER_ON; bd->props.state &= ~BL_CORE_FBBLANK; return backlight_update_status(bd); @@ -361,7 +365,7 @@ static inline int backlight_disable(struct backlight_device *bd) if (!bd) return 0; - bd->props.power = FB_BLANK_POWERDOWN; + bd->props.power = BACKLIGHT_POWER_OFF; bd->props.state |= BL_CORE_FBBLANK; return backlight_update_status(bd); @@ -380,7 +384,7 @@ static inline int backlight_disable(struct backlight_device *bd) */ static inline bool backlight_is_blank(const struct backlight_device *bd) { - return bd->props.power != FB_BLANK_UNBLANK || + return bd->props.power != BACKLIGHT_POWER_ON || bd->props.state & (BL_CORE_SUSPENDED | BL_CORE_FBBLANK); } -- cgit v1.2.3 From 326ae03d772de6e082d346466671c3b66de2962d Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 7 May 2024 17:53:56 +0100 Subject: mfd: idt8a340_reg: Start comments with '/*' Several comments in idt8a340_reg.h start with '/**', which denotes the start of a Kernel doc, but are otherwise not Kernel docs. Resolve this conflict by starting these comments with '/*' instead. Flagged by ./scripts/kernel-doc -none Signed-off-by: Simon Horman Link: https://lore.kernel.org/r/20240507-clockmatrix-kernel-doc-v2-1-3138d74192dd@kernel.org Signed-off-by: Lee Jones --- include/linux/mfd/idt8a340_reg.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/idt8a340_reg.h b/include/linux/mfd/idt8a340_reg.h index 0c706085c205..53a222605526 100644 --- a/include/linux/mfd/idt8a340_reg.h +++ b/include/linux/mfd/idt8a340_reg.h @@ -61,7 +61,7 @@ #define HW_Q8_CTRL_SPARE (0xa7d4) #define HW_Q11_CTRL_SPARE (0xa7ec) -/** +/* * Select FOD5 as sync_trigger for Q8 divider. * Transition from logic zero to one * sets trigger to sync Q8 divider. @@ -70,7 +70,7 @@ */ #define Q9_TO_Q8_SYNC_TRIG BIT(1) -/** +/* * Enable FOD5 as driver for clock and sync for Q8 divider. * Enable fanout buffer for FOD5. * @@ -78,7 +78,7 @@ */ #define Q9_TO_Q8_FANOUT_AND_CLOCK_SYNC_ENABLE_MASK (BIT(0) | BIT(2)) -/** +/* * Select FOD6 as sync_trigger for Q11 divider. * Transition from logic zero to one * sets trigger to sync Q11 divider. @@ -87,7 +87,7 @@ */ #define Q10_TO_Q11_SYNC_TRIG BIT(1) -/** +/* * Enable FOD6 as driver for clock and sync for Q11 divider. * Enable fanout buffer for FOD6. * -- cgit v1.2.3 From 13c151a919a876521c16810756764aa38683eb62 Mon Sep 17 00:00:00 2001 From: Andrew Davis Date: Thu, 13 Jun 2024 12:54:30 -0500 Subject: mfd: tps65912: Use devm helper functions to simplify probe This simplifies probe and also allows us to remove the remove callbacks from the core and interface drivers. Do that here. Signed-off-by: Andrew Davis Link: https://lore.kernel.org/r/20240613175430.57698-1-afd@ti.com Signed-off-by: Lee Jones --- include/linux/mfd/tps65912.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/tps65912.h b/include/linux/mfd/tps65912.h index 860ec0a16c96..e5373c302722 100644 --- a/include/linux/mfd/tps65912.h +++ b/include/linux/mfd/tps65912.h @@ -314,6 +314,5 @@ struct tps65912 { extern const struct regmap_config tps65912_regmap_config; int tps65912_device_init(struct tps65912 *tps); -void tps65912_device_exit(struct tps65912 *tps); #endif /* __LINUX_MFD_TPS65912_H */ -- cgit v1.2.3 From d7636117ca976e217b6452c082e7f7c041f4019c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 Jun 2024 22:14:16 +0300 Subject: mfd: lm3533: Move to new GPIO descriptor-based APIs Legacy GPIO APIs are subject to remove. Convert the driver to new APIs. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240605191458.2536819-1-andriy.shevchenko@linux.intel.com Signed-off-by: Lee Jones --- include/linux/mfd/lm3533.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/lm3533.h b/include/linux/mfd/lm3533.h index 77092f6363ad..69059a7a2ce5 100644 --- a/include/linux/mfd/lm3533.h +++ b/include/linux/mfd/lm3533.h @@ -16,6 +16,7 @@ DEVICE_ATTR(_name, S_IRUGO | S_IWUSR , show_##_name, store_##_name) struct device; +struct gpio_desc; struct regmap; struct lm3533 { @@ -23,7 +24,7 @@ struct lm3533 { struct regmap *regmap; - int gpio_hwen; + struct gpio_desc *hwen; int irq; unsigned have_als:1; @@ -69,8 +70,6 @@ enum lm3533_boost_ovp { }; struct lm3533_platform_data { - int gpio_hwen; - enum lm3533_boost_ovp boost_ovp; enum lm3533_boost_freq boost_freq; -- cgit v1.2.3 From 1d845319dc819c39e2ee568f1c6bd0cdb2fbc035 Mon Sep 17 00:00:00 2001 From: Etienne Carriere Date: Mon, 17 Jun 2024 11:20:16 +0200 Subject: dt-bindings: mfd: Dual licensing for st,stpmic1 bindings Change include/dt-bindings/mfd/st,stpmic1.h license model from GPLv2.0 only to dual GPLv2.0 or BSD-2-Clause. I have every legitimacy to request this change on behalf of STMicroelectronics. This change clarifies that this DT binding header file can be shared with software components as bootloaders and OSes that are not published under GPLv2 terms. In CC are all the contributors to this header file. Cc: Pascal Paillet Cc: Lee Jones Cc: Rob Herring Signed-off-by: Etienne Carriere Acked-by: Rob Herring (Arm) Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240617092016.2958046-1-etienne.carriere@foss.st.com Signed-off-by: Lee Jones --- include/dt-bindings/mfd/st,stpmic1.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/mfd/st,stpmic1.h b/include/dt-bindings/mfd/st,stpmic1.h index 321cd08797d9..9dd15b9c743e 100644 --- a/include/dt-bindings/mfd/st,stpmic1.h +++ b/include/dt-bindings/mfd/st,stpmic1.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ /* * Copyright (C) STMicroelectronics 2018 - All Rights Reserved * Author: Philippe Peurichard , -- cgit v1.2.3 From d89e8096957e35742c9922d3f6628f24de0d6163 Mon Sep 17 00:00:00 2001 From: Johan Jonker Date: Thu, 27 Jun 2024 23:17:45 +0200 Subject: dt-bindings: clock: rk3188-cru-common: remove CLK_NR_CLKS CLK_NR_CLKS should not be part of the binding. Remove since the kernel code no longer uses it. Signed-off-by: Johan Jonker Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/6f21c09b-e8d2-4749-aca6-572c79df775d@gmail.com Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rk3188-cru-common.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/clock/rk3188-cru-common.h b/include/dt-bindings/clock/rk3188-cru-common.h index afad90680fce..01e14ab252a7 100644 --- a/include/dt-bindings/clock/rk3188-cru-common.h +++ b/include/dt-bindings/clock/rk3188-cru-common.h @@ -132,8 +132,6 @@ #define HCLK_VDPU 472 #define HCLK_HDMI 473 -#define CLK_NR_CLKS (HCLK_HDMI + 1) - /* soft-reset indices */ #define SRST_MCORE 2 #define SRST_CORE0 3 -- cgit v1.2.3 From caa93b7c25945d302689de07bd404655db93ae6e Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 3 Jul 2024 13:18:49 +0100 Subject: ethtool: move firmware flashing flag to struct ethtool_netdev_state Commit 31e0aa99dc02 ("ethtool: Veto some operations during firmware flashing process") added a flag module_fw_flash_in_progress to struct net_device. As this is ethtool related state, move it to the recently created struct ethtool_netdev_state, accessed via the 'ethtool' member of struct net_device. Suggested-by: Jakub Kicinski Signed-off-by: Edward Cree Reviewed-by: Michal Kubiak Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20240703121849.652893-1-edward.cree@amd.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 2 ++ include/linux/netdevice.h | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index f74bb0cf8ed1..3a99238ef895 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1107,11 +1107,13 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, * @rss_lock: Protects entries in @rss_ctx. May be taken from * within RTNL. * @wol_enabled: Wake-on-LAN is enabled + * @module_fw_flash_in_progress: Module firmware flashing is in progress. */ struct ethtool_netdev_state { struct xarray rss_ctx; struct mutex rss_lock; unsigned wol_enabled:1; + unsigned module_fw_flash_in_progress:1; }; struct phy_device; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3c719f0d5f5a..93558645c6d0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1989,8 +1989,6 @@ enum netdev_reg_state { * * @threaded: napi threaded mode is enabled * - * @module_fw_flash_in_progress: Module firmware flashing is in progress. - * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. @@ -2376,7 +2374,6 @@ struct net_device { bool proto_down; bool threaded; - unsigned module_fw_flash_in_progress:1; struct list_head net_notifier_list; #if IS_ENABLED(CONFIG_MACSEC) -- cgit v1.2.3 From a61809a33239821d70eba77bd0d6d13c29bbad0d Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Wed, 3 Jul 2024 18:33:14 +0300 Subject: tpm: Address !chip->auth in tpm_buf_append_name() Unless tpm_chip_bootstrap() was called by the driver, !chip->auth can cause a null derefence in tpm_buf_append_name(). Thus, address !chip->auth in tpm_buf_append_name() and remove the fallback implementation for !TCG_TPM2_HMAC. Cc: stable@vger.kernel.org # v6.10+ Reported-by: Stefan Berger Closes: https://lore.kernel.org/linux-integrity/20240617193408.1234365-1-stefanb@linux.ibm.com/ Fixes: d0a25bb961e6 ("tpm: Add HMAC session name/handle append") Tested-by: Michael Ellerman # ppc Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 21a67dc9efe8..4d3071e885a0 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -490,11 +490,22 @@ static inline void tpm_buf_append_empty_auth(struct tpm_buf *buf, u32 handle) { } #endif + +static inline struct tpm2_auth *tpm2_chip_auth(struct tpm_chip *chip) +{ #ifdef CONFIG_TCG_TPM2_HMAC + return chip->auth; +#else + return NULL; +#endif +} -int tpm2_start_auth_session(struct tpm_chip *chip); void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, u32 handle, u8 *name); + +#ifdef CONFIG_TCG_TPM2_HMAC + +int tpm2_start_auth_session(struct tpm_chip *chip); void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, u8 attributes, u8 *passphrase, int passphraselen); @@ -521,14 +532,6 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip) static inline void tpm2_end_auth_session(struct tpm_chip *chip) { } -static inline void tpm_buf_append_name(struct tpm_chip *chip, - struct tpm_buf *buf, - u32 handle, u8 *name) -{ - tpm_buf_append_u32(buf, handle); - /* count the number of handles in the upper bits of flags */ - buf->handles++; -} static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, u8 attributes, u8 *passphrase, -- cgit v1.2.3 From 7ca110f2679b7d1f3ac1afc90e6ffbf0af3edf0d Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Wed, 3 Jul 2024 18:47:46 +0300 Subject: tpm: Address !chip->auth in tpm_buf_append_hmac_session*() Unless tpm_chip_bootstrap() was called by the driver, !chip->auth can cause a null derefence in tpm_buf_hmac_session*(). Thus, address !chip->auth in tpm_buf_hmac_session*() and remove the fallback implementation for !TCG_TPM2_HMAC. Cc: stable@vger.kernel.org # v6.9+ Reported-by: Stefan Berger Closes: https://lore.kernel.org/linux-integrity/20240617193408.1234365-1-stefanb@linux.ibm.com/ Fixes: 1085b8276bb4 ("tpm: Add the rest of the session HMAC API") Tested-by: Michael Ellerman # ppc Signed-off-by: Jarkko Sakkinen --- include/linux/tpm.h | 68 ++++++++++++++++------------------------------------- 1 file changed, 20 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 4d3071e885a0..e93ee8d936a9 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -502,10 +502,6 @@ static inline struct tpm2_auth *tpm2_chip_auth(struct tpm_chip *chip) void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf, u32 handle, u8 *name); - -#ifdef CONFIG_TCG_TPM2_HMAC - -int tpm2_start_auth_session(struct tpm_chip *chip); void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf, u8 attributes, u8 *passphrase, int passphraselen); @@ -515,9 +511,27 @@ static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip, u8 *passphrase, int passphraselen) { - tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, - passphraselen); + struct tpm_header *head; + int offset; + + if (tpm2_chip_auth(chip)) { + tpm_buf_append_hmac_session(chip, buf, attributes, passphrase, passphraselen); + } else { + offset = buf->handles * 4 + TPM_HEADER_SIZE; + head = (struct tpm_header *)buf->data; + + /* + * If the only sessions are optional, the command tag must change to + * TPM2_ST_NO_SESSIONS. + */ + if (tpm_buf_length(buf) == offset) + head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS); + } } + +#ifdef CONFIG_TCG_TPM2_HMAC + +int tpm2_start_auth_session(struct tpm_chip *chip); void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf); int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf, int rc); @@ -532,48 +546,6 @@ static inline int tpm2_start_auth_session(struct tpm_chip *chip) static inline void tpm2_end_auth_session(struct tpm_chip *chip) { } -static inline void tpm_buf_append_hmac_session(struct tpm_chip *chip, - struct tpm_buf *buf, - u8 attributes, u8 *passphrase, - int passphraselen) -{ - /* offset tells us where the sessions area begins */ - int offset = buf->handles * 4 + TPM_HEADER_SIZE; - u32 len = 9 + passphraselen; - - if (tpm_buf_length(buf) != offset) { - /* not the first session so update the existing length */ - len += get_unaligned_be32(&buf->data[offset]); - put_unaligned_be32(len, &buf->data[offset]); - } else { - tpm_buf_append_u32(buf, len); - } - /* auth handle */ - tpm_buf_append_u32(buf, TPM2_RS_PW); - /* nonce */ - tpm_buf_append_u16(buf, 0); - /* attributes */ - tpm_buf_append_u8(buf, 0); - /* passphrase */ - tpm_buf_append_u16(buf, passphraselen); - tpm_buf_append(buf, passphrase, passphraselen); -} -static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip, - struct tpm_buf *buf, - u8 attributes, - u8 *passphrase, - int passphraselen) -{ - int offset = buf->handles * 4 + TPM_HEADER_SIZE; - struct tpm_header *head = (struct tpm_header *) buf->data; - - /* - * if the only sessions are optional, the command tag - * must change to TPM2_ST_NO_SESSIONS - */ - if (tpm_buf_length(buf) == offset) - head->tag = cpu_to_be16(TPM2_ST_NO_SESSIONS); -} static inline void tpm_buf_fill_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf) { -- cgit v1.2.3 From 87024f5837485c2f9541283747428df54c0f9183 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 24 Jun 2024 17:58:55 -0700 Subject: mm: memcg: rename soft limit reclaim-related functions Rename exported function related to the softlimit reclaim to have memcg1_ prefix. Link: https://lkml.kernel.org/r/20240625005906.106920-4-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Michal Hocko Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7403dd5926eb..83c8327455d8 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1121,9 +1121,9 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, void split_page_memcg(struct page *head, int old_order, int new_order); -unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, - gfp_t gfp_mask, - unsigned long *total_scanned); +unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, + gfp_t gfp_mask, + unsigned long *total_scanned); #else /* CONFIG_MEMCG */ @@ -1572,9 +1572,9 @@ static inline void split_page_memcg(struct page *head, int old_order, int new_or } static inline -unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, - gfp_t gfp_mask, - unsigned long *total_scanned) +unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, + gfp_t gfp_mask, + unsigned long *total_scanned) { return 0; } -- cgit v1.2.3 From 66d60c428b23c5b171218985b6880958fb7edbe3 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 24 Jun 2024 17:58:58 -0700 Subject: mm: memcg: move legacy memcg event code into memcontrol-v1.c Cgroup v1's memory controller contains a pretty complicated event notifications mechanism which is not used on cgroup v2. Let's move the corresponding code into memcontrol-v1.c. Please, note, that mem_cgroup_event_ratelimit() remains in memcontrol.c, otherwise it would require exporting too many details on memcg stats outside of memcontrol.c. Link: https://lkml.kernel.org/r/20240625005906.106920-7-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Michal Hocko Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 83c8327455d8..588179d29849 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -69,18 +69,6 @@ struct mem_cgroup_id { refcount_t ref; }; -/* - * Per memcg event counter is incremented at every pagein/pageout. With THP, - * it will be incremented by the number of pages. This counter is used - * to trigger some periodic events. This is straightforward and better - * than using jiffies etc. to handle periodic memcg event. - */ -enum mem_cgroup_events_target { - MEM_CGROUP_TARGET_THRESH, - MEM_CGROUP_TARGET_SOFTLIMIT, - MEM_CGROUP_NTARGETS, -}; - struct memcg_vmstats_percpu; struct memcg_vmstats; struct lruvec_stats_percpu; -- cgit v1.2.3 From 6f1173d6845974aeb654a7f070c0c9f21283e1b3 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 24 Jun 2024 17:59:04 -0700 Subject: mm: memcg: group cgroup v1 memcg related declarations Group all cgroup v1-related declarations at the end of memcontrol.h and mm/memcontrol-v1.h with an intention to put them all together under a config option later on. It should make things easier to follow and maintain too. Link: https://lkml.kernel.org/r/20240625005906.106920-13-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Michal Hocko Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 144 ++++++++++++++++++++++++--------------------- 1 file changed, 76 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 588179d29849..a70d64ed04f5 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -950,39 +950,13 @@ static inline void mem_cgroup_exit_user_fault(void) current->in_user_fault = 0; } -static inline bool task_in_memcg_oom(struct task_struct *p) -{ - return p->memcg_in_oom; -} - -bool mem_cgroup_oom_synchronize(bool wait); struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim, struct mem_cgroup *oom_domain); void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); -void folio_memcg_lock(struct folio *folio); -void folio_memcg_unlock(struct folio *folio); - void __mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, int val); -/* try to stablize folio_memcg() for all the pages in a memcg */ -static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) -{ - rcu_read_lock(); - - if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account)) - return true; - - rcu_read_unlock(); - return false; -} - -static inline void mem_cgroup_unlock_pages(void) -{ - rcu_read_unlock(); -} - /* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx, int val) @@ -1109,10 +1083,6 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, void split_page_memcg(struct page *head, int old_order, int new_order); -unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, - gfp_t gfp_mask, - unsigned long *total_scanned); - #else /* CONFIG_MEMCG */ #define MEM_CGROUP_ID_SHIFT 0 @@ -1423,26 +1393,6 @@ mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) { } -static inline void folio_memcg_lock(struct folio *folio) -{ -} - -static inline void folio_memcg_unlock(struct folio *folio) -{ -} - -static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) -{ - /* to match folio_memcg_rcu() */ - rcu_read_lock(); - return true; -} - -static inline void mem_cgroup_unlock_pages(void) -{ - rcu_read_unlock(); -} - static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask) { } @@ -1455,16 +1405,6 @@ static inline void mem_cgroup_exit_user_fault(void) { } -static inline bool task_in_memcg_oom(struct task_struct *p) -{ - return false; -} - -static inline bool mem_cgroup_oom_synchronize(bool wait) -{ - return false; -} - static inline struct mem_cgroup *mem_cgroup_get_oom_group( struct task_struct *victim, struct mem_cgroup *oom_domain) { @@ -1558,14 +1498,6 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx) static inline void split_page_memcg(struct page *head, int old_order, int new_order) { } - -static inline -unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, - gfp_t gfp_mask, - unsigned long *total_scanned) -{ - return 0; -} #endif /* CONFIG_MEMCG */ /* @@ -1916,4 +1848,80 @@ static inline bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg) } #endif + +/* Cgroup v1-related declarations */ + +#ifdef CONFIG_MEMCG +unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, + gfp_t gfp_mask, + unsigned long *total_scanned); + +bool mem_cgroup_oom_synchronize(bool wait); + +static inline bool task_in_memcg_oom(struct task_struct *p) +{ + return p->memcg_in_oom; +} + +void folio_memcg_lock(struct folio *folio); +void folio_memcg_unlock(struct folio *folio); + +/* try to stablize folio_memcg() for all the pages in a memcg */ +static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) +{ + rcu_read_lock(); + + if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account)) + return true; + + rcu_read_unlock(); + return false; +} + +static inline void mem_cgroup_unlock_pages(void) +{ + rcu_read_unlock(); +} + +#else /* CONFIG_MEMCG */ +static inline +unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, + gfp_t gfp_mask, + unsigned long *total_scanned) +{ + return 0; +} + +static inline void folio_memcg_lock(struct folio *folio) +{ +} + +static inline void folio_memcg_unlock(struct folio *folio) +{ +} + +static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) +{ + /* to match folio_memcg_rcu() */ + rcu_read_lock(); + return true; +} + +static inline void mem_cgroup_unlock_pages(void) +{ + rcu_read_unlock(); +} + +static inline bool task_in_memcg_oom(struct task_struct *p) +{ + return false; +} + +static inline bool mem_cgroup_oom_synchronize(bool wait) +{ + return false; +} + +#endif /* CONFIG_MEMCG */ + #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3 From e93d4166b40a84df83c4d5cb4c709d022808ef9b Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 24 Jun 2024 17:59:05 -0700 Subject: mm: memcg: put cgroup v1-specific code under a config option Put legacy cgroup v1 memory controller code under a new CONFIG_MEMCG_V1 config option. The option is turned off by default. Nobody except those who are still using cgroup v1 should turn it on. If the option is not set, memory controller can still be mounted under cgroup v1, but none of memcg-specific control files are present. Please note, that not all cgroup v1's memory controller code is guarded yet (but most of it), it's a subject for some follow-up work. Thanks to Michal Hocko for providing a better Kconfig option description. [roman.gushchin@linux.dev: better config option description provided by Michal] Link: https://lkml.kernel.org/r/ZnxXNtvqllc9CDoo@google.com Link: https://lkml.kernel.org/r/20240625005906.106920-14-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Michal Hocko Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a70d64ed04f5..796cfa842346 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1851,7 +1851,7 @@ static inline bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg) /* Cgroup v1-related declarations */ -#ifdef CONFIG_MEMCG +#ifdef CONFIG_MEMCG_V1 unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, unsigned long *total_scanned); @@ -1883,7 +1883,7 @@ static inline void mem_cgroup_unlock_pages(void) rcu_read_unlock(); } -#else /* CONFIG_MEMCG */ +#else /* CONFIG_MEMCG_V1 */ static inline unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, @@ -1922,6 +1922,6 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) return false; } -#endif /* CONFIG_MEMCG */ +#endif /* CONFIG_MEMCG_V1 */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3 From 410abb20acaea9679fc1b2276d47e70fba331451 Mon Sep 17 00:00:00 2001 From: Dan Schatzberg Date: Wed, 3 Jan 2024 08:48:36 -0800 Subject: mm: add defines for min/max swappiness Patch series "Add swappiness argument to memory.reclaim", v6. This patch proposes augmenting the memory.reclaim interface with a swappiness= argument that overrides the swappiness value for that instance of proactive reclaim. Userspace proactive reclaimers use the memory.reclaim interface to trigger reclaim. The memory.reclaim interface does not allow for any way to effect the balance of file vs anon during proactive reclaim. The only approach is to adjust the vm.swappiness setting. However, there are a few reasons we look to control the balance of file vs anon during proactive reclaim, separately from reactive reclaim: * Swapout should be limited to manage SSD write endurance. In near-OOM situations we are fine with lots of swap-out to avoid OOMs. As these are typically rare events, they have relatively little impact on write endurance. However, proactive reclaim runs continuously and so its impact on SSD write endurance is more significant. Therefore it is desireable to control swap-out for proactive reclaim separately from reactive reclaim * Some userspace OOM killers like systemd-oomd[1] support OOM killing on swap exhaustion. This makes sense if the swap exhaustion is triggered due to reactive reclaim but less so if it is triggered due to proactive reclaim (e.g. one could see OOMs when free memory is ample but anon is just particularly cold). Therefore, it's desireable to have proactive reclaim reduce or stop swap-out before the threshold at which OOM killing occurs. In the case of Meta's Senpai proactive reclaimer, we adjust vm.swappiness before writes to memory.reclaim[2]. This has been in production for nearly two years and has addressed our needs to control proactive vs reactive reclaim behavior but is still not ideal for a number of reasons: * vm.swappiness is a global setting, adjusting it can race/interfere with other system administration that wishes to control vm.swappiness. In our case, we need to disable Senpai before adjusting vm.swappiness. * vm.swappiness is stateful - so a crash or restart of Senpai can leave a misconfigured setting. This requires some additional management to record the "desired" setting and ensure Senpai always adjusts to it. With this patch, we avoid these downsides of adjusting vm.swappiness globally. Previously, this exact interface addition was proposed by Yosry[3]. In response, Roman proposed instead an interface to specify precise file/anon/slab reclaim amounts[4]. More recently Huan also proposed this as well[5] and others similarly questioned if this was the proper interface. Previous proposals sought to use this to allow proactive reclaimers to effectively perform a custom reclaim algorithm by issuing proactive reclaim with different settings to control file vs anon reclaim (e.g. to only reclaim anon from some applications). Responses argued that adjusting swappiness is a poor interface for custom reclaim. In contrast, I argue in favor of a swappiness setting not as a way to implement custom reclaim algorithms but rather to bias the balance of anon vs file due to differences of proactive vs reactive reclaim. In this context, swappiness is the existing interface for controlling this balance and this patch simply allows for it to be configured differently for proactive vs reactive reclaim. Specifying explicit amounts of anon vs file pages to reclaim feels inappropriate for this prupose. Proactive reclaimers are un-aware of the relative age of file vs anon for a cgroup which makes it difficult to manage proactive reclaim of different memory pools. A proactive reclaimer would need some amount of anon reclaim attempts separate from the amount of file reclaim attempts which seems brittle given that it's difficult to observe the impact. [1]https://www.freedesktop.org/software/systemd/man/latest/systemd-oomd.service.html [2]https://github.com/facebookincubator/oomd/blob/main/src/oomd/plugins/Senpai.cpp#L585-L598 [3]https://lore.kernel.org/linux-mm/CAJD7tkbDpyoODveCsnaqBBMZEkDvshXJmNdbk51yKSNgD7aGdg@mail.gmail.com/ [4]https://lore.kernel.org/linux-mm/YoPHtHXzpK51F%2F1Z@carbon/ [5]https://lore.kernel.org/lkml/20231108065818.19932-1-link@vivo.com/ This patch (of 2): We use the constants 0 and 200 in a few places in the mm code when referring to the min and max swappiness. This patch adds MIN_SWAPPINESS and MAX_SWAPPINESS #defines to improve clarity. There are no functional changes. Link: https://lkml.kernel.org/r/20240103164841.2800183-1-schatzberg.dan@gmail.com Link: https://lkml.kernel.org/r/20240103164841.2800183-2-schatzberg.dan@gmail.com Signed-off-by: Dan Schatzberg Acked-by: David Rientjes Acked-by: Chris Li Reviewed-by: Nhat Pham Cc: David Hildenbrand Cc: Hugh Dickins Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Shakeel Butt Cc: Tejun Heo Cc: Yosry Ahmed Cc: Yue Zhao Cc: Zefan Li Signed-off-by: Andrew Morton --- include/linux/swap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index d33ce740b695..e97db0fa7659 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -404,6 +404,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #define MEMCG_RECLAIM_MAY_SWAP (1 << 1) #define MEMCG_RECLAIM_PROACTIVE (1 << 2) +#define MIN_SWAPPINESS 0 +#define MAX_SWAPPINESS 200 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, -- cgit v1.2.3 From 68cd9050d871e4db5433420b5ceb32f5512d18bc Mon Sep 17 00:00:00 2001 From: Dan Schatzberg Date: Wed, 3 Jan 2024 08:48:37 -0800 Subject: mm: add swappiness= arg to memory.reclaim Allow proactive reclaimers to submit an additional swappiness= argument to memory.reclaim. This overrides the global or per-memcg swappiness setting for that reclaim attempt. For example: echo "2M swappiness=0" > /sys/fs/cgroup/memory.reclaim will perform reclaim on the rootcg with a swappiness setting of 0 (no swap) regardless of the vm.swappiness sysctl setting. Userspace proactive reclaimers use the memory.reclaim interface to trigger reclaim. The memory.reclaim interface does not allow for any way to effect the balance of file vs anon during proactive reclaim. The only approach is to adjust the vm.swappiness setting. However, there are a few reasons we look to control the balance of file vs anon during proactive reclaim, separately from reactive reclaim: * Swapout should be limited to manage SSD write endurance. In near-OOM situations we are fine with lots of swap-out to avoid OOMs. As these are typically rare events, they have relatively little impact on write endurance. However, proactive reclaim runs continuously and so its impact on SSD write endurance is more significant. Therefore it is desireable to control swap-out for proactive reclaim separately from reactive reclaim * Some userspace OOM killers like systemd-oomd[1] support OOM killing on swap exhaustion. This makes sense if the swap exhaustion is triggered due to reactive reclaim but less so if it is triggered due to proactive reclaim (e.g. one could see OOMs when free memory is ample but anon is just particularly cold). Therefore, it's desireable to have proactive reclaim reduce or stop swap-out before the threshold at which OOM killing occurs. In the case of Meta's Senpai proactive reclaimer, we adjust vm.swappiness before writes to memory.reclaim[2]. This has been in production for nearly two years and has addressed our needs to control proactive vs reactive reclaim behavior but is still not ideal for a number of reasons: * vm.swappiness is a global setting, adjusting it can race/interfere with other system administration that wishes to control vm.swappiness. In our case, we need to disable Senpai before adjusting vm.swappiness. * vm.swappiness is stateful - so a crash or restart of Senpai can leave a misconfigured setting. This requires some additional management to record the "desired" setting and ensure Senpai always adjusts to it. With this patch, we avoid these downsides of adjusting vm.swappiness globally. [1]https://www.freedesktop.org/software/systemd/man/latest/systemd-oomd.service.html [2]https://github.com/facebookincubator/oomd/blob/main/src/oomd/plugins/Senpai.cpp#L585-L598 Link: https://lkml.kernel.org/r/20240103164841.2800183-3-schatzberg.dan@gmail.com Signed-off-by: Dan Schatzberg Suggested-by: Yosry Ahmed Acked-by: Michal Hocko Acked-by: David Rientjes Acked-by: Chris Li Cc: David Hildenbrand Cc: Hugh Dickins Cc: Johannes Weiner Cc: Jonathan Corbet Cc: Kefeng Wang Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Cc: Roman Gushchin Cc: Shakeel Butt Cc: Shakeel Butt Cc: Tejun Heo Cc: Yue Zhao Cc: Zefan Li Cc: Nhat Pham Signed-off-by: Andrew Morton --- include/linux/swap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index e97db0fa7659..3df75d62a835 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -409,7 +409,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, - unsigned int reclaim_options); + unsigned int reclaim_options, + int *swappiness); extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, gfp_t gfp_mask, bool noswap, pg_data_t *pgdat, -- cgit v1.2.3 From 773e9ae77fe777ac09739d8c32b32fff147f2d66 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Jun 2024 21:03:10 +0000 Subject: mm: memcg: factor out legacy socket memory accounting code Move out the legacy cgroup v1 socket memory accounting code into mm/memcontrol-v1.c. This commit introduces three new functions: memcg1_tcpmem_active(), memcg1_charge_skmem() and memcg1_uncharge_skmem(), which contain all cgroup v1-specific code and become trivial if CONFIG_MEMCG_V1 isn't set. Note, that !!memcg->tcpmem_pressure check in mem_cgroup_under_socket_pressure() can't be easily moved into memcontrol-v1.h without including memcontrol-v1.h from memcontrol.h which isn't a good idea, so it's better to just #ifdef it. Link: https://lkml.kernel.org/r/20240628210317.272856-3-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 796cfa842346..44ab6394c9ed 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1650,8 +1650,10 @@ void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { +#ifdef CONFIG_MEMCG_V1 if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) return !!memcg->tcpmem_pressure; +#endif /* CONFIG_MEMCG_V1 */ do { if (time_before(jiffies, READ_ONCE(memcg->socket_pressure))) return true; -- cgit v1.2.3 From 94b7e5bf09b08aa4cce4625d0a4b307399b77e2c Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Jun 2024 21:03:14 +0000 Subject: mm: memcg: put memcg1-specific struct mem_cgroup's members under CONFIG_MEMCG_V1 Put memcg1-specific members of struct mem_cgroup under the CONFIG_MEMCG_V1 config option. Also group them close to the end of struct mem_cgroup just before the dynamic per-node part. Link: https://lkml.kernel.org/r/20240628210317.272856-7-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 103 +++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 44ab6394c9ed..107b0c5d6eab 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -188,10 +188,6 @@ struct mem_cgroup { struct page_counter memsw; /* v1 only */ }; - /* Legacy consumer-oriented counters */ - struct page_counter kmem; /* v1 only */ - struct page_counter tcpmem; /* v1 only */ - /* Range enforcement for interrupt charges */ struct work_struct high_work; @@ -205,8 +201,6 @@ struct mem_cgroup { bool zswap_writeback; #endif - unsigned long soft_limit; - /* vmpressure notifications */ struct vmpressure vmpressure; @@ -215,13 +209,7 @@ struct mem_cgroup { */ bool oom_group; - /* protected by memcg_oom_lock */ - bool oom_lock; - int under_oom; - - int swappiness; - /* OOM-Killer disable */ - int oom_kill_disable; + int swappiness; /* memory.events and memory.events.local */ struct cgroup_file events_file; @@ -230,27 +218,6 @@ struct mem_cgroup { /* handle for "memory.swap.events" */ struct cgroup_file swap_events_file; - /* protect arrays of thresholds */ - struct mutex thresholds_lock; - - /* thresholds for memory usage. RCU-protected */ - struct mem_cgroup_thresholds thresholds; - - /* thresholds for mem+swap usage. RCU-protected */ - struct mem_cgroup_thresholds memsw_thresholds; - - /* For oom notifier event fd */ - struct list_head oom_notify; - - /* - * Should we move charges of a task when a task is moved into this - * mem_cgroup ? And what type of charges should we move ? - */ - unsigned long move_charge_at_immigrate; - /* taken only while moving_account > 0 */ - spinlock_t move_lock; - unsigned long move_lock_flags; - CACHELINE_PADDING(_pad1_); /* memory.stat */ @@ -267,10 +234,6 @@ struct mem_cgroup { */ unsigned long socket_pressure; - /* Legacy tcp memory accounting */ - bool tcpmem_active; - int tcpmem_pressure; - #ifdef CONFIG_MEMCG_KMEM int kmemcg_id; /* @@ -284,14 +247,6 @@ struct mem_cgroup { struct list_head objcg_list; #endif - CACHELINE_PADDING(_pad2_); - - /* - * set > 0 if pages under this cgroup are moving to other cgroup. - */ - atomic_t moving_account; - struct task_struct *move_lock_task; - struct memcg_vmstats_percpu __percpu *vmstats_percpu; #ifdef CONFIG_CGROUP_WRITEBACK @@ -300,10 +255,6 @@ struct mem_cgroup { struct memcg_cgwb_frn cgwb_frn[MEMCG_CGWB_FRN_CNT]; #endif - /* List of events which userspace want to receive */ - struct list_head event_list; - spinlock_t event_list_lock; - #ifdef CONFIG_TRANSPARENT_HUGEPAGE struct deferred_split deferred_split_queue; #endif @@ -313,6 +264,58 @@ struct mem_cgroup { struct lru_gen_mm_list mm_list; #endif +#ifdef CONFIG_MEMCG_V1 + /* Legacy consumer-oriented counters */ + struct page_counter kmem; /* v1 only */ + struct page_counter tcpmem; /* v1 only */ + + unsigned long soft_limit; + + /* protected by memcg_oom_lock */ + bool oom_lock; + int under_oom; + + /* OOM-Killer disable */ + int oom_kill_disable; + + /* protect arrays of thresholds */ + struct mutex thresholds_lock; + + /* thresholds for memory usage. RCU-protected */ + struct mem_cgroup_thresholds thresholds; + + /* thresholds for mem+swap usage. RCU-protected */ + struct mem_cgroup_thresholds memsw_thresholds; + + /* For oom notifier event fd */ + struct list_head oom_notify; + + /* + * Should we move charges of a task when a task is moved into this + * mem_cgroup ? And what type of charges should we move ? + */ + unsigned long move_charge_at_immigrate; + /* taken only while moving_account > 0 */ + spinlock_t move_lock; + unsigned long move_lock_flags; + + /* Legacy tcp memory accounting */ + bool tcpmem_active; + int tcpmem_pressure; + + CACHELINE_PADDING(_pad2_); + + /* + * set > 0 if pages under this cgroup are moving to other cgroup. + */ + atomic_t moving_account; + struct task_struct *move_lock_task; + + /* List of events which userspace want to receive */ + struct list_head event_list; + spinlock_t event_list_lock; +#endif /* CONFIG_MEMCG_V1 */ + struct mem_cgroup_per_node *nodeinfo[]; }; -- cgit v1.2.3 From 98c9daf5ae6be008f78c07b744bcff7bcc6e98da Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Jun 2024 21:03:15 +0000 Subject: mm: memcg: guard memcg1-specific members of struct mem_cgroup_per_node Put memcg1-specific members of struct mem_cgroup_per_node under the CONFIG_MEMCG_V1 config option. Link: https://lkml.kernel.org/r/20240628210317.272856-8-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 107b0c5d6eab..c7ef628ee882 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -92,6 +92,7 @@ struct mem_cgroup_per_node { struct lruvec_stats *lruvec_stats; struct shrinker_info __rcu *shrinker_info; +#ifdef CONFIG_MEMCG_V1 /* * Memcg-v1 only stuff in middle as buffer between read mostly fields * and update often fields to avoid false sharing. Once v1 stuff is @@ -102,6 +103,7 @@ struct mem_cgroup_per_node { unsigned long usage_in_excess;/* Set to the value by which */ /* the soft limit is exceeded*/ bool on_tree; +#endif /* Fields which get updated often at the end. */ struct lruvec lruvec; -- cgit v1.2.3 From 1c3a0b3d0b11fb98c40360f849563185218c2dd4 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Jun 2024 21:03:16 +0000 Subject: mm: memcg: put struct task_struct::memcg_in_oom under CONFIG_MEMCG_V1 The memcg_in_oom field of the struct task_struct is not used by the cgroup v2's memory controller, so it can be happily compiled out if CONFIG_MEMCG_V1 is not set. Link: https://lkml.kernel.org/r/20240628210317.272856-9-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/sched.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6..2a16023e8620 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1447,9 +1447,11 @@ struct task_struct { unsigned int kcov_softirq; #endif -#ifdef CONFIG_MEMCG +#ifdef CONFIG_MEMCG_V1 struct mem_cgroup *memcg_in_oom; +#endif +#ifdef CONFIG_MEMCG /* Number of pages to reclaim on returning to userland: */ unsigned int memcg_nr_pages_over_high; -- cgit v1.2.3 From 1419ff984aad4c08d121793f71a520b432ead88a Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 28 Jun 2024 21:03:17 +0000 Subject: mm: memcg: put struct task_struct::in_user_fault under CONFIG_MEMCG_V1 The struct task_struct's in_user_fault member is not used by the cgroup v2's memory controller, so it can be put under the CONFIG_MEMCG_V1 config option. To do so, mem_cgroup_enter_user_fault() and mem_cgroup_exit_user_fault() are moved under the CONFIG_MEMCG_V1 option as well. Link: https://lkml.kernel.org/r/20240628210317.272856-10-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 40 ++++++++++++++++++++-------------------- include/linux/sched.h | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c7ef628ee882..d0c9365ff039 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -943,18 +943,6 @@ void mem_cgroup_print_oom_context(struct mem_cgroup *memcg, void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg); -static inline void mem_cgroup_enter_user_fault(void) -{ - WARN_ON(current->in_user_fault); - current->in_user_fault = 1; -} - -static inline void mem_cgroup_exit_user_fault(void) -{ - WARN_ON(!current->in_user_fault); - current->in_user_fault = 0; -} - struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim, struct mem_cgroup *oom_domain); void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); @@ -1402,14 +1390,6 @@ static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask) { } -static inline void mem_cgroup_enter_user_fault(void) -{ -} - -static inline void mem_cgroup_exit_user_fault(void) -{ -} - static inline struct mem_cgroup *mem_cgroup_get_oom_group( struct task_struct *victim, struct mem_cgroup *oom_domain) { @@ -1890,6 +1870,18 @@ static inline void mem_cgroup_unlock_pages(void) rcu_read_unlock(); } +static inline void mem_cgroup_enter_user_fault(void) +{ + WARN_ON(current->in_user_fault); + current->in_user_fault = 1; +} + +static inline void mem_cgroup_exit_user_fault(void) +{ + WARN_ON(!current->in_user_fault); + current->in_user_fault = 0; +} + #else /* CONFIG_MEMCG_V1 */ static inline unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order, @@ -1929,6 +1921,14 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) return false; } +static inline void mem_cgroup_enter_user_fault(void) +{ +} + +static inline void mem_cgroup_exit_user_fault(void) +{ +} + #endif /* CONFIG_MEMCG_V1 */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 2a16023e8620..a7770c566c4d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -934,7 +934,7 @@ struct task_struct { #ifndef TIF_RESTORE_SIGMASK unsigned restore_sigmask:1; #endif -#ifdef CONFIG_MEMCG +#ifdef CONFIG_MEMCG_V1 unsigned in_user_fault:1; #endif #ifdef CONFIG_LRU_GEN -- cgit v1.2.3 From 50183ac2cfb54e027dd36fb22ea1bd1e91e3a08b Mon Sep 17 00:00:00 2001 From: Ram Prakash Gupta Date: Thu, 27 Jun 2024 14:07:55 +0530 Subject: scsi: ufs: core: Suspend clk scaling on no request Currently UFS clk scaling is getting suspended only when the clks are scaled down. When high load is generated, a huge amount of latency is added due to scaling up the clk and completing the request post that. Suspending the scaling in its existing state when high load is generated improves the random performance KPI by 28%. So suspending the scaling when there are no requests. And the clk would be put in low scaled state when the actual request load is low. Make this change optional by having the check enabled using vops since for some devices suspending without bringing the clk in low scaled state might have impact on power consumption of the SoC. Signed-off-by: Ram Prakash Gupta Link: https://lore.kernel.org/r/20240627083756.25340-2-quic_rampraka@quicinc.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 9e0581115b34..049520bcc6c0 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -459,6 +459,7 @@ struct ufs_clk_scaling { bool is_initialized; bool is_busy_started; bool is_suspended; + bool suspend_on_no_request; }; #define UFS_EVENT_HIST_LENGTH 8 -- cgit v1.2.3 From 81e7706345f06e1e97a092f59697b7e20a0ee868 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 4 Jul 2024 14:28:14 +0900 Subject: dm: handle REQ_OP_ZONE_RESET_ALL This commit implements processing of the REQ_OP_ZONE_RESET_ALL operation for zoned mapped devices. Given that this operation always has a BIO sector of 0 and a 0 size, processing through the regular BIO __split_and_process_bio() function does not work because this function would always select the first target. Instead, handling of this operation is implemented using the function __send_zone_reset_all(). Similarly to the __send_empty_flush() function, the new __send_zone_reset_all() function manually goes through all targets of a mapped device table doing the following: 1) If the target can natively support REQ_OP_ZONE_RESET_ALL, __send_duplicate_bios() is used to forward the reset all operation to the target. This case is handled with the __send_zone_reset_all_native() function. 2) For other targets, the function __send_zone_reset_all_emulated() is executed to emulate the execution of REQ_OP_ZONE_RESET_ALL using regular REQ_OP_ZONE_RESET operations. Targets that can natively support REQ_OP_ZONE_RESET_ALL are identified using the new target field zone_reset_all_supported. This boolean is set to true in for targets that have reliable zone limits, that is, targets that map all sequential write required zones of their zoned device(s). Setting this field is handled in dm_set_zones_restrictions() and device_get_zone_resource_limits(). For targets with unreliable zone limits, REQ_OP_ZONE_RESET_ALL must be emulated (case 2 above). This is implemented with __send_zone_reset_all_emulated() and is similar to the block layer function blkdev_zone_reset_all_emulated(): first a report zones is done for the zones of the target to identify zones that need reset, that is, any sequential write required zone that is not already empty. This is done using a bitmap and the function dm_zone_get_reset_bitmap() which sets to 1 the bit corresponding to a zone that needs reset. Next, this zone bitmap is inspected and a clone BIO modified to use the REQ_OP_ZONE_RESET operation issued for any zone with its bit set in the zone bitmap. This implementation is more efficient than what the block layer does with blkdev_zone_reset_all_emulated(), which is always used for DM zoned devices currently: as we can natively use REQ_OP_ZONE_RESET_ALL on targets mapping all sequential write required zones, resetting all zones of a zoned mapped device can be much faster compared to always emulating this operation using regular per-zone reset. In the worst case, this implementation is as-efficient as the block layer emulation. This reduction in the time it takes to reset all zones of a zoned mapped device depends directly on the mapped device targets mapping (reliable zone limits or not). Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240704052816.623865-4-dlemoal@kernel.org Signed-off-by: Jens Axboe --- include/linux/device-mapper.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 82b2195efaca..15d28164bbbd 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -357,6 +357,13 @@ struct dm_target { */ bool discards_supported:1; + /* + * Automatically set by dm-core if this target supports + * REQ_OP_ZONE_RESET_ALL. Otherwise, this operation will be emulated + * using REQ_OP_ZONE_RESET. Target drivers must not set this manually. + */ + bool zone_reset_all_supported:1; + /* * Set if this target requires that discards be split on * 'max_discard_sectors' boundaries. -- cgit v1.2.3 From f2a7bea23710fceb99dac6da4ef82c3cc8932f7f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 4 Jul 2024 14:28:15 +0900 Subject: block: Remove REQ_OP_ZONE_RESET_ALL emulation Now that device mapper can handle resetting all zones of a mapped zoned device using REQ_OP_ZONE_RESET_ALL, all zoned block device drivers support this operation. With this, the request queue feature BLK_FEAT_ZONE_RESETALL is not necessary and the emulation code in blk-zone.c can be removed. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240704052816.623865-5-dlemoal@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4d0d4b83bc74..dc250d8070d2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -318,9 +318,6 @@ typedef unsigned int __bitwise blk_features_t; /* is a zoned device */ #define BLK_FEAT_ZONED ((__force blk_features_t)(1u << 10)) -/* supports Zone Reset All */ -#define BLK_FEAT_ZONE_RESETALL ((__force blk_features_t)(1u << 11)) - /* supports PCI(e) p2p requests */ #define BLK_FEAT_PCI_P2PDMA ((__force blk_features_t)(1u << 12)) @@ -618,8 +615,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL)) #define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT) -#define blk_queue_zone_resetall(q) \ - ((q)->limits.features & BLK_FEAT_ZONE_RESETALL) #define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX) #define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA) #ifdef CONFIG_BLK_RQ_ALLOC_TIME -- cgit v1.2.3 From bf86bcdb40123ee99669ee91b67e023669433a1a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 18:51:20 +0200 Subject: blk-lib: check for kill signal in ioctl BLKZEROOUT Zeroout can access a significant capacity and take longer than the user expected. A user may change their mind about wanting to run that command and attempt to kill the process and do something else with their device. But since the task is uninterruptable, they have to wait for it to finish, which could be many hours. Add a new BLKDEV_ZERO_KILLABLE flag for blkdev_issue_zeroout that checks for a fatal signal at each iteration so the user doesn't have to wait for their regretted operation to complete naturally. Heavily based on an earlier patch from Keith Busch. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240701165219.1571322-11-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dc250d8070d2..02e04df27282 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1095,6 +1095,7 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, #define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */ #define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */ +#define BLKDEV_ZERO_KILLABLE (1 << 2) /* interruptible by fatal signals */ extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, -- cgit v1.2.3 From f37bee9508885ce5edd4875c744e1af28cfac76c Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 1 Jul 2024 21:28:32 +0300 Subject: net: pcs: xpcs: Move native device ID macro to linux/pcs/pcs-xpcs.h One of the next commits will alter the DW XPCS driver to support setting a custom device ID for the particular MDIO-device detected on the platform. The generic DW XPCS ID can be used as a custom ID as well in case if the DW XPCS-device was erroneously synthesized with no or some undefined ID. In addition to that having all supported DW XPCS device IDs defined in a single place will improve the code maintainability and readability. Note while at it rename the macros to being shorter and looking alike to the already defined NXP XPCS ID macro. Signed-off-by: Serge Semin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/pcs/pcs-xpcs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index da3a6c30f6d2..8dfe90295f12 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -12,6 +12,8 @@ #define NXP_SJA1105_XPCS_ID 0x00000010 #define NXP_SJA1110_XPCS_ID 0x00000020 +#define DW_XPCS_ID 0x7996ced0 +#define DW_XPCS_ID_MASK 0xffffffff /* AN mode */ #define DW_AN_C73 1 -- cgit v1.2.3 From 71b200b388ef2ff1b547114f17dc1fd088bfc2c6 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 1 Jul 2024 21:28:34 +0300 Subject: net: pcs: xpcs: Convert xpcs_id to dw_xpcs_desc A structure with the PCS/PMA MMD IDs data is being introduced in one of the next commits. In order to prevent the names ambiguity let's convert the xpcs_id structure name to dw_xpcs_desc. The later version is more suitable since the structure content is indeed the device descriptor containing the data and callbacks required for the driver to correctly set the device up. Signed-off-by: Serge Semin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/pcs/pcs-xpcs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 8dfe90295f12..e706bd16b986 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -28,11 +28,11 @@ /* dev_flag */ #define DW_DEV_TXGBE BIT(0) -struct xpcs_id; +struct dw_xpcs_desc; struct dw_xpcs { + const struct dw_xpcs_desc *desc; struct mdio_device *mdiodev; - const struct xpcs_id *id; struct phylink_pcs pcs; phy_interface_t interface; int dev_flag; -- cgit v1.2.3 From bcac735cf653ef8dc6d7c08ed311e0a77f2665f0 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 1 Jul 2024 21:28:36 +0300 Subject: net: pcs: xpcs: Introduce DW XPCS info structure The being introduced structure will preserve the PCS and PMA IDs retrieved from the respective DW XPCS MMDs or potentially pre-defined by the client drivers. (The later change will be introduced later in the framework of the commit adding the memory-mapped DW XPCS devices support.) The structure fields are filled in in the xpcs_get_id() function, which used to be responsible for the PCS Device ID getting only. Besides of the PCS ID the method now fetches the PMA/PMD IDs too from MMD 1, which used to be done in xpcs_dev_flag(). The retrieved PMA ID will be from now utilized for the PMA-specific tweaks like it was introduced for the Wangxun TxGBE PCS in the commit f629acc6f210 ("net: pcs: xpcs: support to switch mode for Wangxun NICs"). Note 1. The xpcs_get_id() error-handling semantics has been changed. From now the error number will be returned from the function. There is no point in the next IOs or saving 0xffs and then looping over the actual device IDs if device couldn't be reached. -ENODEV will be returned if the very first IO operation failed thus indicating that no device could be found. Note 2. The PCS and PMA IDs macros have been converted to enum'es. The enum'es will be populated later in another commit with the virtual IDs identifying the DW XPCS devices which have some platform-specifics, but have been synthesized with the default PCS/PMA ID. Signed-off-by: Serge Semin Signed-off-by: David S. Miller --- include/linux/pcs/pcs-xpcs.h | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index e706bd16b986..1dc60f5e653f 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -9,11 +9,7 @@ #include #include - -#define NXP_SJA1105_XPCS_ID 0x00000010 -#define NXP_SJA1110_XPCS_ID 0x00000020 -#define DW_XPCS_ID 0x7996ced0 -#define DW_XPCS_ID_MASK 0xffffffff +#include /* AN mode */ #define DW_AN_C73 1 @@ -22,20 +18,30 @@ #define DW_AN_C37_1000BASEX 4 #define DW_10GBASER 5 -/* device vendor OUI */ -#define DW_OUI_WX 0x0018fc80 +struct dw_xpcs_desc; -/* dev_flag */ -#define DW_DEV_TXGBE BIT(0) +enum dw_xpcs_pcs_id { + NXP_SJA1105_XPCS_ID = 0x00000010, + NXP_SJA1110_XPCS_ID = 0x00000020, + DW_XPCS_ID = 0x7996ced0, + DW_XPCS_ID_MASK = 0xffffffff, +}; -struct dw_xpcs_desc; +enum dw_xpcs_pma_id { + WX_TXGBE_XPCS_PMA_10G_ID = 0x0018fc80, +}; + +struct dw_xpcs_info { + u32 pcs; + u32 pma; +}; struct dw_xpcs { + struct dw_xpcs_info info; const struct dw_xpcs_desc *desc; struct mdio_device *mdiodev; struct phylink_pcs pcs; phy_interface_t interface; - int dev_flag; }; int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); -- cgit v1.2.3 From f6bb3e9d98c2e8d70587d5ddaf9426ef30d7865c Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 1 Jul 2024 21:28:38 +0300 Subject: net: pcs: xpcs: Add Synopsys DW xPCS platform device driver Synopsys DesignWare XPCS IP-core can be synthesized with the device CSRs being accessible over the MCI or APB3 interface instead of the MDIO bus (see the CSR_INTERFACE HDL parameter). Thus all the PCS registers can be just memory mapped and be a subject of the standard MMIO operations of course taking into account the peculiarities of the Clause C45 CSRs mapping. From that perspective the DW XPCS devices would look as just normal platform devices for the kernel. On the other hand in order to have the DW XPCS devices handled by the pcs-xpcs.c driver they need to be registered in the framework of the MDIO-subsystem. So the suggested change is about providing a DW XPCS platform device driver registering a virtual MDIO-bus with a single MDIO-device representing the DW XPCS device. DW XPCS platform device is supposed to be described by the respective compatible string "snps,dw-xpcs" (or with the PMA-specific compatible string), CSRs memory space and optional peripheral bus and reference clock sources. Depending on the INDIRECT_ACCESS IP-core synthesize parameter the memory-mapped reg-space can be represented as either directly or indirectly mapped Clause 45 space. In the former case the particular address is determined based on the MMD device and the registers offset (5 + 16 bits all together) within the device reg-space. In the later case there is only 8 lower address bits are utilized for the registers mapping (255 CSRs). The upper bits are supposed to be written into the respective viewport CSR in order to select the respective MMD sub-page. Note, only the peripheral bus clock source is requested in the platform device probe procedure. The core and pad clocks handling has been implemented in the framework of the xpcs_create() method intentionally since the clocks-related setups are supposed to be performed later, during the DW XPCS main configuration procedures. (For instance they will be required for the DW Gen5 10G PMA configuration.) Signed-off-by: Serge Semin Signed-off-by: David S. Miller --- include/linux/pcs/pcs-xpcs.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 1dc60f5e653f..813be644647f 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -7,6 +7,8 @@ #ifndef __LINUX_PCS_XPCS_H #define __LINUX_PCS_XPCS_H +#include +#include #include #include #include @@ -21,6 +23,7 @@ struct dw_xpcs_desc; enum dw_xpcs_pcs_id { + DW_XPCS_ID_NATIVE = 0, NXP_SJA1105_XPCS_ID = 0x00000010, NXP_SJA1110_XPCS_ID = 0x00000020, DW_XPCS_ID = 0x7996ced0, @@ -28,6 +31,14 @@ enum dw_xpcs_pcs_id { }; enum dw_xpcs_pma_id { + DW_XPCS_PMA_ID_NATIVE = 0, + DW_XPCS_PMA_GEN1_3G_ID, + DW_XPCS_PMA_GEN2_3G_ID, + DW_XPCS_PMA_GEN2_6G_ID, + DW_XPCS_PMA_GEN4_3G_ID, + DW_XPCS_PMA_GEN4_6G_ID, + DW_XPCS_PMA_GEN5_10G_ID, + DW_XPCS_PMA_GEN5_12G_ID, WX_TXGBE_XPCS_PMA_10G_ID = 0x0018fc80, }; @@ -36,10 +47,17 @@ struct dw_xpcs_info { u32 pma; }; +enum dw_xpcs_clock { + DW_XPCS_CORE_CLK, + DW_XPCS_PAD_CLK, + DW_XPCS_NUM_CLKS, +}; + struct dw_xpcs { struct dw_xpcs_info info; const struct dw_xpcs_desc *desc; struct mdio_device *mdiodev; + struct clk_bulk_data clks[DW_XPCS_NUM_CLKS]; struct phylink_pcs pcs; phy_interface_t interface; }; -- cgit v1.2.3 From 9cad7275463ab9e52aeae8d2ecb169afee6876f3 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 1 Jul 2024 21:28:39 +0300 Subject: net: pcs: xpcs: Add fwnode-based descriptor creation method It's now possible to have the DW XPCS device defined as a standard platform device for instance in the platform DT-file. Although that functionality is useless unless there is a way to have the device found by the client drivers (STMMAC/DW *MAC, NXP SJA1105 Eth Switch, etc). Provide such ability by means of the xpcs_create_fwnode() method. It needs to be called with the device DW XPCS fwnode instance passed. That node will be then used to find the MDIO-device instance in order to create the DW XPCS descriptor. Note the method semantics and name is similar to what has been recently introduced in the Lynx PCS driver. Signed-off-by: Serge Semin Signed-off-by: David S. Miller --- include/linux/pcs/pcs-xpcs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index 813be644647f..b4a4eb6c8866 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -8,6 +8,7 @@ #define __LINUX_PCS_XPCS_H #include +#include #include #include #include @@ -72,6 +73,8 @@ int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr, phy_interface_t interface); +struct dw_xpcs *xpcs_create_fwnode(struct fwnode_handle *fwnode, + phy_interface_t interface); void xpcs_destroy(struct dw_xpcs *xpcs); #endif /* __LINUX_PCS_XPCS_H */ -- cgit v1.2.3 From 351066bad6ade5ad0f5f90fde2dc759759959969 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Mon, 1 Jul 2024 21:28:40 +0300 Subject: net: stmmac: Create DW XPCS device with particular address Currently the only STMMAC platform driver using the DW XPCS code is the Intel mGBE device driver. (It can be determined by finding all the drivers having the stmmac_mdio_bus_data::has_xpcs flag set.) At the same time the low-level platform driver masks out the DW XPCS MDIO-address from being auto-detected as PHY by the MDIO subsystem core. Seeing the PCS MDIO ID is known the procedure of the DW XPCS device creation can be simplified by dropping the loop over all the MDIO IDs. From now the DW XPCS device descriptor will be created for the MDIO-bus address pre-defined by the platform drivers via the stmmac_mdio_bus_data::pcs_mask field. Note besides this shall speed up a bit the Intel mGBE probing. Signed-off-by: Serge Semin Signed-off-by: David S. Miller --- include/linux/stmmac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 9c54f82901a1..84e13bd5df28 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -82,7 +82,7 @@ struct stmmac_priv; struct stmmac_mdio_bus_data { unsigned int phy_mask; - unsigned int has_xpcs; + unsigned int pcs_mask; unsigned int default_an_inband; int *irqs; int probed_phy_irq; -- cgit v1.2.3 From 87b6426ab92efd91a17aed1f5b1d94f36938e28f Mon Sep 17 00:00:00 2001 From: Pascal Paillet Date: Fri, 28 Jun 2024 10:58:11 +0200 Subject: regulator: Add STM32MP25 regulator bindings These bindings will be used for the SCMI voltage domain. Signed-off-by: Pascal Paillet Acked-by: Krzysztof Kozlowski Acked-by: Mark Brown Signed-off-by: Alexandre Torgue --- .../dt-bindings/regulator/st,stm32mp25-regulator.h | 48 ++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 include/dt-bindings/regulator/st,stm32mp25-regulator.h (limited to 'include') diff --git a/include/dt-bindings/regulator/st,stm32mp25-regulator.h b/include/dt-bindings/regulator/st,stm32mp25-regulator.h new file mode 100644 index 000000000000..3c3d30911dd0 --- /dev/null +++ b/include/dt-bindings/regulator/st,stm32mp25-regulator.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (C) 2024, STMicroelectronics - All Rights Reserved + */ + +#ifndef __DT_BINDINGS_REGULATOR_ST_STM32MP25_REGULATOR_H +#define __DT_BINDINGS_REGULATOR_ST_STM32MP25_REGULATOR_H + +/* SCMI voltage domains identifiers */ + +/* SOC Internal regulators */ +#define VOLTD_SCMI_VDDIO1 0 +#define VOLTD_SCMI_VDDIO2 1 +#define VOLTD_SCMI_VDDIO3 2 +#define VOLTD_SCMI_VDDIO4 3 +#define VOLTD_SCMI_VDDIO 4 +#define VOLTD_SCMI_UCPD 5 +#define VOLTD_SCMI_USB33 6 +#define VOLTD_SCMI_ADC 7 +#define VOLTD_SCMI_GPU 8 +#define VOLTD_SCMI_VREFBUF 9 + +/* STPMIC2 regulators */ +#define VOLTD_SCMI_STPMIC2_BUCK1 10 +#define VOLTD_SCMI_STPMIC2_BUCK2 11 +#define VOLTD_SCMI_STPMIC2_BUCK3 12 +#define VOLTD_SCMI_STPMIC2_BUCK4 13 +#define VOLTD_SCMI_STPMIC2_BUCK5 14 +#define VOLTD_SCMI_STPMIC2_BUCK6 15 +#define VOLTD_SCMI_STPMIC2_BUCK7 16 +#define VOLTD_SCMI_STPMIC2_LDO1 17 +#define VOLTD_SCMI_STPMIC2_LDO2 18 +#define VOLTD_SCMI_STPMIC2_LDO3 19 +#define VOLTD_SCMI_STPMIC2_LDO4 20 +#define VOLTD_SCMI_STPMIC2_LDO5 21 +#define VOLTD_SCMI_STPMIC2_LDO6 22 +#define VOLTD_SCMI_STPMIC2_LDO7 23 +#define VOLTD_SCMI_STPMIC2_LDO8 24 +#define VOLTD_SCMI_STPMIC2_REFDDR 25 + +/* External regulators */ +#define VOLTD_SCMI_REGU0 26 +#define VOLTD_SCMI_REGU1 27 +#define VOLTD_SCMI_REGU2 28 +#define VOLTD_SCMI_REGU3 29 +#define VOLTD_SCMI_REGU4 30 + +#endif /*__DT_BINDINGS_REGULATOR_ST_STM32MP25_REGULATOR_H */ -- cgit v1.2.3 From 093b0f366567aa3fed85c316f832607069202b23 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:52 +0200 Subject: net: psample: add user cookie Add a user cookie to the sample metadata so that sample emitters can provide more contextual information to samples. If present, send the user cookie in a new attribute: PSAMPLE_ATTR_USER_COOKIE. Reviewed-by: Michal Kubiak Acked-by: Eelco Chaudron Reviewed-by: Simon Horman Reviewed-by: Ido Schimmel Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-2-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/net/psample.h | 2 ++ include/uapi/linux/psample.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/psample.h b/include/net/psample.h index 0509d2d6be67..2ac71260a546 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -25,6 +25,8 @@ struct psample_metadata { out_tc_occ_valid:1, latency_valid:1, unused:5; + const u8 *user_cookie; + u32 user_cookie_len; }; struct psample_group *psample_group_get(struct net *net, u32 group_num); diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index e585db5bf2d2..e80637e1d97b 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -19,6 +19,7 @@ enum { PSAMPLE_ATTR_LATENCY, /* u64, nanoseconds */ PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */ PSAMPLE_ATTR_PROTO, /* u16 */ + PSAMPLE_ATTR_USER_COOKIE, /* binary, user provided data */ __PSAMPLE_ATTR_MAX }; -- cgit v1.2.3 From 7b1b2b60c63f070e0dfbe072ccaae13168b38d01 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:55 +0200 Subject: net: psample: allow using rate as probability Although not explicitly documented in the psample module itself, the definition of PSAMPLE_ATTR_SAMPLE_RATE seems inherited from act_sample. Quoting tc-sample(8): "RATE of 100 will lead to an average of one sampled packet out of every 100 observed." With this semantics, the rates that we can express with an unsigned 32-bits number are very unevenly distributed and concentrated towards "sampling few packets". For example, we can express a probability of 2.32E-8% but we cannot express anything between 100% and 50%. For sampling applications that are capable of sampling a decent amount of packets, this sampling rate semantics is not very useful. Add a new flag to the uAPI that indicates that the sampling rate is expressed in scaled probability, this is: - 0 is 0% probability, no packets get sampled. - U32_MAX is 100% probability, all packets get sampled. Reviewed-by: Aaron Conole Acked-by: Eelco Chaudron Reviewed-by: Ido Schimmel Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-5-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/net/psample.h | 3 ++- include/uapi/linux/psample.h | 10 +++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/psample.h b/include/net/psample.h index 2ac71260a546..c52e9ebd88dd 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -24,7 +24,8 @@ struct psample_metadata { u8 out_tc_valid:1, out_tc_occ_valid:1, latency_valid:1, - unused:5; + rate_as_probability:1, + unused:4; const u8 *user_cookie; u32 user_cookie_len; }; diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index e80637e1d97b..b765f0e81f20 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -8,7 +8,11 @@ enum { PSAMPLE_ATTR_ORIGSIZE, PSAMPLE_ATTR_SAMPLE_GROUP, PSAMPLE_ATTR_GROUP_SEQ, - PSAMPLE_ATTR_SAMPLE_RATE, + PSAMPLE_ATTR_SAMPLE_RATE, /* u32, ratio between observed and + * sampled packets or scaled probability + * if PSAMPLE_ATTR_SAMPLE_PROBABILITY + * is set. + */ PSAMPLE_ATTR_DATA, PSAMPLE_ATTR_GROUP_REFCOUNT, PSAMPLE_ATTR_TUNNEL, @@ -20,6 +24,10 @@ enum { PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */ PSAMPLE_ATTR_PROTO, /* u16 */ PSAMPLE_ATTR_USER_COOKIE, /* binary, user provided data */ + PSAMPLE_ATTR_SAMPLE_PROBABILITY,/* no argument, interpret rate in + * PSAMPLE_ATTR_SAMPLE_RATE as a + * probability scaled 0 - U32_MAX. + */ __PSAMPLE_ATTR_MAX }; -- cgit v1.2.3 From aae0b82b46cb5004bdf82a000c004d69a0885c33 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:56 +0200 Subject: net: openvswitch: add psample action Add support for a new action: psample. This action accepts a u32 group id and a variable-length cookie and uses the psample multicast group to make the packet available for observability. The maximum length of the user-defined cookie is set to 16, same as tc_cookie, to discourage using cookies that will not be offloadable. Reviewed-by: Michal Kubiak Reviewed-by: Aaron Conole Reviewed-by: Ilya Maximets Acked-by: Eelco Chaudron Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-6-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index efc82c318fa2..3dd653748725 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -914,6 +914,31 @@ struct check_pkt_len_arg { }; #endif +#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16 +/** + * enum ovs_psample_attr - Attributes for %OVS_ACTION_ATTR_PSAMPLE + * action. + * + * @OVS_PSAMPLE_ATTR_GROUP: 32-bit number to identify the source of the + * sample. + * @OVS_PSAMPLE_ATTR_COOKIE: An optional variable-length binary cookie that + * contains user-defined metadata. The maximum length is + * OVS_PSAMPLE_COOKIE_MAX_SIZE bytes. + * + * Sends the packet to the psample multicast group with the specified group and + * cookie. It is possible to combine this action with the + * %OVS_ACTION_ATTR_TRUNC action to limit the size of the sample. + */ +enum ovs_psample_attr { + OVS_PSAMPLE_ATTR_GROUP = 1, /* u32 number. */ + OVS_PSAMPLE_ATTR_COOKIE, /* Optional, user specified cookie. */ + + /* private: */ + __OVS_PSAMPLE_ATTR_MAX +}; + +#define OVS_PSAMPLE_ATTR_MAX (__OVS_PSAMPLE_ATTR_MAX - 1) + /** * enum ovs_action_attr - Action types. * @@ -966,6 +991,8 @@ struct check_pkt_len_arg { * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS * argument. * @OVS_ACTION_ATTR_DROP: Explicit drop action. + * @OVS_ACTION_ATTR_PSAMPLE: Send a sample of the packet to external observers + * via psample. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -1004,6 +1031,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ OVS_ACTION_ATTR_DROP, /* u32 error code. */ + OVS_ACTION_ATTR_PSAMPLE, /* Nested OVS_PSAMPLE_ATTR_*. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ -- cgit v1.2.3 From 71763d8a8203c28178d7be7f18af73d4dddb36ba Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Thu, 4 Jul 2024 10:56:57 +0200 Subject: net: openvswitch: store sampling probability in cb. When a packet sample is observed, the sampling rate that was used is important to estimate the real frequency of such event. Store the probability of the parent sample action in the skb's cb area and use it in psample action to pass it down to psample module. Reviewed-by: Aaron Conole Acked-by: Eelco Chaudron Reviewed-by: Ilya Maximets Signed-off-by: Adrian Moreno Link: https://patch.msgid.link/20240704085710.353845-7-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 3dd653748725..3a701bd1f31b 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -649,7 +649,8 @@ enum ovs_flow_attr { * Actions are passed as nested attributes. * * Executes the specified actions with the given probability on a per-packet - * basis. + * basis. Nested actions will be able to access the probability value of the + * parent @OVS_ACTION_ATTR_SAMPLE. */ enum ovs_sample_attr { OVS_SAMPLE_ATTR_UNSPEC, -- cgit v1.2.3 From e46296002113b4556baffd5dc68c4f9e22dae13a Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Thu, 4 Jul 2024 10:11:56 +0200 Subject: net: ethtool: pse-pd: Expand C33 PSE status with class, power and extended state This update expands the status information provided by ethtool for PSE c33. It includes details such as the detected class, current power delivered, and extended state information. Reviewed-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240704-feature_poe_power_cap-v6-1-320003204264@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 15 +++ include/linux/pse-pd/pse.h | 8 ++ include/uapi/linux/ethtool.h | 191 +++++++++++++++++++++++++++++++++++ include/uapi/linux/ethtool_netlink.h | 4 + 4 files changed, 218 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 3a99238ef895..3e70f5d9e0bb 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1273,4 +1273,19 @@ struct ethtool_forced_speed_map { void ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size); + +/* C33 PSE extended state and substate. */ +struct ethtool_c33_pse_ext_state_info { + enum ethtool_c33_pse_ext_state c33_pse_ext_state; + union { + enum ethtool_c33_pse_ext_substate_error_condition error_condition; + enum ethtool_c33_pse_ext_substate_mr_pse_enable mr_pse_enable; + enum ethtool_c33_pse_ext_substate_option_detect_ted option_detect_ted; + enum ethtool_c33_pse_ext_substate_option_vport_lim option_vport_lim; + enum ethtool_c33_pse_ext_substate_ovld_detected ovld_detected; + enum ethtool_c33_pse_ext_substate_power_not_available power_not_available; + enum ethtool_c33_pse_ext_substate_short_detected short_detected; + u32 __c33_pse_ext_substate; + }; +}; #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index 6eec24ffa866..38b9308e5e7a 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -36,12 +36,20 @@ struct pse_control_config { * functions. IEEE 802.3-2022 30.9.1.1.2 aPSEAdminState * @c33_pw_status: power detection status of the PSE. * IEEE 802.3-2022 30.9.1.1.5 aPSEPowerDetectionStatus: + * @c33_pw_class: detected class of a powered PD + * IEEE 802.3-2022 30.9.1.1.8 aPSEPowerClassification + * @c33_actual_pw: power currently delivered by the PSE in mW + * IEEE 802.3-2022 30.9.1.1.23 aPSEActualPower + * @c33_ext_state_info: extended state information of the PSE */ struct pse_control_status { enum ethtool_podl_pse_admin_state podl_admin_state; enum ethtool_podl_pse_pw_d_status podl_pw_status; enum ethtool_c33_pse_admin_state c33_admin_state; enum ethtool_c33_pse_pw_d_status c33_pw_status; + u32 c33_pw_class; + u32 c33_actual_pw; + struct ethtool_c33_pse_ext_state_info c33_ext_state_info; }; /** diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index e011384c915c..230110b97029 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -752,6 +752,197 @@ enum ethtool_module_power_mode { ETHTOOL_MODULE_POWER_MODE_HIGH, }; +/** + * enum ethtool_c33_pse_ext_state - groups of PSE extended states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION: Group of error_condition states + * @ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID: Group of mr_mps_valid states + * @ETHTOOL_C33_PSE_EXT_STATE_MR_PSE_ENABLE: Group of mr_pse_enable states + * @ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED: Group of option_detect_ted + * states + * @ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM: Group of option_vport_lim states + * @ETHTOOL_C33_PSE_EXT_STATE_OVLD_DETECTED: Group of ovld_detected states + * @ETHTOOL_C33_PSE_EXT_STATE_PD_DLL_POWER_TYPE: Group of pd_dll_power_type + * states + * @ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE: Group of power_not_available + * states + * @ETHTOOL_C33_PSE_EXT_STATE_SHORT_DETECTED: Group of short_detected states + */ +enum ethtool_c33_pse_ext_state { + ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION = 1, + ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID, + ETHTOOL_C33_PSE_EXT_STATE_MR_PSE_ENABLE, + ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED, + ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM, + ETHTOOL_C33_PSE_EXT_STATE_OVLD_DETECTED, + ETHTOOL_C33_PSE_EXT_STATE_PD_DLL_POWER_TYPE, + ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE, + ETHTOOL_C33_PSE_EXT_STATE_SHORT_DETECTED, +}; + +/** + * enum ethtool_c33_pse_ext_substate_mr_mps_valid - mr_mps_valid states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_DETECTED_UNDERLOAD: Underload + * state + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_CONNECTION_OPEN: Port is not + * connected + * + * The PSE monitors either the DC or AC Maintain Power Signature + * (MPS, see 33.2.9.1). This variable indicates the presence or absence of + * a valid MPS. + */ +enum ethtool_c33_pse_ext_substate_mr_mps_valid { + ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_DETECTED_UNDERLOAD = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_CONNECTION_OPEN, +}; + +/** + * enum ethtool_c33_pse_ext_substate_error_condition - error_condition states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_NON_EXISTING_PORT: Non-existing + * port number + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNDEFINED_PORT: Undefined port + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT: Internal + * hardware fault + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_COMM_ERROR_AFTER_FORCE_ON: + * Communication error after force on + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS: Unknown + * port status + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_TURN_OFF: Host + * crash turn off + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_FORCE_SHUTDOWN: + * Host crash force shutdown + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_CONFIG_CHANGE: Configuration + * change + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP: Over + * temperature detected + * + * error_condition is a variable indicating the status of + * implementation-specific fault conditions or optionally other system faults + * that prevent the PSE from meeting the specifications in Table 33–11 and that + * require the PSE not to source power. These error conditions are different + * from those monitored by the state diagrams in Figure 33–10. + */ +enum ethtool_c33_pse_ext_substate_error_condition { + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_NON_EXISTING_PORT = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNDEFINED_PORT, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_COMM_ERROR_AFTER_FORCE_ON, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_TURN_OFF, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_FORCE_SHUTDOWN, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_CONFIG_CHANGE, + ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP, +}; + +/** + * enum ethtool_c33_pse_ext_substate_mr_pse_enable - mr_pse_enable states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_PSE_ENABLE_DISABLE_PIN_ACTIVE: Disable + * pin active + * + * mr_pse_enable is control variable that selects PSE operation and test + * functions. + */ +enum ethtool_c33_pse_ext_substate_mr_pse_enable { + ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_PSE_ENABLE_DISABLE_PIN_ACTIVE = 1, +}; + +/** + * enum ethtool_c33_pse_ext_substate_option_detect_ted - option_detect_ted + * states functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_DET_IN_PROCESS: Detection + * in process + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_CONNECTION_CHECK_ERROR: + * Connection check error + * + * option_detect_ted is a variable indicating if detection can be performed + * by the PSE during the ted_timer interval. + */ +enum ethtool_c33_pse_ext_substate_option_detect_ted { + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_DET_IN_PROCESS = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_CONNECTION_CHECK_ERROR, +}; + +/** + * enum ethtool_c33_pse_ext_substate_option_vport_lim - option_vport_lim states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_HIGH_VOLTAGE: Main supply + * voltage is high + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_LOW_VOLTAGE: Main supply + * voltage is low + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_VOLTAGE_INJECTION: Voltage + * injection into the port + * + * option_vport_lim is an optional variable indicates if VPSE is out of the + * operating range during normal operating state. + */ +enum ethtool_c33_pse_ext_substate_option_vport_lim { + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_HIGH_VOLTAGE = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_LOW_VOLTAGE, + ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_VOLTAGE_INJECTION, +}; + +/** + * enum ethtool_c33_pse_ext_substate_ovld_detected - ovld_detected states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_OVLD_DETECTED_OVERLOAD: Overload state + * + * ovld_detected is a variable indicating if the PSE output current has been + * in an overload condition (see 33.2.7.6) for at least TCUT of a one-second + * sliding time. + */ +enum ethtool_c33_pse_ext_substate_ovld_detected { + ETHTOOL_C33_PSE_EXT_SUBSTATE_OVLD_DETECTED_OVERLOAD = 1, +}; + +/** + * enum ethtool_c33_pse_ext_substate_power_not_available - power_not_available + * states functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_BUDGET_EXCEEDED: Power + * budget exceeded for the controller + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PORT_PW_LIMIT_EXCEEDS_CONTROLLER_BUDGET: + * Configured port power limit exceeded controller power budget + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PD_REQUEST_EXCEEDS_PORT_LIMIT: + * Power request from PD exceeds port limit + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_HW_PW_LIMIT: Power + * denied due to Hardware power limit + * + * power_not_available is a variable that is asserted in an + * implementation-dependent manner when the PSE is no longer capable of + * sourcing sufficient power to support the attached PD. Sufficient power + * is defined by classification; see 33.2.6. + */ +enum ethtool_c33_pse_ext_substate_power_not_available { + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_BUDGET_EXCEEDED = 1, + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PORT_PW_LIMIT_EXCEEDS_CONTROLLER_BUDGET, + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PD_REQUEST_EXCEEDS_PORT_LIMIT, + ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_HW_PW_LIMIT, +}; + +/** + * enum ethtool_c33_pse_ext_substate_short_detected - short_detected states + * functions. IEEE 802.3-2022 33.2.4.4 Variables + * + * @ETHTOOL_C33_PSE_EXT_SUBSTATE_SHORT_DETECTED_SHORT_CONDITION: Short + * condition was detected + * + * short_detected is a variable indicating if the PSE output current has been + * in a short circuit condition for TLIM within a sliding window (see 33.2.7.7). + */ +enum ethtool_c33_pse_ext_substate_short_detected { + ETHTOOL_C33_PSE_EXT_SUBSTATE_SHORT_DETECTED_SHORT_CONDITION = 1, +}; + /** * enum ethtool_pse_types - Types of PSE controller. * @ETHTOOL_PSE_UNKNOWN: Type of PSE controller is unknown diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 840dabdc9d88..b8895da001bc 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -939,6 +939,10 @@ enum { ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */ ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */ ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */ + ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */ + ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ + ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ + ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ /* add new constants above here */ __ETHTOOL_A_PSE_CNT, -- cgit v1.2.3 From 4a83abcef5f4f36be4e04ba18edd64226f6f4a19 Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Thu, 4 Jul 2024 10:11:59 +0200 Subject: net: pse-pd: Add new power limit get and set c33 features This patch add a way to get and set the power limit of a PSE PI. For that it uses regulator API callbacks wrapper like get_voltage() and get/set_current_limit() as power is simply V * I. We used mW unit as defined by the IEEE 802.3-2022 standards. set_current_limit() uses the voltage return by get_voltage() and the desired power limit to calculate the current limit. get_voltage() callback is then mandatory to set the power limit. get_current_limit() callback is by default looking at a driver callback and fallback to extracting the current limit from _pse_ethtool_get_status() if the driver does not set its callback. We prefer let the user the choice because ethtool_get_status return much more information than the current limit. expand pse status with c33_pw_limit_ranges to return the ranges available to configure the power limit. Reviewed-by: Sai Krishna Acked-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240704-feature_poe_power_cap-v6-4-320003204264@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/pse-pd/pse.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include') diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index 38b9308e5e7a..591a53e082e6 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -9,6 +9,9 @@ #include #include +/* Maximum current in uA according to IEEE 802.3-2022 Table 145-1 */ +#define MAX_PI_CURRENT 1920000 + struct phy_device; struct pse_controller_dev; @@ -41,6 +44,12 @@ struct pse_control_config { * @c33_actual_pw: power currently delivered by the PSE in mW * IEEE 802.3-2022 30.9.1.1.23 aPSEActualPower * @c33_ext_state_info: extended state information of the PSE + * @c33_avail_pw_limit: available power limit of the PSE in mW + * IEEE 802.3-2022 145.2.5.4 pse_avail_pwr + * @c33_pw_limit_ranges: supported power limit configuration range. The driver + * is in charge of the memory allocation. + * @c33_pw_limit_nb_ranges: number of supported power limit configuration + * ranges */ struct pse_control_status { enum ethtool_podl_pse_admin_state podl_admin_state; @@ -50,6 +59,9 @@ struct pse_control_status { u32 c33_pw_class; u32 c33_actual_pw; struct ethtool_c33_pse_ext_state_info c33_ext_state_info; + u32 c33_avail_pw_limit; + struct ethtool_c33_pse_pw_limit_range *c33_pw_limit_ranges; + u32 c33_pw_limit_nb_ranges; }; /** @@ -61,6 +73,14 @@ struct pse_control_status { * May also return negative errno. * @pi_enable: Configure the PSE PI as enabled. * @pi_disable: Configure the PSE PI as disabled. + * @pi_get_voltage: Return voltage similarly to get_voltage regulator + * callback. + * @pi_get_current_limit: Get the configured current limit similarly to + * get_current_limit regulator callback. + * @pi_set_current_limit: Configure the current limit similarly to + * set_current_limit regulator callback. + * Should not return an error in case of MAX_PI_CURRENT + * current value set. */ struct pse_controller_ops { int (*ethtool_get_status)(struct pse_controller_dev *pcdev, @@ -70,6 +90,11 @@ struct pse_controller_ops { int (*pi_is_enabled)(struct pse_controller_dev *pcdev, int id); int (*pi_enable)(struct pse_controller_dev *pcdev, int id); int (*pi_disable)(struct pse_controller_dev *pcdev, int id); + int (*pi_get_voltage)(struct pse_controller_dev *pcdev, int id); + int (*pi_get_current_limit)(struct pse_controller_dev *pcdev, + int id); + int (*pi_set_current_limit)(struct pse_controller_dev *pcdev, + int id, int max_uA); }; struct module; @@ -156,6 +181,11 @@ int pse_ethtool_get_status(struct pse_control *psec, int pse_ethtool_set_config(struct pse_control *psec, struct netlink_ext_ack *extack, const struct pse_control_config *config); +int pse_ethtool_set_pw_limit(struct pse_control *psec, + struct netlink_ext_ack *extack, + const unsigned int pw_limit); +int pse_ethtool_get_pw_limit(struct pse_control *psec, + struct netlink_ext_ack *extack); bool pse_has_podl(struct pse_control *psec); bool pse_has_c33(struct pse_control *psec); @@ -185,6 +215,19 @@ static inline int pse_ethtool_set_config(struct pse_control *psec, return -EOPNOTSUPP; } +static inline int pse_ethtool_set_pw_limit(struct pse_control *psec, + struct netlink_ext_ack *extack, + const unsigned int pw_limit) +{ + return -EOPNOTSUPP; +} + +static inline int pse_ethtool_get_pw_limit(struct pse_control *psec, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + static inline bool pse_has_podl(struct pse_control *psec) { return false; -- cgit v1.2.3 From 30d7b6727724ce3729f2cb5b8be985d2d1931d2b Mon Sep 17 00:00:00 2001 From: "Kory Maincent (Dent Project)" Date: Thu, 4 Jul 2024 10:12:00 +0200 Subject: net: ethtool: Add new power limit get and set features This patch expands the status information provided by ethtool for PSE c33 with available power limit and available power limit ranges. It also adds a call to pse_ethtool_set_pw_limit() to configure the PSE control power limit. Reviewed-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240704-feature_poe_power_cap-v6-5-320003204264@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 5 +++++ include/uapi/linux/ethtool_netlink.h | 8 ++++++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 3e70f5d9e0bb..e213b5508da6 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1288,4 +1288,9 @@ struct ethtool_c33_pse_ext_state_info { u32 __c33_pse_ext_substate; }; }; + +struct ethtool_c33_pse_pw_limit_range { + u32 min; + u32 max; +}; #endif /* _LINUX_ETHTOOL_H */ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index b8895da001bc..6d5bdcc67631 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -930,6 +930,12 @@ enum { }; /* Power Sourcing Equipment */ +enum { + ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, + ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */ +}; + enum { ETHTOOL_A_PSE_UNSPEC, ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ @@ -943,6 +949,8 @@ enum { ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ + ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */ /* add new constants above here */ __ETHTOOL_A_PSE_CNT, -- cgit v1.2.3 From fefbbdfb59d3a20fd98734363e6dd9fa7cc65c70 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 5 Jul 2024 18:03:42 +0200 Subject: ALSA: seq: Add tempo base unit for MIDI2 Set Tempo messages MIDI2 Set Tempo message defines the tempo in 10ns unit for finer accuracy, while MIDI1 was defined in 1us unit. For adapting this different unit, introduce "tempo_base" field to snd_seq_queue_tempo struct so that user-space can pass the proper tempo base unit. The accepted value is limited, it must be either 0, 10 or 1000. The protocol version is bumped to 1.0.4 along with this. The access with the older protocol version ignores the tempo-base value in ioctls and always treats as 1000. Reviewed-by: Jaroslav Kysela Link: https://patch.msgid.link/20240705160344.6481-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- include/uapi/sound/asequencer.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/sound/asequencer.h b/include/uapi/sound/asequencer.h index c85fdd8895d8..39b37edcf813 100644 --- a/include/uapi/sound/asequencer.h +++ b/include/uapi/sound/asequencer.h @@ -10,7 +10,7 @@ #include /** version of the sequencer */ -#define SNDRV_SEQ_VERSION SNDRV_PROTOCOL_VERSION(1, 0, 3) +#define SNDRV_SEQ_VERSION SNDRV_PROTOCOL_VERSION(1, 0, 4) /** * definition of sequencer event types @@ -523,11 +523,12 @@ struct snd_seq_queue_status { /* queue tempo */ struct snd_seq_queue_tempo { int queue; /* sequencer queue */ - unsigned int tempo; /* current tempo, us/tick */ + unsigned int tempo; /* current tempo, us/tick (or different time-base below) */ int ppq; /* time resolution, ticks/quarter */ unsigned int skew_value; /* queue skew */ unsigned int skew_base; /* queue skew base */ - char reserved[24]; /* for the future */ + unsigned short tempo_base; /* tempo base in nsec unit; either 10 or 1000 */ + char reserved[22]; /* for the future */ }; -- cgit v1.2.3 From 4bf1ea3fc914faef37d8c793b7144d4765ff4a00 Mon Sep 17 00:00:00 2001 From: Devin Bayer Date: Fri, 28 Jun 2024 10:46:03 +0200 Subject: platform/x86: asus-wmi: support the disable camera LED on F10 of Zenbook 2023 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a sysfs entry for the LED on F10 above the crossed out camera icon on 2023 Zenbooks. Signed-off-by: Devin Bayer Link: https://lore.kernel.org/r/20240628084603.217106-1-dev@doubly.so Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/linux/platform_data/x86/asus-wmi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h index 3eb5cd6773ad..0aeeae1c1943 100644 --- a/include/linux/platform_data/x86/asus-wmi.h +++ b/include/linux/platform_data/x86/asus-wmi.h @@ -51,6 +51,10 @@ #define ASUS_WMI_DEVID_LED6 0x00020016 #define ASUS_WMI_DEVID_MICMUTE_LED 0x00040017 +/* Disable Camera LED */ +#define ASUS_WMI_DEVID_CAMERA_LED_NEG 0x00060078 /* 0 = on (unused) */ +#define ASUS_WMI_DEVID_CAMERA_LED 0x00060079 /* 1 = on */ + /* Backlight and Brightness */ #define ASUS_WMI_DEVID_ALS_ENABLE 0x00050001 /* Ambient Light Sensor */ #define ASUS_WMI_DEVID_BACKLIGHT 0x00050011 -- cgit v1.2.3 From d1f1570f3d6db5d35642092a671812e62bfba79d Mon Sep 17 00:00:00 2001 From: Varadarajan Narayanan Date: Tue, 30 Apr 2024 12:12:10 +0530 Subject: dt-bindings: interconnect: Add Qualcomm IPQ9574 support Add interconnect-cells to clock provider so that it can be used as icc provider. Add master/slave ids for Qualcomm IPQ9574 Network-On-Chip interfaces. This will be used by the gcc-ipq9574 driver that will for providing interconnect services using the icc-clk framework. Acked-by: Georgi Djakov Reviewed-by: Krzysztof Kozlowski Signed-off-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20240430064214.2030013-3-quic_varada@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/interconnect/qcom,ipq9574.h | 59 +++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 include/dt-bindings/interconnect/qcom,ipq9574.h (limited to 'include') diff --git a/include/dt-bindings/interconnect/qcom,ipq9574.h b/include/dt-bindings/interconnect/qcom,ipq9574.h new file mode 100644 index 000000000000..42019335c7dd --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,ipq9574.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +#ifndef INTERCONNECT_QCOM_IPQ9574_H +#define INTERCONNECT_QCOM_IPQ9574_H + +#define MASTER_ANOC_PCIE0 0 +#define SLAVE_ANOC_PCIE0 1 +#define MASTER_SNOC_PCIE0 2 +#define SLAVE_SNOC_PCIE0 3 +#define MASTER_ANOC_PCIE1 4 +#define SLAVE_ANOC_PCIE1 5 +#define MASTER_SNOC_PCIE1 6 +#define SLAVE_SNOC_PCIE1 7 +#define MASTER_ANOC_PCIE2 8 +#define SLAVE_ANOC_PCIE2 9 +#define MASTER_SNOC_PCIE2 10 +#define SLAVE_SNOC_PCIE2 11 +#define MASTER_ANOC_PCIE3 12 +#define SLAVE_ANOC_PCIE3 13 +#define MASTER_SNOC_PCIE3 14 +#define SLAVE_SNOC_PCIE3 15 +#define MASTER_USB 16 +#define SLAVE_USB 17 +#define MASTER_USB_AXI 18 +#define SLAVE_USB_AXI 19 +#define MASTER_NSSNOC_NSSCC 20 +#define SLAVE_NSSNOC_NSSCC 21 +#define MASTER_NSSNOC_SNOC_0 22 +#define SLAVE_NSSNOC_SNOC_0 23 +#define MASTER_NSSNOC_SNOC_1 24 +#define SLAVE_NSSNOC_SNOC_1 25 +#define MASTER_NSSNOC_PCNOC_1 26 +#define SLAVE_NSSNOC_PCNOC_1 27 +#define MASTER_NSSNOC_QOSGEN_REF 28 +#define SLAVE_NSSNOC_QOSGEN_REF 29 +#define MASTER_NSSNOC_TIMEOUT_REF 30 +#define SLAVE_NSSNOC_TIMEOUT_REF 31 +#define MASTER_NSSNOC_XO_DCD 32 +#define SLAVE_NSSNOC_XO_DCD 33 +#define MASTER_NSSNOC_ATB 34 +#define SLAVE_NSSNOC_ATB 35 +#define MASTER_MEM_NOC_NSSNOC 36 +#define SLAVE_MEM_NOC_NSSNOC 37 +#define MASTER_NSSNOC_MEMNOC 38 +#define SLAVE_NSSNOC_MEMNOC 39 +#define MASTER_NSSNOC_MEM_NOC_1 40 +#define SLAVE_NSSNOC_MEM_NOC_1 41 + +#define MASTER_NSSNOC_PPE 0 +#define SLAVE_NSSNOC_PPE 1 +#define MASTER_NSSNOC_PPE_CFG 2 +#define SLAVE_NSSNOC_PPE_CFG 3 +#define MASTER_NSSNOC_NSS_CSR 4 +#define SLAVE_NSSNOC_NSS_CSR 5 +#define MASTER_NSSNOC_IMEM_QSB 6 +#define SLAVE_NSSNOC_IMEM_QSB 7 +#define MASTER_NSSNOC_IMEM_AHB 8 +#define SLAVE_NSSNOC_IMEM_AHB 9 + +#endif /* INTERCONNECT_QCOM_IPQ9574_H */ -- cgit v1.2.3 From 28bdacbcb36d093e23734acccecd139f5fc05f67 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 26 Jun 2024 16:53:23 +0800 Subject: mm: move memory_failure_queue() into copy_mc_[user]_highpage() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "mm: migrate: support poison recover from migrate folio", v5. The folio migration is widely used in kernel, memory compaction, memory hotplug, soft offline page, numa balance, memory demote/promotion, etc, but once access a poisoned source folio when migrating, the kernel will panic. There is a mechanism in the kernel to recover from uncorrectable memory errors, ARCH_HAS_COPY_MC(eg, Machine Check Safe Memory Copy on x86), which is already used in NVDIMM or core-mm paths(eg, CoW, khugepaged, coredump, ksm copy), see copy_mc_to_{user,kernel}, copy_mc_{user_}highpage callers. This series of patches provide the recovery mechanism from folio copy for the widely used folio migration. Please note, because folio migration is no guarantee of success, so we could chose to make folio migration tolerant of memory failures, adding folio_mc_copy() which is a #MC versions of folio_copy(), once accessing a poisoned source folio, we could return error and make the folio migration fail, and this could avoid the similar panic shown below. CPU: 1 PID: 88343 Comm: test_softofflin Kdump: loaded Not tainted 6.6.0 pc : copy_page+0x10/0xc0 lr : copy_highpage+0x38/0x50 ... Call trace: copy_page+0x10/0xc0 folio_copy+0x78/0x90 migrate_folio_extra+0x54/0xa0 move_to_new_folio+0xd8/0x1f0 migrate_folio_move+0xb8/0x300 migrate_pages_batch+0x528/0x788 migrate_pages_sync+0x8c/0x258 migrate_pages+0x440/0x528 soft_offline_in_use_page+0x2ec/0x3c0 soft_offline_page+0x238/0x310 soft_offline_page_store+0x6c/0xc0 dev_attr_store+0x20/0x40 sysfs_kf_write+0x4c/0x68 kernfs_fop_write_iter+0x130/0x1c8 new_sync_write+0xa4/0x138 vfs_write+0x238/0x2d8 ksys_write+0x74/0x110 This patch (of 5): There is a memory_failure_queue() call after copy_mc_[user]_highpage(), see callers, eg, CoW/KSM page copy, it is used to mark the source page as h/w poisoned and unmap it from other tasks, and the upcomming poison recover from migrate folio will do the similar thing, so let's move the memory_failure_queue() into the copy_mc_[user]_highpage() instead of adding it into each user, this should also enhance the handling of poisoned page in khugepaged. Link: https://lkml.kernel.org/r/20240626085328.608006-1-wangkefeng.wang@huawei.com Link: https://lkml.kernel.org/r/20240626085328.608006-2-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Jane Chu Reviewed-by: Miaohe Lin Cc: Alistair Popple Cc: Benjamin LaHaise Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jérôme Glisse Cc: Jiaqi Yan Cc: Lance Yang Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Tony Luck Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/highmem.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index fa6891e06316..930a591b9b61 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -352,6 +352,9 @@ static inline int copy_mc_user_highpage(struct page *to, struct page *from, kunmap_local(vto); kunmap_local(vfrom); + if (ret) + memory_failure_queue(page_to_pfn(from), 0); + return ret; } @@ -368,6 +371,9 @@ static inline int copy_mc_highpage(struct page *to, struct page *from) kunmap_local(vto); kunmap_local(vfrom); + if (ret) + memory_failure_queue(page_to_pfn(from), 0); + return ret; } #else -- cgit v1.2.3 From 02f4ee5a144cef6b26421cb42cca64bb4138d459 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 26 Jun 2024 16:53:24 +0800 Subject: mm: add folio_mc_copy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a #MC variant of folio_copy() which uses copy_mc_highpage() to support #MC handled during folio copy, it will be used in folio migration soon. Link: https://lkml.kernel.org/r/20240626085328.608006-3-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Jane Chu Reviewed-by: Miaohe Lin Cc: Alistair Popple Cc: Benjamin LaHaise Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jérôme Glisse Cc: Jiaqi Yan Cc: Lance Yang Cc: Matthew Wilcox (Oracle) Cc: Muchun Song Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Tony Luck Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index e2140ea6ae98..a2e3ebead410 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1291,6 +1291,7 @@ void put_pages_list(struct list_head *pages); void split_page(struct page *page, unsigned int order); void folio_copy(struct folio *dst, struct folio *src); +int folio_mc_copy(struct folio *dst, struct folio *src); unsigned long nr_free_buffer_pages(void); -- cgit v1.2.3 From 3f59493713534b61abcbe2e57582cf7e31a42d51 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Wed, 26 Jun 2024 16:53:28 +0800 Subject: mm: migrate: remove folio_migrate_copy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The folio_migrate_copy() is just a wrapper of folio_copy() and folio_migrate_flags(), it is simple and only aio use it for now, unfold it and remove folio_migrate_copy(). Link: https://lkml.kernel.org/r/20240626085328.608006-7-wangkefeng.wang@huawei.com Signed-off-by: Kefeng Wang Reviewed-by: Jane Chu Cc: Alistair Popple Cc: Benjamin LaHaise Cc: David Hildenbrand Cc: Hugh Dickins Cc: Jérôme Glisse Cc: Jiaqi Yan Cc: Lance Yang Cc: Matthew Wilcox (Oracle) Cc: Miaohe Lin Cc: Muchun Song Cc: Naoya Horiguchi Cc: Oscar Salvador Cc: Tony Luck Cc: Vishal Moola (Oracle) Signed-off-by: Andrew Morton --- include/linux/migrate.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index af2579ae93f2..644be30b69c8 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -76,7 +76,6 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) __releases(ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); -void folio_migrate_copy(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); -- cgit v1.2.3 From 25f76c3db2f08428b5acd082a52787164001eb6e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 6 Jul 2024 09:52:17 +0200 Subject: block: add a bvec_phys helper Get callers out of poking into bvec internals a bit more. Not a huge win right now, but with the proposed new DMA mapping API we might end up with a lot more of this otherwise. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20240706075228.2350978-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bvec.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index bd1e361b351c..f41c7f0ef91e 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -280,4 +280,18 @@ static inline void *bvec_virt(struct bio_vec *bvec) return page_address(bvec->bv_page) + bvec->bv_offset; } +/** + * bvec_phys - return the physical address for a bvec + * @bvec: bvec to return the physical address for + */ +static inline phys_addr_t bvec_phys(const struct bio_vec *bvec) +{ + /* + * Note this open codes page_to_phys because page_to_phys is defined in + * , which we don't want to pull in here. If it ever moves to + * a sensible place we should start using it. + */ + return PFN_PHYS(page_to_pfn(bvec->bv_page)) + bvec->bv_offset; +} + #endif /* __LINUX_BVEC_H */ -- cgit v1.2.3 From cd6193877c603f4b0c3c7e5607ffa3d52815403f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 1 Jul 2024 09:35:33 +0200 Subject: x86/efistub: Enable SMBIOS protocol handling for x86 The smbios.c source file is not currently included in the x86 build, and before we can do so, it needs some tweaks to build correctly in combination with the EFI mixed mode support. Reviewed-by: Lukas Wunner Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 418e555459da..2a539816a436 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -74,10 +74,10 @@ typedef void *efi_handle_t; */ typedef guid_t efi_guid_t __aligned(__alignof__(u32)); -#define EFI_GUID(a, b, c, d...) (efi_guid_t){ { \ +#define EFI_GUID(a, b, c, d...) ((efi_guid_t){ { \ (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ (b) & 0xff, ((b) >> 8) & 0xff, \ - (c) & 0xff, ((c) >> 8) & 0xff, d } } + (c) & 0xff, ((c) >> 8) & 0xff, d } }) /* * Generic EFI table header -- cgit v1.2.3 From 71e49eccdca6328eecc335ed8f5557bd0ed70fc6 Mon Sep 17 00:00:00 2001 From: Aditya Garg Date: Sun, 30 Jun 2024 19:24:54 +0000 Subject: x86/efistub: Call Apple set_os protocol on dual GPU Intel Macs 0c18184de990 ("platform/x86: apple-gmux: support MMIO gmux on T2 Macs") brought support for T2 Macs in apple-gmux. But in order to use dual GPU, the integrated GPU has to be enabled. On such dual GPU EFI Macs, the EFI stub needs to report that it is booting macOS in order to prevent the firmware from disabling the iGPU. This patch is also applicable for some non T2 Intel Macs. Based on this patch for GRUB by Andreas Heider : https://lists.gnu.org/archive/html/grub-devel/2013-12/msg00442.html Credits also goto Kerem Karabay for helping porting the patch to the Linux kernel. Cc: Orlando Chamberlain Signed-off-by: Aditya Garg [ardb: limit scope using list of DMI matches provided by Lukas and Orlando] Reviewed-by: Lukas Wunner Tested-by: Aditya Garg Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 2a539816a436..3a6c04a9f9aa 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -385,6 +385,7 @@ void efi_native_runtime_setup(void); #define EFI_MEMORY_ATTRIBUTES_TABLE_GUID EFI_GUID(0xdcfa911d, 0x26eb, 0x469f, 0xa2, 0x20, 0x38, 0xb7, 0xdc, 0x46, 0x12, 0x20) #define EFI_CONSOLE_OUT_DEVICE_GUID EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define APPLE_PROPERTIES_PROTOCOL_GUID EFI_GUID(0x91bd12fe, 0xf6c3, 0x44fb, 0xa5, 0xb7, 0x51, 0x22, 0xab, 0x30, 0x3a, 0xe0) +#define APPLE_SET_OS_PROTOCOL_GUID EFI_GUID(0xc5c5da95, 0x7d5c, 0x45e6, 0xb2, 0xf1, 0x3f, 0xd5, 0x2b, 0xb1, 0x00, 0x77) #define EFI_TCG2_PROTOCOL_GUID EFI_GUID(0x607f766c, 0x7455, 0x42be, 0x93, 0x0b, 0xe4, 0xd7, 0x6d, 0xb2, 0x72, 0x0f) #define EFI_TCG2_FINAL_EVENTS_TABLE_GUID EFI_GUID(0x1e2ed096, 0x30e2, 0x4254, 0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25) #define EFI_LOAD_FILE_PROTOCOL_GUID EFI_GUID(0x56ec3091, 0x954c, 0x11d2, 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b) -- cgit v1.2.3 From f21711bbdbf0d95a389bfaad54ce444b46830d58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sat, 6 Jul 2024 13:13:42 +0200 Subject: regmap-irq: handle const struct regmap_irq_sub_irq_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct instances supplied by the drivers are never modified. Handle them as const in the regmap core allowing the drivers to put them into .rodata. Also add a new entry to const_structs.checkpatch to make sure future instances of this struct already enter the tree as const. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20240706-regmap-const-structs-v1-2-d08c776da787@weissschuh.net Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index a6bc2980a98b..2da1cfc52233 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1607,7 +1607,7 @@ struct regmap_irq_chip { unsigned int main_status; unsigned int num_main_status_bits; - struct regmap_irq_sub_irq_map *sub_reg_offsets; + const struct regmap_irq_sub_irq_map *sub_reg_offsets; int num_main_regs; unsigned int status_base; -- cgit v1.2.3 From f45b94ffc5f1204b35b5c695ed265b1385951616 Mon Sep 17 00:00:00 2001 From: Varadarajan Narayanan Date: Tue, 30 Apr 2024 12:12:09 +0530 Subject: interconnect: icc-clk: Specify master/slave ids Presently, icc-clk driver autogenerates the master and slave ids. However, devices with multiple nodes on the interconnect could have other constraints and may not match with the auto generated node ids. Hence, modify the driver to use the master/slave ids provided by the caller instead of auto generating. Also, update clk-cbf-8996 accordingly. Acked-by: Georgi Djakov Signed-off-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20240430064214.2030013-2-quic_varada@quicinc.com Signed-off-by: Bjorn Andersson --- include/linux/interconnect-clk.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/interconnect-clk.h b/include/linux/interconnect-clk.h index 0cd80112bea5..170898faaacb 100644 --- a/include/linux/interconnect-clk.h +++ b/include/linux/interconnect-clk.h @@ -11,6 +11,8 @@ struct device; struct icc_clk_data { struct clk *clk; const char *name; + unsigned int master_id; + unsigned int slave_id; }; struct icc_provider *icc_clk_register(struct device *dev, -- cgit v1.2.3 From d3153113619216e87038a20bebf82582f9be10e7 Mon Sep 17 00:00:00 2001 From: Varadarajan Narayanan Date: Tue, 30 Apr 2024 12:12:11 +0530 Subject: interconnect: icc-clk: Add devm_icc_clk_register Wrap icc_clk_register to create devm_icc_clk_register to be able to release the resources properly. Acked-by: Georgi Djakov Reviewed-by: Dmitry Baryshkov Signed-off-by: Varadarajan Narayanan Link: https://lore.kernel.org/r/20240430064214.2030013-4-quic_varada@quicinc.com Signed-off-by: Bjorn Andersson --- include/linux/interconnect-clk.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/interconnect-clk.h b/include/linux/interconnect-clk.h index 170898faaacb..9bcee3e9c56c 100644 --- a/include/linux/interconnect-clk.h +++ b/include/linux/interconnect-clk.h @@ -19,6 +19,8 @@ struct icc_provider *icc_clk_register(struct device *dev, unsigned int first_id, unsigned int num_clocks, const struct icc_clk_data *data); +int devm_icc_clk_register(struct device *dev, unsigned int first_id, + unsigned int num_clocks, const struct icc_clk_data *data); void icc_clk_unregister(struct icc_provider *provider); #endif -- cgit v1.2.3 From 7e86845a0346efc95fddaa97ce5cd6a8bda8c71c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 4 Jun 2024 15:45:24 -0400 Subject: rpcrdma: Implement generic device removal Commit e87a911fed07 ("nvme-rdma: use ib_client API to detect device removal") explains the benefits of handling device removal outside of the CM event handler. Sketch in an IB device removal notification mechanism that can be used by both the client and server side RPC-over-RDMA transport implementations. Suggested-by: Sagi Grimberg Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- include/linux/sunrpc/rdma_rn.h | 27 +++++++++++++++++++++++++++ include/trace/events/rpcrdma.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 include/linux/sunrpc/rdma_rn.h (limited to 'include') diff --git a/include/linux/sunrpc/rdma_rn.h b/include/linux/sunrpc/rdma_rn.h new file mode 100644 index 000000000000..7d032ca057af --- /dev/null +++ b/include/linux/sunrpc/rdma_rn.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * * Copyright (c) 2024, Oracle and/or its affiliates. + */ + +#ifndef _LINUX_SUNRPC_RDMA_RN_H +#define _LINUX_SUNRPC_RDMA_RN_H + +#include + +/** + * rpcrdma_notification - request removal notification + */ +struct rpcrdma_notification { + void (*rn_done)(struct rpcrdma_notification *rn); + u32 rn_index; +}; + +int rpcrdma_rn_register(struct ib_device *device, + struct rpcrdma_notification *rn, + void (*done)(struct rpcrdma_notification *rn)); +void rpcrdma_rn_unregister(struct ib_device *device, + struct rpcrdma_notification *rn); +int rpcrdma_ib_client_register(void); +void rpcrdma_ib_client_unregister(void); + +#endif /* _LINUX_SUNRPC_RDMA_RN_H */ diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 14392652273a..ecdaf088219d 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -2220,6 +2220,40 @@ TRACE_EVENT(svcrdma_sq_post_err, ) ); +DECLARE_EVENT_CLASS(rpcrdma_client_device_class, + TP_PROTO( + const struct ib_device *device + ), + + TP_ARGS(device), + + TP_STRUCT__entry( + __string(name, device->name) + ), + + TP_fast_assign( + __assign_str(name); + ), + + TP_printk("device=%s", + __get_str(name) + ) +); + +#define DEFINE_CLIENT_DEVICE_EVENT(name) \ + DEFINE_EVENT(rpcrdma_client_device_class, name, \ + TP_PROTO( \ + const struct ib_device *device \ + ), \ + TP_ARGS(device) \ + ) + +DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_completion); +DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_add_one); +DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one); +DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_wait_on); +DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one_done); + #endif /* _TRACE_RPCRDMA_H */ #include -- cgit v1.2.3 From 3f4eb9ff923413cdb4c7e171c06d3564f6286712 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 4 Jun 2024 15:45:25 -0400 Subject: xprtrdma: Handle device removal outside of the CM event handler Wait for all disconnects to complete to ensure the transport has divested all of its hardware resources before the underlying RDMA device can be removed. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index ecdaf088219d..ba2d6a0e41cc 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -669,6 +669,29 @@ TRACE_EVENT(xprtrdma_inline_thresh, DEFINE_CONN_EVENT(connect); DEFINE_CONN_EVENT(disconnect); +TRACE_EVENT(xprtrdma_device_removal, + TP_PROTO( + const struct rdma_cm_id *id + ), + + TP_ARGS(id), + + TP_STRUCT__entry( + __string(name, id->device->name) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __assign_str(name); + memcpy(__entry->addr, &id->route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + ), + + TP_printk("device %s to be removed, disconnecting %pISpc\n", + __get_str(name), __entry->addr + ) +); + DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); TRACE_EVENT(xprtrdma_op_connect, -- cgit v1.2.3 From 820620516993c151c88e9b59edc79ed12d1c31cd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:19 -0400 Subject: NFSv4: Clean up open delegation return structure Instead of having the fields open coded in the struct nfs_openres, add a separate structure for them so that we can reuse that code for the WANT_DELEGATION case. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d09b9773b20c..682559e19d9d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -449,6 +449,22 @@ struct stateowner_id { __u32 uniquifier; }; +struct nfs4_open_delegation { + __u32 open_delegation_type; + union { + struct { + fmode_t type; + __u32 do_recall; + nfs4_stateid stateid; + unsigned long pagemod_limit; + }; + struct { + __u32 why_no_delegation; + __u32 will_notify; + }; + }; +}; + /* * Arguments to the open call. */ @@ -490,13 +506,10 @@ struct nfs_openres { struct nfs_fattr * f_attr; struct nfs_seqid * seqid; const struct nfs_server *server; - fmode_t delegation_type; - nfs4_stateid delegation; - unsigned long pagemod_limit; - __u32 do_recall; __u32 attrset[NFS4_BITMAP_SIZE]; struct nfs4_string *owner; struct nfs4_string *group_owner; + struct nfs4_open_delegation delegation; __u32 access_request; __u32 access_supported; __u32 access_result; -- cgit v1.2.3 From 6a68aed602d7b770552c7f890d0c30c53725c7b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:21 -0400 Subject: NFSv4: Add new attribute delegation definitions Add the attribute delegation XDR definitions from the spec. Signed-off-by: Tom Haynes Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 9 +++++++++ include/uapi/linux/nfs4.h | 2 ++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 0d896ce296ce..c074e0ac390f 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -367,6 +367,8 @@ enum open_delegation_type4 { NFS4_OPEN_DELEGATE_READ = 1, NFS4_OPEN_DELEGATE_WRITE = 2, NFS4_OPEN_DELEGATE_NONE_EXT = 3, /* 4.1 */ + NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG = 4, + NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG = 5, }; enum why_no_delegation4 { /* new to v4.1 */ @@ -507,6 +509,11 @@ enum { FATTR4_XATTR_SUPPORT = 82, }; +enum { + FATTR4_TIME_DELEG_ACCESS = 84, + FATTR4_TIME_DELEG_MODIFY = 85, +}; + /* * The following internal definitions enable processing the above * attribute bits within 32-bit word boundaries. @@ -586,6 +593,8 @@ enum { #define FATTR4_WORD2_SECURITY_LABEL BIT(FATTR4_SEC_LABEL - 64) #define FATTR4_WORD2_MODE_UMASK BIT(FATTR4_MODE_UMASK - 64) #define FATTR4_WORD2_XATTR_SUPPORT BIT(FATTR4_XATTR_SUPPORT - 64) +#define FATTR4_WORD2_TIME_DELEG_ACCESS BIT(FATTR4_TIME_DELEG_ACCESS - 64) +#define FATTR4_WORD2_TIME_DELEG_MODIFY BIT(FATTR4_TIME_DELEG_MODIFY - 64) /* MDS threshold bitmap bits */ #define THRESHOLD_RD (1UL << 0) diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 1d2043708bf1..afd7e32906c3 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -69,6 +69,8 @@ #define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL 0x10000 #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000 +#define NFS4_SHARE_WANT_DELEG_TIMESTAMPS 0x100000 + #define NFS4_CDFC4_FORE 0x1 #define NFS4_CDFC4_BACK 0x2 #define NFS4_CDFC4_BOTH 0x3 -- cgit v1.2.3 From 90f9ae74422d7e7447cb0ea21e1227142b58c52a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:22 -0400 Subject: NFSv4: Plumb in XDR support for the new delegation-only setattr op We want to send the updated atime and mtime as part of the delegreturn compound. Add a special structure to hold those variables. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 682559e19d9d..f40be64ce942 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -622,6 +622,13 @@ struct nfs_release_lockowner_res { struct nfs4_sequence_res seq_res; }; +struct nfs4_delegattr { + struct timespec64 atime; + struct timespec64 mtime; + bool atime_set; + bool mtime_set; +}; + struct nfs4_delegreturnargs { struct nfs4_sequence_args seq_args; const struct nfs_fh *fhandle; @@ -629,6 +636,7 @@ struct nfs4_delegreturnargs { const u32 *bitmask; u32 bitmask_store[NFS_BITMASK_SZ]; struct nfs4_layoutreturn_args *lr_args; + struct nfs4_delegattr *sattr_args; }; struct nfs4_delegreturnres { @@ -637,6 +645,8 @@ struct nfs4_delegreturnres { struct nfs_server *server; struct nfs4_layoutreturn_res *lr_res; int lr_ret; + bool sattr_res; + int sattr_ret; }; /* -- cgit v1.2.3 From 4201916f2ab13577d45876f4bc784be55e4a83da Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:24 -0400 Subject: NFSv4: Add a flags argument to the 'have_delegation' callback This argument will be used to allow the caller to specify whether or not they need to know that this is an attribute delegation. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f40be64ce942..51611583af51 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1830,7 +1830,7 @@ struct nfs_rpc_ops { int open_flags, struct iattr *iattr, int *); - int (*have_delegation)(struct inode *, fmode_t); + int (*have_delegation)(struct inode *, fmode_t, int); struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *); struct nfs_client *(*init_client) (struct nfs_client *, const struct nfs_client_initdata *); -- cgit v1.2.3 From 86e1c54d152e2799671e149d6e4d1c1a4a2be857 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:26 -0400 Subject: NFSv4: Add recovery of attribute delegations After a reboot of the NFSv4.2 server, the recovery code needs to specify whether the delegation to be recovered is an attribute delegation or not. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 51611583af51..d8cfa956d24c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -484,7 +484,7 @@ struct nfs_openargs { nfs4_verifier verifier; /* EXCLUSIVE */ }; nfs4_stateid delegation; /* CLAIM_DELEGATE_CUR */ - fmode_t delegation_type; /* CLAIM_PREVIOUS */ + __u32 delegation_type; /* CLAIM_PREVIOUS */ } u; const struct qstr * name; const struct nfs_server *server; /* Needed for ID mapping */ -- cgit v1.2.3 From dcb3c20f741993efbd95be78aab93fac29516323 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:27 -0400 Subject: NFSv4: Add a capability for delegated attributes Cache whether or not the server may have support for delegated attributes in a capability flag. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 92de074e63b9..5a76a87cd924 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -278,6 +278,7 @@ struct nfs_server { #define NFS_CAP_LGOPEN (1U << 5) #define NFS_CAP_CASE_INSENSITIVE (1U << 6) #define NFS_CAP_CASE_PRESERVING (1U << 7) +#define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) #define NFS_CAP_STATEID_NFSV41 (1U << 16) -- cgit v1.2.3 From 707f13b3d081ea811c40014e7b61f2c487ee9a4f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:32 -0400 Subject: NFSv4: Add support for the FATTR4_OPEN_ARGUMENTS attribute Query the server for the OPEN arguments that it supports so that we can figure out which extensions we can use. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 2 ++ include/linux/nfs_xdr.h | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index c074e0ac390f..f9df88091c6d 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -512,6 +512,7 @@ enum { enum { FATTR4_TIME_DELEG_ACCESS = 84, FATTR4_TIME_DELEG_MODIFY = 85, + FATTR4_OPEN_ARGUMENTS = 86, }; /* @@ -595,6 +596,7 @@ enum { #define FATTR4_WORD2_XATTR_SUPPORT BIT(FATTR4_XATTR_SUPPORT - 64) #define FATTR4_WORD2_TIME_DELEG_ACCESS BIT(FATTR4_TIME_DELEG_ACCESS - 64) #define FATTR4_WORD2_TIME_DELEG_MODIFY BIT(FATTR4_TIME_DELEG_MODIFY - 64) +#define FATTR4_WORD2_OPEN_ARGUMENTS BIT(FATTR4_OPEN_ARGUMENTS - 64) /* MDS threshold bitmap bits */ #define THRESHOLD_RD (1UL << 0) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d8cfa956d24c..af510a7ec46a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1213,6 +1213,14 @@ struct nfs4_statfs_res { struct nfs_fsstat *fsstat; }; +struct nfs4_open_caps { + u32 oa_share_access[1]; + u32 oa_share_deny[1]; + u32 oa_share_access_want[1]; + u32 oa_open_claim[1]; + u32 oa_createmode[1]; +}; + struct nfs4_server_caps_arg { struct nfs4_sequence_args seq_args; struct nfs_fh *fhandle; @@ -1229,6 +1237,7 @@ struct nfs4_server_caps_res { u32 fh_expire_type; u32 case_insensitive; u32 case_preserving; + struct nfs4_open_caps open_caps; }; #define NFS4_PATHNAME_MAXCOMPONENTS 512 -- cgit v1.2.3 From d2a00cceb93a4df2afaceb836297628d0aeec7ad Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Jun 2024 21:21:33 -0400 Subject: NFSv4: Detect support for OPEN4_SHARE_ACCESS_WANT_OPEN_XOR_DELEGATION If the server supports the NFSv4.2 protocol extension to optimise away returning a stateid when it returns a delegation, then we cache that information in another capability flag. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 1 + include/uapi/linux/nfs4.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 5a76a87cd924..fe5b1a8bd723 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -278,6 +278,7 @@ struct nfs_server { #define NFS_CAP_LGOPEN (1U << 5) #define NFS_CAP_CASE_INSENSITIVE (1U << 6) #define NFS_CAP_CASE_PRESERVING (1U << 7) +#define NFS_CAP_OPEN_XOR (1U << 12) #define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index afd7e32906c3..caf4db2fcbb9 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -46,6 +46,7 @@ #define NFS4_OPEN_RESULT_CONFIRM 0x0002 #define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 #define NFS4_OPEN_RESULT_PRESERVE_UNLINKED 0x0008 +#define NFS4_OPEN_RESULT_NO_OPEN_STATEID 0x0010 #define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020 #define NFS4_SHARE_ACCESS_MASK 0x000F @@ -70,6 +71,7 @@ #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000 #define NFS4_SHARE_WANT_DELEG_TIMESTAMPS 0x100000 +#define NFS4_SHARE_WANT_OPEN_XOR_DELEGATION 0x200000 #define NFS4_CDFC4_FORE 0x1 #define NFS4_CDFC4_BACK 0x2 -- cgit v1.2.3 From adb4b42d19aea91826621a8d0bac94cf2c08f8bc Mon Sep 17 00:00:00 2001 From: Lance Shelton Date: Sun, 16 Jun 2024 21:21:36 -0400 Subject: Return the delegation when deleting sillyrenamed files Add a callback to return the delegation in order to allow generic NFS code to return the delegation when appropriate. Signed-off-by: Trond Myklebust Signed-off-by: Lance Shelton Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index af510a7ec46a..01efacae4634 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1840,6 +1840,7 @@ struct nfs_rpc_ops { struct iattr *iattr, int *); int (*have_delegation)(struct inode *, fmode_t, int); + int (*return_delegation)(struct inode *); struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *); struct nfs_client *(*init_client) (struct nfs_client *, const struct nfs_client_initdata *); -- cgit v1.2.3 From 924cf3c91fe29c7ebd1b9d56e10f513c1bd7d4bd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:46 -0400 Subject: NFSv4.1: constify the stateid argument in nfs41_test_stateid() Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 01efacae4634..45623af3e7b8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1438,7 +1438,7 @@ struct nfs41_secinfo_no_name_args { struct nfs41_test_stateid_args { struct nfs4_sequence_args seq_args; - nfs4_stateid *stateid; + nfs4_stateid stateid; }; struct nfs41_test_stateid_res { -- cgit v1.2.3 From 5468fc8298a97ca504aca8ac0c7b9e6fd7c1b588 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Jun 2024 01:00:55 -0400 Subject: NFSv4/pNFS: Do layout state recovery upon reboot Some pNFS implementations, such as flexible files, want the client to send the layout stats and layout errors that may have incurred while the metadata server was booting. To do so, the client sends a layoutreturn with an all-zero stateid while the server is in grace during reboot recovery. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fe5b1a8bd723..ba9df1848b35 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -278,6 +278,7 @@ struct nfs_server { #define NFS_CAP_LGOPEN (1U << 5) #define NFS_CAP_CASE_INSENSITIVE (1U << 6) #define NFS_CAP_CASE_PRESERVING (1U << 7) +#define NFS_CAP_REBOOT_LAYOUTRETURN (1U << 8) #define NFS_CAP_OPEN_XOR (1U << 12) #define NFS_CAP_DELEGTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) -- cgit v1.2.3 From 2f1f31042ef07719a0d5cb4784b8a32d20c13110 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 1 Jul 2024 12:50:48 +0200 Subject: nfs: Block on write congestion Commit 6df25e58532b ("nfs: remove reliance on bdi congestion") introduced NFS-private solution for limiting number of writes outstanding against a particular server. Unlike previous bdi congestion this algorithm actually works and limits number of outstanding writeback pages to nfs_congestion_kb which scales with amount of client's memory and is capped at 256 MB. As a result some workloads such as random buffered writes over NFS got slower (from ~170 MB/s to ~126 MB/s). The fio command to reproduce is: fio --direct=0 --ioengine=sync --thread --invalidate=1 --group_reporting=1 --runtime=300 --fallocate=posix --ramp_time=10 --new_group --rw=randwrite --size=64256m --numjobs=4 --bs=4k --fsync_on_close=1 --end_fsync=1 This happens because the client sends ~256 MB worth of dirty pages to the server and any further background writeback request is ignored until the number of writeback pages gets below the threshold of 192 MB. By the time this happens and clients decides to trigger another round of writeback, the server often has no pages to write and the disk is idle. To fix this problem and make the client react faster to eased congestion of the server by blocking waiting for congestion to resolve instead of aborting writeback. This improves the random 4k buffered write throughput to 184 MB/s. Reviewed-by: Sagi Grimberg Reviewed-by: Jeff Layton Signed-off-by: Jan Kara Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index ba9df1848b35..1df86ab98c77 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -140,6 +140,7 @@ struct nfs_server { struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nlm_host *nlm_host; /* NLM client handle */ struct nfs_iostats __percpu *io_stats; /* I/O statistics */ + wait_queue_head_t write_congestion_wait; /* wait until write congestion eases */ atomic_long_t writeback; /* number of writeback pages */ unsigned int write_congested;/* flag set when writeback gets too high */ unsigned int flags; /* various flags */ -- cgit v1.2.3 From 7e8e78a0ba00c88f0ded86de64bdddc82e06b196 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:48 +0200 Subject: nfs: remove dead code for the old swap over NFS implementation Remove the code testing folio_test_swapcache either explicitly or implicitly in pagemap.h headers, as is now handled using the direct I/O path and not the buffered I/O path that these helpers are located in. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 1c315f854ea8..7bc31df457ea 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -208,8 +208,8 @@ static inline struct inode *nfs_page_to_inode(const struct nfs_page *req) struct folio *folio = nfs_page_to_folio(req); if (folio == NULL) - return page_file_mapping(req->wb_page)->host; - return folio_file_mapping(folio)->host; + return req->wb_page->mapping->host; + return folio->mapping->host; } /** -- cgit v1.2.3 From 9eb7c484db1ae993648fc9b9d48a295f4d99afb8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:50 +0200 Subject: nfs: simplify nfs_folio_find_and_lock_request nfs_folio_find_and_lock_request and the nfs_page_group_lock_head helper called by it spend quite some effort to deal with head vs subrequests. But given that only the head request can be stashed in the folio private data, non of that is required. Fold the locking logic from nfs_page_group_lock_head into nfs_folio_find_and_lock_request and simplify the result based on the invariant that we always find the head request in the folio private data. Signed-off-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 7bc31df457ea..e799d93626f1 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -155,7 +155,6 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); -extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); extern int nfs_page_group_lock_subrequests(struct nfs_page *head); extern void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, -- cgit v1.2.3 From 25edbcac6e32eab345e470d56ca9974a577b878b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:52 +0200 Subject: nfs: fold nfs_page_group_lock_subrequests into nfs_lock_and_join_requests Fold nfs_page_group_lock_subrequests into nfs_lock_and_join_requests to prepare for future changes to this code, and move the helpers to write.c as well. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index e799d93626f1..63eed97a18ad 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -155,7 +155,6 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); -extern int nfs_page_group_lock_subrequests(struct nfs_page *head); extern void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo, struct inode *inode); -- cgit v1.2.3 From f1b7c7552cbcf89e56b15ff481f3d19b53046291 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Jul 2024 07:26:53 +0200 Subject: nfs: move nfs_wait_on_request to write.c nfs_wait_on_request is now only used in write.c. Move it there and mark it static. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- include/linux/nfs_page.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 63eed97a18ad..169b4ae30ff4 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -152,7 +152,6 @@ extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req); -extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); extern void nfs_join_page_group(struct nfs_page *head, -- cgit v1.2.3 From 8e0c8d23952f338180d19718195d4f9fd10a1809 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 13 Jun 2024 14:34:30 -0400 Subject: sunrpc: fix up the special handling of sv_nrpools == 1 Only pooled services take a reference to the svc_pool_map. The sunrpc code has always used the sv_nrpools value to detect whether the service is pooled. The problem there is that nfsd is a pooled service, but when it's running in "global" pool_mode, it doesn't take a reference to the pool map because it has a sv_nrpools value of 1. This means that we have two separate codepaths for starting the server, depending on whether it's pooled or not. Fix this by adding a new flag to the svc_serv, that indicates whether the serv is pooled. With this we can have the nfsd service unconditionally take a reference, regardless of pool_mode. Note that this is a behavior change for /sys/module/sunrpc/parameters/pool_mode. Usually this file does not allow you to change the pool-mode while there are nfsd threads running, but if the pool-mode is "global" it's allowed. My assumption is that this is a bug, since it probably should never have worked this way. This patch changes the behavior such that you get back EBUSY even when nfsd is running in global mode. I think this is more reasonable behavior, and given that most people set this today using the module parameter, it's doubtful anyone will notice. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 23617da0e565..d0433e1642b3 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -85,6 +85,7 @@ struct svc_serv { char * sv_name; /* service name */ unsigned int sv_nrpools; /* number of thread pools */ + bool sv_is_pooled; /* is this a pooled service? */ struct svc_pool * sv_pools; /* array of thread pools */ int (*sv_threadfn)(void *data); -- cgit v1.2.3 From 5f71f3c325534da55dfda487a67208e2e0f0cd1b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 13 Jun 2024 14:34:33 -0400 Subject: sunrpc: refactor pool_mode setting code Allow the pool_mode setting code to be called from internal callers so we can call it from a new netlink op. Add a new svc_pool_map_get function to return the current setting. Change the existing module parameter handling to use the new interfaces under the hood. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index d0433e1642b3..a7d0406b9ef5 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -399,6 +399,8 @@ struct svc_procedure { /* * Function prototypes. */ +int sunrpc_set_pool_mode(const char *val); +int sunrpc_get_pool_mode(char *val, size_t size); int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); -- cgit v1.2.3 From 00506072d70829196849e835ee4ef0b350b61fb9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Jun 2024 08:59:10 -0400 Subject: nfsd: new netlink ops to get/set server pool_mode Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/uapi/linux/nfsd_netlink.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nfsd_netlink.h b/include/uapi/linux/nfsd_netlink.h index 24c86dbc7ed5..887cbd12b695 100644 --- a/include/uapi/linux/nfsd_netlink.h +++ b/include/uapi/linux/nfsd_netlink.h @@ -70,6 +70,14 @@ enum { NFSD_A_SERVER_SOCK_MAX = (__NFSD_A_SERVER_SOCK_MAX - 1) }; +enum { + NFSD_A_POOL_MODE_MODE = 1, + NFSD_A_POOL_MODE_NPOOLS, + + __NFSD_A_POOL_MODE_MAX, + NFSD_A_POOL_MODE_MAX = (__NFSD_A_POOL_MODE_MAX - 1) +}; + enum { NFSD_CMD_RPC_STATUS_GET = 1, NFSD_CMD_THREADS_SET, @@ -78,6 +86,8 @@ enum { NFSD_CMD_VERSION_GET, NFSD_CMD_LISTENER_SET, NFSD_CMD_LISTENER_GET, + NFSD_CMD_POOL_MODE_SET, + NFSD_CMD_POOL_MODE_GET, __NFSD_CMD_MAX, NFSD_CMD_MAX = (__NFSD_CMD_MAX - 1) -- cgit v1.2.3 From 7aa291962f4c3b7afb9a12fa60b406b95e5eacb4 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 27 Jun 2024 13:04:22 +0200 Subject: dt-bindings: clock: airoha: Add reset support to EN7581 clock binding Introduce reset capability to EN7581 device-tree clock binding documentation. Add reset register mapping between misc scu and pb scu ones in order to follow the memory order. This change is not introducing any backward compatibility issue since the EN7581 dts is not upstream yet. Fixes: 0a382be005cf ("dt-bindings: clock: airoha: add EN7581 binding") Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Rob Herring (Arm) Signed-off-by: Lorenzo Bianconi Link: https://lore.kernel.org/r/28fef3e83062d5d71e7b4be4b47583f851a15bf8.1719485847.git.lorenzo@kernel.org Signed-off-by: Stephen Boyd --- include/dt-bindings/reset/airoha,en7581-reset.h | 66 +++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 include/dt-bindings/reset/airoha,en7581-reset.h (limited to 'include') diff --git a/include/dt-bindings/reset/airoha,en7581-reset.h b/include/dt-bindings/reset/airoha,en7581-reset.h new file mode 100644 index 000000000000..6544a1790b83 --- /dev/null +++ b/include/dt-bindings/reset/airoha,en7581-reset.h @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024 AIROHA Inc + * Author: Lorenzo Bianconi + */ + +#ifndef __DT_BINDINGS_RESET_CONTROLLER_AIROHA_EN7581_H_ +#define __DT_BINDINGS_RESET_CONTROLLER_AIROHA_EN7581_H_ + +/* RST_CTRL2 */ +#define EN7581_XPON_PHY_RST 0 +#define EN7581_CPU_TIMER2_RST 1 +#define EN7581_HSUART_RST 2 +#define EN7581_UART4_RST 3 +#define EN7581_UART5_RST 4 +#define EN7581_I2C2_RST 5 +#define EN7581_XSI_MAC_RST 6 +#define EN7581_XSI_PHY_RST 7 +#define EN7581_NPU_RST 8 +#define EN7581_I2S_RST 9 +#define EN7581_TRNG_RST 10 +#define EN7581_TRNG_MSTART_RST 11 +#define EN7581_DUAL_HSI0_RST 12 +#define EN7581_DUAL_HSI1_RST 13 +#define EN7581_HSI_RST 14 +#define EN7581_DUAL_HSI0_MAC_RST 15 +#define EN7581_DUAL_HSI1_MAC_RST 16 +#define EN7581_HSI_MAC_RST 17 +#define EN7581_WDMA_RST 18 +#define EN7581_WOE0_RST 19 +#define EN7581_WOE1_RST 20 +#define EN7581_HSDMA_RST 21 +#define EN7581_TDMA_RST 22 +#define EN7581_EMMC_RST 23 +#define EN7581_SOE_RST 24 +#define EN7581_PCIE2_RST 25 +#define EN7581_XFP_MAC_RST 26 +#define EN7581_USB_HOST_P1_RST 27 +#define EN7581_USB_HOST_P1_U3_PHY_RST 28 +/* RST_CTRL1 */ +#define EN7581_PCM1_ZSI_ISI_RST 29 +#define EN7581_FE_PDMA_RST 30 +#define EN7581_FE_QDMA_RST 31 +#define EN7581_PCM_SPIWP_RST 32 +#define EN7581_CRYPTO_RST 33 +#define EN7581_TIMER_RST 34 +#define EN7581_PCM1_RST 35 +#define EN7581_UART_RST 36 +#define EN7581_GPIO_RST 37 +#define EN7581_GDMA_RST 38 +#define EN7581_I2C_MASTER_RST 39 +#define EN7581_PCM2_ZSI_ISI_RST 40 +#define EN7581_SFC_RST 41 +#define EN7581_UART2_RST 42 +#define EN7581_GDMP_RST 43 +#define EN7581_FE_RST 44 +#define EN7581_USB_HOST_P0_RST 45 +#define EN7581_GSW_RST 46 +#define EN7581_SFC2_PCM_RST 47 +#define EN7581_PCIE0_RST 48 +#define EN7581_PCIE1_RST 49 +#define EN7581_CPU_TIMER_RST 50 +#define EN7581_PCIE_HB_RST 51 +#define EN7581_XPON_MAC_RST 52 + +#endif /* __DT_BINDINGS_RESET_CONTROLLER_AIROHA_EN7581_H_ */ -- cgit v1.2.3 From 1cb6f0bae50441f4b4b32a28315853b279c7404e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 8 Jul 2024 15:31:29 +0200 Subject: bpf: Fix too early release of tcx_entry Pedro Pinto and later independently also Hyunwoo Kim and Wongi Lee reported an issue that the tcx_entry can be released too early leading to a use after free (UAF) when an active old-style ingress or clsact qdisc with a shared tc block is later replaced by another ingress or clsact instance. Essentially, the sequence to trigger the UAF (one example) can be as follows: 1. A network namespace is created 2. An ingress qdisc is created. This allocates a tcx_entry, and &tcx_entry->miniq is stored in the qdisc's miniqp->p_miniq. At the same time, a tcf block with index 1 is created. 3. chain0 is attached to the tcf block. chain0 must be connected to the block linked to the ingress qdisc to later reach the function tcf_chain0_head_change_cb_del() which triggers the UAF. 4. Create and graft a clsact qdisc. This causes the ingress qdisc created in step 1 to be removed, thus freeing the previously linked tcx_entry: rtnetlink_rcv_msg() => tc_modify_qdisc() => qdisc_create() => clsact_init() [a] => qdisc_graft() => qdisc_destroy() => __qdisc_destroy() => ingress_destroy() [b] => tcx_entry_free() => kfree_rcu() // tcx_entry freed 5. Finally, the network namespace is closed. This registers the cleanup_net worker, and during the process of releasing the remaining clsact qdisc, it accesses the tcx_entry that was already freed in step 4, causing the UAF to occur: cleanup_net() => ops_exit_list() => default_device_exit_batch() => unregister_netdevice_many() => unregister_netdevice_many_notify() => dev_shutdown() => qdisc_put() => clsact_destroy() [c] => tcf_block_put_ext() => tcf_chain0_head_change_cb_del() => tcf_chain_head_change_item() => clsact_chain_head_change() => mini_qdisc_pair_swap() // UAF There are also other variants, the gist is to add an ingress (or clsact) qdisc with a specific shared block, then to replace that qdisc, waiting for the tcx_entry kfree_rcu() to be executed and subsequently accessing the current active qdisc's miniq one way or another. The correct fix is to turn the miniq_active boolean into a counter. What can be observed, at step 2 above, the counter transitions from 0->1, at step [a] from 1->2 (in order for the miniq object to remain active during the replacement), then in [b] from 2->1 and finally [c] 1->0 with the eventual release. The reference counter in general ranges from [0,2] and it does not need to be atomic since all access to the counter is protected by the rtnl mutex. With this in place, there is no longer a UAF happening and the tcx_entry is freed at the correct time. Fixes: e420bed02507 ("bpf: Add fd-based tcx multi-prog infra with link support") Reported-by: Pedro Pinto Co-developed-by: Pedro Pinto Signed-off-by: Pedro Pinto Signed-off-by: Daniel Borkmann Cc: Hyunwoo Kim Cc: Wongi Lee Cc: Martin KaFai Lau Link: https://lore.kernel.org/r/20240708133130.11609-1-daniel@iogearbox.net Signed-off-by: Martin KaFai Lau --- include/net/tcx.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/tcx.h b/include/net/tcx.h index 72a3e75e539f..5ce0ce9e0c02 100644 --- a/include/net/tcx.h +++ b/include/net/tcx.h @@ -13,7 +13,7 @@ struct mini_Qdisc; struct tcx_entry { struct mini_Qdisc __rcu *miniq; struct bpf_mprog_bundle bundle; - bool miniq_active; + u32 miniq_active; struct rcu_head rcu; }; @@ -125,11 +125,16 @@ static inline void tcx_skeys_dec(bool ingress) tcx_dec(); } -static inline void tcx_miniq_set_active(struct bpf_mprog_entry *entry, - const bool active) +static inline void tcx_miniq_inc(struct bpf_mprog_entry *entry) { ASSERT_RTNL(); - tcx_entry(entry)->miniq_active = active; + tcx_entry(entry)->miniq_active++; +} + +static inline void tcx_miniq_dec(struct bpf_mprog_entry *entry) +{ + ASSERT_RTNL(); + tcx_entry(entry)->miniq_active--; } static inline bool tcx_entry_is_active(struct bpf_mprog_entry *entry) -- cgit v1.2.3 From 1f9a8286bc0c3df7d789ea625d9d9db3d7779f2d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 28 May 2024 14:58:03 +0000 Subject: uaccess: always export _copy_[from|to]_user with CONFIG_RUST Rust code needs to be able to access _copy_from_user and _copy_to_user so that it can skip the check_copy_size check in cases where the length is known at compile-time, mirroring the logic for when C code will skip check_copy_size. To do this, we ensure that exported versions of these methods are available when CONFIG_RUST is enabled. Alice has verified that this patch passes the CONFIG_TEST_USER_COPY test on x86 using the Android cuttlefish emulator. Signed-off-by: Arnd Bergmann Tested-by: Alice Ryhl Reviewed-by: Boqun Feng Reviewed-by: Kees Cook Signed-off-by: Alice Ryhl Acked-by: Andrew Morton Link: https://lore.kernel.org/r/20240528-alice-mm-v7-2-78222c31b8f4@google.com Signed-off-by: Miguel Ojeda --- include/linux/uaccess.h | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 3064314f4832..d8e4105a2f21 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -138,13 +139,26 @@ __copy_to_user(void __user *to, const void *from, unsigned long n) return raw_copy_to_user(to, from, n); } -#ifdef INLINE_COPY_FROM_USER +/* + * Architectures that #define INLINE_COPY_TO_USER use this function + * directly in the normal copy_to/from_user(), the other ones go + * through an extern _copy_to/from_user(), which expands the same code + * here. + * + * Rust code always uses the extern definition. + */ static inline __must_check unsigned long -_copy_from_user(void *to, const void __user *from, unsigned long n) +_inline_copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { + /* + * Ensure that bad access_ok() speculation will not + * lead to nasty side effects *after* the copy is + * finished: + */ + barrier_nospec(); instrument_copy_from_user_before(to, from, n); res = raw_copy_from_user(to, from, n); instrument_copy_from_user_after(to, from, n, res); @@ -153,14 +167,11 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) memset(to + (n - res), 0, res); return res; } -#else extern __must_check unsigned long _copy_from_user(void *, const void __user *, unsigned long); -#endif -#ifdef INLINE_COPY_TO_USER static inline __must_check unsigned long -_copy_to_user(void __user *to, const void *from, unsigned long n) +_inline_copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (should_fail_usercopy()) @@ -171,25 +182,32 @@ _copy_to_user(void __user *to, const void *from, unsigned long n) } return n; } -#else extern __must_check unsigned long _copy_to_user(void __user *, const void *, unsigned long); -#endif static __always_inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - if (check_copy_size(to, n, false)) - n = _copy_from_user(to, from, n); - return n; + if (!check_copy_size(to, n, false)) + return n; +#ifdef INLINE_COPY_FROM_USER + return _inline_copy_from_user(to, from, n); +#else + return _copy_from_user(to, from, n); +#endif } static __always_inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { - if (check_copy_size(from, n, true)) - n = _copy_to_user(to, from, n); - return n; + if (!check_copy_size(from, n, true)) + return n; + +#ifdef INLINE_COPY_TO_USER + return _inline_copy_to_user(to, from, n); +#else + return _copy_to_user(to, from, n); +#endif } #ifndef copy_mc_to_kernel -- cgit v1.2.3 From 14498e993fb77adce75f0106162902b2f8b1d480 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 3 Jul 2024 14:37:50 -0700 Subject: Input: make events() method return number of events processed In preparation to consolidating filtering and event processing in the input core change events() method to return number of events processed by it. Reviewed-by: Jeff LaBundy Reviewed-by: Benjamin Tissoires Link: https://lore.kernel.org/r/20240703213756.3375978-4-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index c22ac465254b..89a0be6ee0e2 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -275,7 +275,8 @@ struct input_handle; * it may not sleep * @events: event sequence handler. This method is being called by * input core with interrupts disabled and dev->event_lock - * spinlock held and so it may not sleep + * spinlock held and so it may not sleep. The method must return + * number of events passed to it. * @filter: similar to @event; separates normal event handlers from * "filters". * @match: called after comparing device's id with handler's id_table @@ -312,8 +313,8 @@ struct input_handler { void *private; void (*event)(struct input_handle *handle, unsigned int type, unsigned int code, int value); - void (*events)(struct input_handle *handle, - const struct input_value *vals, unsigned int count); + unsigned int (*events)(struct input_handle *handle, + struct input_value *vals, unsigned int count); bool (*filter)(struct input_handle *handle, unsigned int type, unsigned int code, int value); bool (*match)(struct input_handler *handler, struct input_dev *dev); int (*connect)(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id); -- cgit v1.2.3 From 6badc62f8fa4a1165f3f440943045b73c7a47735 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Mon, 27 May 2024 18:14:40 +0200 Subject: of: dynamic: Constify parameter in of_changeset_add_prop_string_array() The str_array parameter has no reason to be an un-const array. Indeed, elements of the 'str_array' array are not changed by the code. Constify the 'str_array' array parameter. With this const qualifier added, the following construction is allowed: static const char * const tab_str[] = { "string1", "string2" }; of_changeset_add_prop_string_array(..., tab_str, ARRAY_SIZE(tab_str)); Signed-off-by: Herve Codina Link: https://lore.kernel.org/r/20240527161450.326615-14-herve.codina@bootlin.com Signed-off-by: Rob Herring (Arm) --- include/linux/of.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index a0bedd038a05..ee9a385a13db 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1639,7 +1639,7 @@ int of_changeset_add_prop_string(struct of_changeset *ocs, int of_changeset_add_prop_string_array(struct of_changeset *ocs, struct device_node *np, const char *prop_name, - const char **str_array, size_t sz); + const char * const *str_array, size_t sz); int of_changeset_add_prop_u32_array(struct of_changeset *ocs, struct device_node *np, const char *prop_name, -- cgit v1.2.3 From f2b388d63e6c7c6151bb295e5cbf48a2ac91e51a Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Mon, 27 May 2024 18:14:42 +0200 Subject: of: dynamic: Introduce of_changeset_add_prop_bool() APIs to add some properties in a changeset exist but nothing to add a DT boolean property (i.e. a property without any values). Fill this lack with of_changeset_add_prop_bool(). Signed-off-by: Herve Codina Link: https://lore.kernel.org/r/20240527161450.326615-16-herve.codina@bootlin.com Signed-off-by: Rob Herring (Arm) --- include/linux/of.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index ee9a385a13db..13cf7a43b473 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1652,6 +1652,9 @@ static inline int of_changeset_add_prop_u32(struct of_changeset *ocs, return of_changeset_add_prop_u32_array(ocs, np, prop_name, &val, 1); } +int of_changeset_add_prop_bool(struct of_changeset *ocs, struct device_node *np, + const char *prop_name); + #else /* CONFIG_OF_DYNAMIC */ static inline int of_reconfig_notifier_register(struct notifier_block *nb) { -- cgit v1.2.3 From b4b1ddc9dfe997a5f492fa3a36487f8e7a5de30d Mon Sep 17 00:00:00 2001 From: Lizhe Date: Thu, 4 Jul 2024 12:23:55 +0530 Subject: cpufreq: Make cpufreq_driver->exit() return void The cpufreq core doesn't check the return type of the exit() callback and there is not much the core can do on failures at that point. Just drop the returned value and make it return void. Signed-off-by: Lizhe [ Viresh: Reworked the patches to fix all missing changes together. ] Signed-off-by: Viresh Kumar Reviewed-by: AngeloGioacchino Del Regno # Mediatek Acked-by: Sudeep Holla # scpi, scmi, vexpress Acked-by: Mario Limonciello # amd Reviewed-by: Florian Fainelli # bmips Acked-by: Rafael J. Wysocki Acked-by: Kevin Hilman # omap --- include/linux/cpufreq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 20f7e98ee8af..e3b3face9134 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -396,7 +396,7 @@ struct cpufreq_driver { int (*online)(struct cpufreq_policy *policy); int (*offline)(struct cpufreq_policy *policy); - int (*exit)(struct cpufreq_policy *policy); + void (*exit)(struct cpufreq_policy *policy); int (*suspend)(struct cpufreq_policy *policy); int (*resume)(struct cpufreq_policy *policy); -- cgit v1.2.3 From 4aa99c71e42ad60178c1154ec24e3df9c684fb67 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Jun 2024 19:01:17 +0200 Subject: jbd2: make jbd2_journal_get_max_txn_bufs() internal There's no reason to have jbd2_journal_get_max_txn_bufs() public function. Currently all users are internal and can use journal->j_max_transaction_buffers instead. This saves some unnecessary recomputations of the limit as a bonus which becomes important as this function gets more complex in the following patch. CC: stable@vger.kernel.org Signed-off-by: Jan Kara Reviewed-by: Zhang Yi Link: https://patch.msgid.link/20240624170127.3253-1-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index ab04c1c27fae..f91b930abe20 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1660,11 +1660,6 @@ int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode); int jbd2_fc_wait_bufs(journal_t *journal, int num_blks); int jbd2_fc_release_bufs(journal_t *journal); -static inline int jbd2_journal_get_max_txn_bufs(journal_t *journal) -{ - return (journal->j_total_len - journal->j_fc_wbufsize) / 4; -} - /* * is_journal_abort * -- cgit v1.2.3 From e3a00a23781c1f2fcda98a7aecaac515558e7a35 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Jun 2024 19:01:18 +0200 Subject: jbd2: precompute number of transaction descriptor blocks Instead of computing the number of descriptor blocks a transaction can have each time we need it (which is currently when starting each transaction but will become more frequent later) precompute the number once during journal initialization together with maximum transaction size. We perform the precomputation whenever journal feature set is updated similarly as for computation of journal->j_revoke_records_per_block. CC: stable@vger.kernel.org Signed-off-by: Jan Kara Reviewed-by: Zhang Yi Link: https://patch.msgid.link/20240624170127.3253-2-jack@suse.cz Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f91b930abe20..b900c642210c 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1085,6 +1085,13 @@ struct journal_s */ int j_revoke_records_per_block; + /** + * @j_transaction_overhead: + * + * Number of blocks each transaction needs for its own bookkeeping + */ + int j_transaction_overhead_buffers; + /** * @j_commit_interval: * -- cgit v1.2.3 From fe3d508ba95bc63adba661355115be340275c0d1 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 8 Jul 2024 09:16:48 +0000 Subject: block: Validate logical block size in blk_validate_limits() Some drivers validate that their own logical block size. It is no harm to always do this, so validate in blk_validate_limits(). This allows us to remove the validation in most of those drivers. Add a comment to blk_validate_block_size() to inform users that self- validation of LBS is usually unnecessary. Signed-off-by: John Garry Reviewed-by: Damien Le Moal Link: https://lore.kernel.org/r/20240708091651.177447-3-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 02e04df27282..dce4a6bf7307 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -268,6 +268,7 @@ static inline dev_t disk_devt(struct gendisk *disk) return MKDEV(disk->major, disk->first_minor); } +/* blk_validate_limits() validates bsize, so drivers don't usually need to */ static inline int blk_validate_block_size(unsigned long bsize) { if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) -- cgit v1.2.3 From 97c4264f62a73664a2934203346a3e04c109b8ec Mon Sep 17 00:00:00 2001 From: Peter Griffin Date: Tue, 2 Jul 2024 08:35:09 +0200 Subject: soc: samsung: exynos-pmu: add support for PMU_ALIVE non atomic registers Not all registers in PMU_ALIVE block support atomic set/clear operations. GS101_SYSIP_DAT0 and GS101_SYSTEM_CONFIGURATION registers are two regs where attempting atomic access fails. As documentation on exactly which registers support atomic operations is not forthcoming. We default to atomic access, unless the register is explicitly added to the tensor_is_atomic() function. Update the comment to reflect this as well. Reviewed-by: Will McVicker Tested-by: Will McVicker Signed-off-by: Peter Griffin Link: https://lore.kernel.org/r/20240628223506.1237523-4-peter.griffin@linaro.org Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240702063514.6215-2-krzysztof.kozlowski@linaro.org Signed-off-by: Arnd Bergmann --- include/linux/soc/samsung/exynos-regs-pmu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index aa840ed043e1..f411c176536d 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -657,4 +657,8 @@ #define EXYNOS5433_PAD_RETENTION_UFS_OPTION (0x3268) #define EXYNOS5433_PAD_RETENTION_FSYSGENIO_OPTION (0x32A8) +/* For Tensor GS101 */ +#define GS101_SYSIP_DAT0 (0x810) +#define GS101_SYSTEM_CONFIGURATION (0x3A00) + #endif /* __LINUX_SOC_EXYNOS_REGS_PMU_H */ -- cgit v1.2.3 From e6c06ca8f21d1cdb444c708e385d86a54bc5fc60 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:30 +0200 Subject: wifi: cfg80211: add support for advertising multiple radios belonging to a wiphy The prerequisite for MLO support in cfg80211/mac80211 is that all the links participating in MLO must be from the same wiphy/ieee80211_hw. To meet this expectation, some drivers may need to group multiple discrete hardware each acting as a link in MLO under single wiphy. With this change, supported frequencies and interface combinations of each individual radio are reported to user space. This allows user space to figure out the limitations of what combination of channels can be used concurrently. Even for non-MLO devices, this improves support for devices capable of running on multiple channels at the same time. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/18a88f9ce82b1c9f7c12f1672430eaf2bb0be295.1720514221.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 42 +++++++++++++++++++++++++++- include/uapi/linux/nl80211.h | 65 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6f992aff74ae..a00cf80e61dc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5046,7 +5046,9 @@ struct ieee80211_iface_limit { * struct ieee80211_iface_combination - possible interface combination * * With this structure the driver can describe which interface - * combinations it supports concurrently. + * combinations it supports concurrently. When set in a struct wiphy_radio, + * the combinations refer to combinations of interfaces currently active on + * that radio. * * Examples: * @@ -5406,6 +5408,38 @@ struct wiphy_iftype_akm_suites { int n_akm_suites; }; +/** + * struct wiphy_radio_freq_range - wiphy frequency range + * @start_freq: start range edge frequency (kHz) + * @end_freq: end range edge frequency (kHz) + */ +struct wiphy_radio_freq_range { + u32 start_freq; + u32 end_freq; +}; + + +/** + * struct wiphy_radio - physical radio of a wiphy + * This structure describes a physical radio belonging to a wiphy. + * It is used to describe concurrent-channel capabilities. Only one channel + * can be active on the radio described by struct wiphy_radio. + * + * @freq_range: frequency range that the radio can operate on. + * @n_freq_range: number of elements in @freq_range + * + * @iface_combinations: Valid interface combinations array, should not + * list single interface types. + * @n_iface_combinations: number of entries in @iface_combinations array. + */ +struct wiphy_radio { + const struct wiphy_radio_freq_range *freq_range; + int n_freq_range; + + const struct ieee80211_iface_combination *iface_combinations; + int n_iface_combinations; +}; + #define CFG80211_HW_TIMESTAMP_ALL_PEERS 0xffff /** @@ -5624,6 +5658,9 @@ struct wiphy_iftype_akm_suites { * A value of %CFG80211_HW_TIMESTAMP_ALL_PEERS indicates the driver * supports enabling HW timestamping for all peers (i.e. no need to * specify a mac address). + * + * @radio: radios belonging to this wiphy + * @n_radio: number of radios */ struct wiphy { struct mutex mtx; @@ -5774,6 +5811,9 @@ struct wiphy { u16 hw_timestamp_max_peers; + int n_radio; + const struct wiphy_radio *radio; + char priv[] __aligned(NETDEV_ALIGN); }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 6ae3997061b6..f97f5adc8d51 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2052,6 +2052,10 @@ enum nl80211_commands { * @NL80211_ATTR_INTERFACE_COMBINATIONS: Nested attribute listing the supported * interface combinations. In each nested item, it contains attributes * defined in &enum nl80211_if_combination_attrs. + * If the wiphy uses multiple radios (@NL80211_ATTR_WIPHY_RADIOS is set), + * this attribute contains the interface combinations of the first radio. + * See @NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS for the global wiphy + * combinations for the sum of all radios. * @NL80211_ATTR_SOFTWARE_IFTYPES: Nested attribute (just like * %NL80211_ATTR_SUPPORTED_IFTYPES) containing the interface types that * are managed in software: interfaces of these types aren't subject to @@ -2856,6 +2860,14 @@ enum nl80211_commands { * %NL80211_CMD_ASSOCIATE indicating the SPP A-MSDUs * are used on this connection * + * @NL80211_ATTR_WIPHY_RADIOS: Nested attribute describing physical radios + * belonging to this wiphy. See &enum nl80211_wiphy_radio_attrs. + * + * @NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS: Nested attribute listing the + * supported interface combinations for all radios combined. In each + * nested item, it contains attributes defined in + * &enum nl80211_if_combination_attrs. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3401,6 +3413,9 @@ enum nl80211_attrs { NL80211_ATTR_ASSOC_SPP_AMSDU, + NL80211_ATTR_WIPHY_RADIOS, + NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -8005,4 +8020,54 @@ enum nl80211_ap_settings_flags { NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT = 1 << 1, }; +/** + * enum nl80211_wiphy_radio_attrs - wiphy radio attributes + * + * @__NL80211_WIPHY_RADIO_ATTR_INVALID: Invalid + * + * @NL80211_WIPHY_RADIO_ATTR_INDEX: Index of this radio (u32) + * @NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE: Frequency range supported by this + * radio. Attribute may be present multiple times. + * @NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION: Supported interface + * combination for this radio. Attribute may be present multiple times + * and contains attributes defined in &enum nl80211_if_combination_attrs. + * + * @__NL80211_WIPHY_RADIO_ATTR_LAST: Internal + * @NL80211_WIPHY_RADIO_ATTR_MAX: Highest attribute + */ +enum nl80211_wiphy_radio_attrs { + __NL80211_WIPHY_RADIO_ATTR_INVALID, + + NL80211_WIPHY_RADIO_ATTR_INDEX, + NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE, + NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION, + + /* keep last */ + __NL80211_WIPHY_RADIO_ATTR_LAST, + NL80211_WIPHY_RADIO_ATTR_MAX = __NL80211_WIPHY_RADIO_ATTR_LAST - 1, +}; + +/** + * enum nl80211_wiphy_radio_freq_range - wiphy radio frequency range + * + * @__NL80211_WIPHY_RADIO_FREQ_ATTR_INVALID: Invalid + * + * @NL80211_WIPHY_RADIO_FREQ_ATTR_START: Frequency range start (u32). + * The unit is kHz. + * @NL80211_WIPHY_RADIO_FREQ_ATTR_END: Frequency range end (u32). + * The unit is kHz. + * + * @__NL80211_WIPHY_RADIO_FREQ_ATTR_LAST: Internal + * @NL80211_WIPHY_RADIO_FREQ_ATTR_MAX: Highest attribute + */ +enum nl80211_wiphy_radio_freq_range { + __NL80211_WIPHY_RADIO_FREQ_ATTR_INVALID, + + NL80211_WIPHY_RADIO_FREQ_ATTR_START, + NL80211_WIPHY_RADIO_FREQ_ATTR_END, + + __NL80211_WIPHY_RADIO_FREQ_ATTR_LAST, + NL80211_WIPHY_RADIO_FREQ_ATTR_MAX = __NL80211_WIPHY_RADIO_FREQ_ATTR_LAST - 1, +}; + #endif /* __LINUX_NL80211_H */ -- cgit v1.2.3 From abb4cfe3661aa05426916b21164f88ca5a405a3a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:31 +0200 Subject: wifi: cfg80211: extend interface combination check for multi-radio Add a field in struct iface_combination_params to check per-radio interface combinations instead of per-wiphy ones. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/32b28da89c2d759b0324deeefe2be4cee91de18e.1720514221.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a00cf80e61dc..4767e2c76b01 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1598,6 +1598,7 @@ struct cfg80211_color_change_settings { * * Used to pass interface combination parameters * + * @radio_idx: wiphy radio index or -1 for global * @num_different_channels: the number of different channels we want * to use for verification * @radar_detect: a bitmap where each bit corresponds to a channel @@ -1611,6 +1612,7 @@ struct cfg80211_color_change_settings { * the verification */ struct iface_combination_params { + int radio_idx; int num_different_channels; u8 radar_detect; int iftype_num[NUM_NL80211_IFTYPES]; @@ -4580,6 +4582,8 @@ struct mgmt_frame_regs { * * @set_hw_timestamp: Enable/disable HW timestamping of TM/FTM frames. * @set_ttlm: set the TID to link mapping. + * @get_radio_mask: get bitmask of radios in use. + * (invoked with the wiphy mutex held) */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4941,6 +4945,7 @@ struct cfg80211_ops { struct cfg80211_set_hw_timestamp *hwts); int (*set_ttlm)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ttlm_params *params); + u32 (*get_radio_mask)(struct wiphy *wiphy, struct net_device *dev); }; /* -- cgit v1.2.3 From 417d88189ccf645a157a3dc3da7e894c1f2829d1 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 4 Jul 2024 22:25:59 +0200 Subject: sctp: Fix typos and improve comments Fix typos s/steam/stream/ and spell out Schedule/Unschedule in the comments. Compile-tested only. Signed-off-by: Thorsten Blum Reviewed-by: Simon Horman Link: https://patch.msgid.link/20240704202558.62704-2-thorsten.blum@toblux.com Signed-off-by: Paolo Abeni --- include/net/sctp/stream_sched.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index 572d73fdcd5e..8034bf5febbe 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -35,10 +35,10 @@ struct sctp_sched_ops { struct sctp_chunk *(*dequeue)(struct sctp_outq *q); /* Called only if the chunk fit the packet */ void (*dequeue_done)(struct sctp_outq *q, struct sctp_chunk *chunk); - /* Sched all chunks already enqueued */ - void (*sched_all)(struct sctp_stream *steam); - /* Unched all chunks already enqueued */ - void (*unsched_all)(struct sctp_stream *steam); + /* Schedule all chunks already enqueued */ + void (*sched_all)(struct sctp_stream *stream); + /* Unschedule all chunks already enqueued */ + void (*unsched_all)(struct sctp_stream *stream); }; int sctp_sched_set_sched(struct sctp_association *asoc, -- cgit v1.2.3 From 510dba80ed669d6123901ccf0476706122b008b1 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:32 +0200 Subject: wifi: cfg80211: add helper for checking if a chandef is valid on a radio Check if the full channel width is in the radio's frequency range. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/7c8ea146feb6f37cee62e5ba6be5370403695797.1720514221.git-series.nbd@nbd.name [add missing Return: documentation] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4767e2c76b01..192d72c8b465 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6508,6 +6508,17 @@ static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan) return ieee80211_frequency_to_channel(chan->center_freq) % 16 == 5; } +/** + * cfg80211_radio_chandef_valid - Check if the radio supports the chandef + * + * @radio: wiphy radio + * @chandef: chandef for current channel + * + * Return: whether or not the given chandef is valid for the given radio + */ +bool cfg80211_radio_chandef_valid(const struct wiphy_radio *radio, + const struct cfg80211_chan_def *chandef); + /** * ieee80211_get_response_rate - get basic rate for a given rate * -- cgit v1.2.3 From 2920bc8d916d30b5273ec16e6878f13b24e3851f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 9 Jul 2024 10:38:34 +0200 Subject: wifi: mac80211: add radio index to ieee80211_chanctx_conf Will be used to explicitly assign a channel context to a wiphy radio. Signed-off-by: Felix Fietkau Link: https://patch.msgid.link/59f76f57d935f155099276be22badfa671d5bfd9.1720514221.git-series.nbd@nbd.name Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bd0f8aefa797..e78ccbe38d6d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -250,6 +250,7 @@ struct ieee80211_chan_req { * @min_def: the minimum channel definition currently required. * @ap: the channel definition the AP actually is operating as, * for use with (wider bandwidth) OFDMA + * @radio_idx: index of the wiphy radio used used for this channel * @rx_chains_static: The number of RX chains that must always be * active on the channel to receive MIMO transmissions * @rx_chains_dynamic: The number of RX chains that must be enabled @@ -264,6 +265,7 @@ struct ieee80211_chanctx_conf { struct cfg80211_chan_def min_def; struct cfg80211_chan_def ap; + int radio_idx; u8 rx_chains_static, rx_chains_dynamic; bool radar_enabled; -- cgit v1.2.3 From ef6dfcbcbbf7d9a414feb44aa093d2d8592a2e20 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 13 Feb 2024 23:02:20 +0100 Subject: mfd: tmio: Remove obsolete platform_data With commit 8971bb812e3c ("mfd: remove toshiba tmio drivers"), all users of platform data for NAND and framebuffers are gone. So, remove definitions from the header, too. Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240213220221.2380-9-wsa+renesas@sang-engineering.com Signed-off-by: Lee Jones --- include/linux/mfd/tmio.h | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index eace8ea6cda0..bc53323293a3 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -103,31 +103,4 @@ struct tmio_mmc_data { void (*set_pwr)(struct platform_device *host, int state); void (*set_clk_div)(struct platform_device *host, int state); }; - -/* - * data for the NAND controller - */ -struct tmio_nand_data { - struct nand_bbt_descr *badblock_pattern; - struct mtd_partition *partition; - unsigned int num_partitions; - const char *const *part_parsers; -}; - -#define FBIO_TMIO_ACC_WRITE 0x7C639300 -#define FBIO_TMIO_ACC_SYNC 0x7C639301 - -struct tmio_fb_data { - int (*lcd_set_power)(struct platform_device *fb_dev, - bool on); - int (*lcd_mode)(struct platform_device *fb_dev, - const struct fb_videomode *mode); - int num_modes; - struct fb_videomode *modes; - - /* in mm: size of screen */ - int height; - int width; -}; - #endif -- cgit v1.2.3 From 6bec678b9872271ce2e0879c360ad7a1a524c7fa Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 13 Feb 2024 23:02:21 +0100 Subject: mfd: tmio: Remove obsolete io accessors Since commit 568494db6809 ("mtd: remove tmio_nand driver") and commit aceae7848624 ("fbdev: remove tmiofb driver"), these accessors have no users anymore. Remove them. Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240213220221.2380-10-wsa+renesas@sang-engineering.com Signed-off-by: Lee Jones --- include/linux/mfd/tmio.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index bc53323293a3..4223315d2b2a 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -10,31 +10,6 @@ #include #include -#define tmio_ioread8(addr) readb(addr) -#define tmio_ioread16(addr) readw(addr) -#define tmio_ioread16_rep(r, b, l) readsw(r, b, l) -#define tmio_ioread32(addr) \ - (((u32)readw((addr))) | (((u32)readw((addr) + 2)) << 16)) - -#define tmio_iowrite8(val, addr) writeb((val), (addr)) -#define tmio_iowrite16(val, addr) writew((val), (addr)) -#define tmio_iowrite16_rep(r, b, l) writesw(r, b, l) -#define tmio_iowrite32(val, addr) \ - do { \ - writew((val), (addr)); \ - writew((val) >> 16, (addr) + 2); \ - } while (0) - -#define sd_config_write8(base, shift, reg, val) \ - tmio_iowrite8((val), (base) + ((reg) << (shift))) -#define sd_config_write16(base, shift, reg, val) \ - tmio_iowrite16((val), (base) + ((reg) << (shift))) -#define sd_config_write32(base, shift, reg, val) \ - do { \ - tmio_iowrite16((val), (base) + ((reg) << (shift))); \ - tmio_iowrite16((val) >> 16, (base) + ((reg + 2) << (shift))); \ - } while (0) - /* tmio MMC platform flags */ /* * Some controllers can support a 2-byte block size when the bus width -- cgit v1.2.3 From d411ccbe103d665a31861987e2f1b36944ab63f2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 13 Feb 2024 23:02:23 +0100 Subject: mfd: tmio: Update include files Remove meanwhile unneeded includes, only add types.h for dma_addr_t. Also, remove an obsolete forward declaration while here. Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240213220221.2380-12-wsa+renesas@sang-engineering.com Signed-off-by: Lee Jones --- include/linux/mfd/tmio.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 4223315d2b2a..f71d4e507dcb 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -2,13 +2,8 @@ #ifndef MFD_TMIO_H #define MFD_TMIO_H -#include -#include -#include -#include -#include #include -#include +#include /* tmio MMC platform flags */ /* @@ -59,8 +54,6 @@ /* Some controllers have a CBSY bit */ #define TMIO_MMC_HAVE_CBSY BIT(11) -struct dma_chan; - /* * data for the MMC controller */ -- cgit v1.2.3 From 763135b819ad3e3e0301b66094d50923e64092a7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 13 Feb 2024 23:02:24 +0100 Subject: mfd: tmio: Sanitize comments Reformat the comments to utilize the maximum line length and use single line comments where appropriate. Remove superfluous comments, too. Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240213220221.2380-13-wsa+renesas@sang-engineering.com Signed-off-by: Lee Jones --- include/linux/mfd/tmio.h | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index f71d4e507dcb..1cf418643da9 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -5,23 +5,23 @@ #include #include -/* tmio MMC platform flags */ +/* TMIO MMC platform flags */ + /* - * Some controllers can support a 2-byte block size when the bus width - * is configured in 4-bit mode. + * Some controllers can support a 2-byte block size when the bus width is + * configured in 4-bit mode. */ #define TMIO_MMC_BLKSZ_2BYTES BIT(1) -/* - * Some controllers can support SDIO IRQ signalling. - */ + +/* Some controllers can support SDIO IRQ signalling */ #define TMIO_MMC_SDIO_IRQ BIT(2) /* Some features are only available or tested on R-Car Gen2 or later */ #define TMIO_MMC_MIN_RCAR2 BIT(3) /* - * Some controllers require waiting for the SD bus to become - * idle before writing to some registers. + * Some controllers require waiting for the SD bus to become idle before + * writing to some registers. */ #define TMIO_MMC_HAS_IDLE_WAIT BIT(4) @@ -32,31 +32,21 @@ */ #define TMIO_MMC_USE_BUSY_TIMEOUT BIT(5) -/* - * Some controllers have CMD12 automatically - * issue/non-issue register - */ +/* Some controllers have CMD12 automatically issue/non-issue register */ #define TMIO_MMC_HAVE_CMD12_CTRL BIT(7) /* Controller has some SDIO status bits which must be 1 */ #define TMIO_MMC_SDIO_STATUS_SETBITS BIT(8) -/* - * Some controllers have a 32-bit wide data port register - */ +/* Some controllers have a 32-bit wide data port register */ #define TMIO_MMC_32BIT_DATA_PORT BIT(9) -/* - * Some controllers allows to set SDx actual clock - */ +/* Some controllers allows to set SDx actual clock */ #define TMIO_MMC_CLK_ACTUAL BIT(10) /* Some controllers have a CBSY bit */ #define TMIO_MMC_HAVE_CBSY BIT(11) -/* - * data for the MMC controller - */ struct tmio_mmc_data { void *chan_priv_tx; void *chan_priv_rx; -- cgit v1.2.3 From 70b46487b1558eb25ea6e533c905131b10596dc0 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 13 Feb 2024 23:02:25 +0100 Subject: mfd: tmio: Move header to platform_data All the MFD components are gone from the header meanwhile. Only the MMC relevant data is left which makes it a platform_data for the MMC controller. Move the header to the now fitting directory. Signed-off-by: Wolfram Sang Acked-by: Ulf Hansson # For MMC Acked-by: John Paul Adrian Glaubitz Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240213220221.2380-14-wsa+renesas@sang-engineering.com Signed-off-by: Lee Jones --- include/linux/mfd/tmio.h | 64 -------------------------------------- include/linux/platform_data/tmio.h | 64 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 64 deletions(-) delete mode 100644 include/linux/mfd/tmio.h create mode 100644 include/linux/platform_data/tmio.h (limited to 'include') diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h deleted file mode 100644 index 1cf418643da9..000000000000 --- a/include/linux/mfd/tmio.h +++ /dev/null @@ -1,64 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef MFD_TMIO_H -#define MFD_TMIO_H - -#include -#include - -/* TMIO MMC platform flags */ - -/* - * Some controllers can support a 2-byte block size when the bus width is - * configured in 4-bit mode. - */ -#define TMIO_MMC_BLKSZ_2BYTES BIT(1) - -/* Some controllers can support SDIO IRQ signalling */ -#define TMIO_MMC_SDIO_IRQ BIT(2) - -/* Some features are only available or tested on R-Car Gen2 or later */ -#define TMIO_MMC_MIN_RCAR2 BIT(3) - -/* - * Some controllers require waiting for the SD bus to become idle before - * writing to some registers. - */ -#define TMIO_MMC_HAS_IDLE_WAIT BIT(4) - -/* - * Use the busy timeout feature. Probably all TMIO versions support it. Yet, - * we don't have documentation for old variants, so we enable only known good - * variants with this flag. Can be removed once all variants are known good. - */ -#define TMIO_MMC_USE_BUSY_TIMEOUT BIT(5) - -/* Some controllers have CMD12 automatically issue/non-issue register */ -#define TMIO_MMC_HAVE_CMD12_CTRL BIT(7) - -/* Controller has some SDIO status bits which must be 1 */ -#define TMIO_MMC_SDIO_STATUS_SETBITS BIT(8) - -/* Some controllers have a 32-bit wide data port register */ -#define TMIO_MMC_32BIT_DATA_PORT BIT(9) - -/* Some controllers allows to set SDx actual clock */ -#define TMIO_MMC_CLK_ACTUAL BIT(10) - -/* Some controllers have a CBSY bit */ -#define TMIO_MMC_HAVE_CBSY BIT(11) - -struct tmio_mmc_data { - void *chan_priv_tx; - void *chan_priv_rx; - unsigned int hclk; - unsigned long capabilities; - unsigned long capabilities2; - unsigned long flags; - u32 ocr_mask; /* available voltages */ - dma_addr_t dma_rx_offset; - unsigned int max_blk_count; - unsigned short max_segs; - void (*set_pwr)(struct platform_device *host, int state); - void (*set_clk_div)(struct platform_device *host, int state); -}; -#endif diff --git a/include/linux/platform_data/tmio.h b/include/linux/platform_data/tmio.h new file mode 100644 index 000000000000..1cf418643da9 --- /dev/null +++ b/include/linux/platform_data/tmio.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef MFD_TMIO_H +#define MFD_TMIO_H + +#include +#include + +/* TMIO MMC platform flags */ + +/* + * Some controllers can support a 2-byte block size when the bus width is + * configured in 4-bit mode. + */ +#define TMIO_MMC_BLKSZ_2BYTES BIT(1) + +/* Some controllers can support SDIO IRQ signalling */ +#define TMIO_MMC_SDIO_IRQ BIT(2) + +/* Some features are only available or tested on R-Car Gen2 or later */ +#define TMIO_MMC_MIN_RCAR2 BIT(3) + +/* + * Some controllers require waiting for the SD bus to become idle before + * writing to some registers. + */ +#define TMIO_MMC_HAS_IDLE_WAIT BIT(4) + +/* + * Use the busy timeout feature. Probably all TMIO versions support it. Yet, + * we don't have documentation for old variants, so we enable only known good + * variants with this flag. Can be removed once all variants are known good. + */ +#define TMIO_MMC_USE_BUSY_TIMEOUT BIT(5) + +/* Some controllers have CMD12 automatically issue/non-issue register */ +#define TMIO_MMC_HAVE_CMD12_CTRL BIT(7) + +/* Controller has some SDIO status bits which must be 1 */ +#define TMIO_MMC_SDIO_STATUS_SETBITS BIT(8) + +/* Some controllers have a 32-bit wide data port register */ +#define TMIO_MMC_32BIT_DATA_PORT BIT(9) + +/* Some controllers allows to set SDx actual clock */ +#define TMIO_MMC_CLK_ACTUAL BIT(10) + +/* Some controllers have a CBSY bit */ +#define TMIO_MMC_HAVE_CBSY BIT(11) + +struct tmio_mmc_data { + void *chan_priv_tx; + void *chan_priv_rx; + unsigned int hclk; + unsigned long capabilities; + unsigned long capabilities2; + unsigned long flags; + u32 ocr_mask; /* available voltages */ + dma_addr_t dma_rx_offset; + unsigned int max_blk_count; + unsigned short max_segs; + void (*set_pwr)(struct platform_device *host, int state); + void (*set_clk_div)(struct platform_device *host, int state); +}; +#endif -- cgit v1.2.3 From 32625ac9e110da530db4f0faf918b1f5674e7ca3 Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Thu, 27 Jun 2024 19:47:51 +0800 Subject: dt-bindings: power: add Amlogic A5 power domains Add devicetree binding document and related header file for Amlogic A5 secure power domains. Signed-off-by: Hongyu Chen Signed-off-by: Xianwei Zhao Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240627-a5_secpower-v1-1-1f47dde1270c@amlogic.com Signed-off-by: Ulf Hansson --- include/dt-bindings/power/amlogic,a5-pwrc.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/dt-bindings/power/amlogic,a5-pwrc.h (limited to 'include') diff --git a/include/dt-bindings/power/amlogic,a5-pwrc.h b/include/dt-bindings/power/amlogic,a5-pwrc.h new file mode 100644 index 000000000000..3a6f53eb959f --- /dev/null +++ b/include/dt-bindings/power/amlogic,a5-pwrc.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */ +/* + * Copyright (C) 2024 Amlogic, Inc. All rights reserved + */ + +#ifndef _DT_BINDINGS_AMLOGIC_A5_POWER_H +#define _DT_BINDINGS_AMLOGIC_A5_POWER_H + +#define PWRC_A5_NNA_ID 0 +#define PWRC_A5_AUDIO_ID 1 +#define PWRC_A5_SDIOA_ID 2 +#define PWRC_A5_EMMC_ID 3 +#define PWRC_A5_USB_COMB_ID 4 +#define PWRC_A5_ETH_ID 5 +#define PWRC_A5_RSA_ID 6 +#define PWRC_A5_AUDIO_PDM_ID 7 +#define PWRC_A5_DMC_ID 8 +#define PWRC_A5_SYS_WRAP_ID 9 +#define PWRC_A5_DSPA_ID 10 + +#endif -- cgit v1.2.3 From 95f6454da936e4e6418facddf66596442a842d74 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 24 Jun 2024 10:18:05 +0530 Subject: PM: domains: Allow devices attached to genpd to be managed by HW Some power-domains may be capable of relying on the HW to control the power for a device that's hooked up to it. Typically, for these kinds of configurations the consumer driver should be able to change the behavior of power domain at runtime, control the power domain in SW mode for certain configurations and handover the control to HW mode for other usecases. To allow a consumer driver to change the behaviour of the PM domain for its device, let's provide a new function, dev_pm_genpd_set_hwmode(). Moreover, let's add a corresponding optional genpd callback, ->set_hwmode_dev(), which the genpd provider should implement if it can support switching between HW controlled mode and SW controlled mode. Similarly, add the dev_pm_genpd_get_hwmode() to allow consumers to read the current mode and its corresponding optional genpd callback, ->get_hwmode_dev(), which the genpd provider can also implement to synchronize the initial HW mode state in genpd_add_device() by reading back the mode from the hardware. Signed-off-by: Ulf Hansson Signed-off-by: Abel Vesa Signed-off-by: Jagadeesh Kona Reviewed-by: Dmitry Baryshkov Reviewed-by: Dhruva Gole Reviewed-by: Taniya Das Link: https://lore.kernel.org/r/20240624044809.17751-2-quic_jkona@quicinc.com --- include/linux/pm_domain.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f24546a3d3db..015751b64746 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -175,6 +175,10 @@ struct generic_pm_domain { int (*set_performance_state)(struct generic_pm_domain *genpd, unsigned int state); struct gpd_dev_ops dev_ops; + int (*set_hwmode_dev)(struct generic_pm_domain *domain, + struct device *dev, bool enable); + bool (*get_hwmode_dev)(struct generic_pm_domain *domain, + struct device *dev); int (*attach_dev)(struct generic_pm_domain *domain, struct device *dev); void (*detach_dev)(struct generic_pm_domain *domain, @@ -237,6 +241,7 @@ struct generic_pm_domain_data { unsigned int performance_state; unsigned int default_pstate; unsigned int rpm_pstate; + bool hw_mode; void *data; }; @@ -267,6 +272,8 @@ int dev_pm_genpd_remove_notifier(struct device *dev); void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next); ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev); void dev_pm_genpd_synced_poweroff(struct device *dev); +int dev_pm_genpd_set_hwmode(struct device *dev, bool enable); +bool dev_pm_genpd_get_hwmode(struct device *dev); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -340,6 +347,16 @@ static inline ktime_t dev_pm_genpd_get_next_hrtimer(struct device *dev) static inline void dev_pm_genpd_synced_poweroff(struct device *dev) { } +static inline int dev_pm_genpd_set_hwmode(struct device *dev, bool enable) +{ + return -EOPNOTSUPP; +} + +static inline bool dev_pm_genpd_get_hwmode(struct device *dev) +{ + return false; +} + #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) #define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL)) #endif -- cgit v1.2.3 From 68cbd415dd4b9c5b9df69f0f091879e56bf5907a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 21 Jun 2024 11:15:58 +0200 Subject: task_work: s/task_work_cancel()/task_work_cancel_func()/ A proper task_work_cancel() API that actually cancels a callback and not *any* callback pointing to a given function is going to be needed for perf events event freeing. Do the appropriate rename to prepare for that. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240621091601.18227-2-frederic@kernel.org --- include/linux/task_work.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 795ef5a68429..23ab01ae185e 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -30,7 +30,7 @@ int task_work_add(struct task_struct *task, struct callback_head *twork, struct callback_head *task_work_cancel_match(struct task_struct *task, bool (*match)(struct callback_head *, void *data), void *data); -struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t); +struct callback_head *task_work_cancel_func(struct task_struct *, task_work_func_t); void task_work_run(void); static inline void exit_task_work(struct task_struct *task) -- cgit v1.2.3 From f409530e4db9dd11b88cb7703c97c8f326ff6566 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 21 Jun 2024 11:15:59 +0200 Subject: task_work: Introduce task_work_cancel() again Re-introduce task_work_cancel(), this time to cancel an actual callback and not *any* callback pointing to a given function. This is going to be needed for perf events event freeing. Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240621091601.18227-3-frederic@kernel.org --- include/linux/task_work.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 23ab01ae185e..26b8a47f41fc 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -31,6 +31,7 @@ int task_work_add(struct task_struct *task, struct callback_head *twork, struct callback_head *task_work_cancel_match(struct task_struct *task, bool (*match)(struct callback_head *, void *data), void *data); struct callback_head *task_work_cancel_func(struct task_struct *, task_work_func_t); +bool task_work_cancel(struct task_struct *task, struct callback_head *cb); void task_work_run(void); static inline void exit_task_work(struct task_struct *task) -- cgit v1.2.3 From 3a5465418f5fd970e86a86c7f4075be262682840 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 21 Jun 2024 11:16:01 +0200 Subject: perf: Fix event leak upon exec and file release The perf pending task work is never waited upon the matching event release. In the case of a child event, released via free_event() directly, this can potentially result in a leaked event, such as in the following scenario that doesn't even require a weak IRQ work implementation to trigger: schedule() prepare_task_switch() =======> perf_event_overflow() event->pending_sigtrap = ... irq_work_queue(&event->pending_irq) <======= perf_event_task_sched_out() event_sched_out() event->pending_sigtrap = 0; atomic_long_inc_not_zero(&event->refcount) task_work_add(&event->pending_task) finish_lock_switch() =======> perf_pending_irq() //do nothing, rely on pending task work <======= begin_new_exec() perf_event_exit_task() perf_event_exit_event() // If is child event free_event() WARN(atomic_long_cmpxchg(&event->refcount, 1, 0) != 1) // event is leaked Similar scenarios can also happen with perf_event_remove_on_exec() or simply against concurrent perf_event_release(). Fix this with synchonizing against the possibly remaining pending task work while freeing the event, just like is done with remaining pending IRQ work. This means that the pending task callback neither need nor should hold a reference to the event, preventing it from ever beeing freed. Fixes: 517e6a301f34 ("perf: Fix perf_pending_task() UaF") Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240621091601.18227-5-frederic@kernel.org --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a5304ae8c654..393fb13733b0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -786,6 +786,7 @@ struct perf_event { struct irq_work pending_irq; struct callback_head pending_task; unsigned int pending_work; + struct rcuwait pending_work_wait; atomic_t event_limit; -- cgit v1.2.3 From 466e4d801cd438a1ab2c8a2cce1bef6b65c31bbb Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 4 Jul 2024 19:03:36 +0200 Subject: task_work: Add TWA_NMI_CURRENT as an additional notify mode. Adding task_work from NMI context requires the following: - The kasan_record_aux_stack() is not NMU safe and must be avoided. - Using TWA_RESUME is NMI safe. If the NMI occurs while the CPU is in userland then it will continue in userland and not invoke the `work' callback. Add TWA_NMI_CURRENT as an additional notify mode. In this mode skip kasan and use irq_work in hardirq-mode to for needed interrupt. Set TIF_NOTIFY_RESUME within the irq_work callback due to k[ac]san instrumentation in test_and_set_bit() which does not look NMI safe in case of a report. Suggested-by: Peter Zijlstra Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240704170424.1466941-3-bigeasy@linutronix.de --- include/linux/task_work.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 26b8a47f41fc..cf5e7e891a77 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -18,6 +18,7 @@ enum task_work_notify_mode { TWA_RESUME, TWA_SIGNAL, TWA_SIGNAL_NO_IPI, + TWA_NMI_CURRENT, }; static inline bool task_work_pending(struct task_struct *task) -- cgit v1.2.3 From c5d93d23a26012a98b77f9e354ab9b3afd420059 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 4 Jul 2024 19:03:37 +0200 Subject: perf: Enqueue SIGTRAP always via task_work. A signal is delivered by raising irq_work() which works from any context including NMI. irq_work() can be delayed if the architecture does not provide an interrupt vector. In order not to lose a signal, the signal is injected via task_work during event_sched_out(). Instead going via irq_work, the signal could be added directly via task_work. The signal is sent to current and can be enqueued on its return path to userland. Queue signal via task_work and consider possible NMI context. Remove perf_event::pending_sigtrap and and use perf_event::pending_work instead. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Tested-by: Marco Elver Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20240704170424.1466941-4-bigeasy@linutronix.de --- include/linux/perf_event.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 393fb13733b0..ea0d82418d85 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -781,7 +781,6 @@ struct perf_event { unsigned int pending_wakeup; unsigned int pending_kill; unsigned int pending_disable; - unsigned int pending_sigtrap; unsigned long pending_addr; /* SIGTRAP */ struct irq_work pending_irq; struct callback_head pending_task; @@ -963,7 +962,7 @@ struct perf_event_context { struct rcu_head rcu_head; /* - * Sum (event->pending_sigtrap + event->pending_work) + * Sum (event->pending_work + event->pending_work) * * The SIGTRAP is targeted at ctx->task, as such it won't do changing * that until the signal is delivered. -- cgit v1.2.3 From 0d40a6d83e3e6751f1107ba33587262d937c969f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 4 Jul 2024 19:03:39 +0200 Subject: perf: Move swevent_htable::recursion into task_struct. The swevent_htable::recursion counter is used to avoid creating an swevent while an event is processed to avoid recursion. The counter is per-CPU and preemption must be disabled to have a stable counter. perf_pending_task() disables preemption to access the counter and then signal. This is problematic on PREEMPT_RT because sending a signal uses a spinlock_t which must not be acquired in atomic on PREEMPT_RT because it becomes a sleeping lock. The atomic context can be avoided by moving the counter into the task_struct. There is a 4 byte hole between futex_state (usually always on) and the following perf pointer (perf_event_ctxp). After the recursion lost some weight it fits perfectly. Move swevent_htable::recursion into task_struct. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Tested-by: Marco Elver Link: https://lore.kernel.org/r/20240704170424.1466941-6-bigeasy@linutronix.de --- include/linux/perf_event.h | 6 ------ include/linux/sched.h | 7 +++++++ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ea0d82418d85..99a7ea1d29ed 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -970,12 +970,6 @@ struct perf_event_context { local_t nr_pending; }; -/* - * Number of contexts where an event can trigger: - * task, softirq, hardirq, nmi. - */ -#define PERF_NR_CONTEXTS 4 - struct perf_cpu_pmu_context { struct perf_event_pmu_context epc; struct perf_event_pmu_context *task_epc; diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6..afb1087f5831 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -734,6 +734,12 @@ enum perf_event_task_context { perf_nr_task_contexts, }; +/* + * Number of contexts where an event can trigger: + * task, softirq, hardirq, nmi. + */ +#define PERF_NR_CONTEXTS 4 + struct wake_q_node { struct wake_q_node *next; }; @@ -1256,6 +1262,7 @@ struct task_struct { unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS + u8 perf_recursion[PERF_NR_CONTEXTS]; struct perf_event_context *perf_event_ctxp; struct mutex perf_event_mutex; struct list_head perf_event_list; -- cgit v1.2.3 From 2b84def990d388bed4afe4f21ae383a01991046c Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 4 Jul 2024 19:03:41 +0200 Subject: perf: Split __perf_pending_irq() out of perf_pending_irq() perf_pending_irq() invokes perf_event_wakeup() and __perf_pending_irq(). The former is in charge of waking any tasks which waits to be woken up while the latter disables perf-events. The irq_work perf_pending_irq(), while this an irq_work, the callback is invoked in thread context on PREEMPT_RT. This is needed because all the waking functions (wake_up_all(), kill_fasync()) acquire sleep locks which must not be used with disabled interrupts. Disabling events, as done by __perf_pending_irq(), expects a hardirq context and disabled interrupts. This requirement is not fulfilled on PREEMPT_RT. Split functionality based on perf_event::pending_disable into irq_work named `pending_disable_irq' and invoke it in hardirq context on PREEMPT_RT. Rename the split out callback to perf_pending_disable(). Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Tested-by: Marco Elver Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/20240704170424.1466941-8-bigeasy@linutronix.de --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 99a7ea1d29ed..65ece0d5b4b6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -783,6 +783,7 @@ struct perf_event { unsigned int pending_disable; unsigned long pending_addr; /* SIGTRAP */ struct irq_work pending_irq; + struct irq_work pending_disable_irq; struct callback_head pending_task; unsigned int pending_work; struct rcuwait pending_work_wait; -- cgit v1.2.3 From f70080c5bc39716f434e7c0888662aa077eb4405 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 26 Jun 2024 13:26:46 +0300 Subject: vdpa/mlx5: Add support for modifying the virtio_version VQ field This is done in preparation for the pre-creation of hardware virtqueues at device add time. Signed-off-by: Dragos Tatulea Reviewed-by: Cosmin Ratiu Message-Id: <20240626-stage-vdpa-vq-precreate-v2-10-560c491078df@nvidia.com> Signed-off-by: Michael S. Tsirkin --- include/linux/mlx5/mlx5_ifc_vdpa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h index 40371c916cf9..34f27c01cec9 100644 --- a/include/linux/mlx5/mlx5_ifc_vdpa.h +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h @@ -148,6 +148,7 @@ enum { MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS = (u64)1 << 6, MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX = (u64)1 << 7, MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX = (u64)1 << 8, + MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION = (u64)1 << 10, MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY = (u64)1 << 11, MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY = (u64)1 << 14, }; -- cgit v1.2.3 From cdc3c7eaae695738b37f374866950602ec5af9c2 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 26 Jun 2024 13:26:47 +0300 Subject: vdpa/mlx5: Add support for modifying the VQ features field This is done in preparation for the pre-creation of hardware virtqueues at device add time. Signed-off-by: Dragos Tatulea Reviewed-by: Cosmin Ratiu Message-Id: <20240626-stage-vdpa-vq-precreate-v2-11-560c491078df@nvidia.com> Signed-off-by: Michael S. Tsirkin --- include/linux/mlx5/mlx5_ifc_vdpa.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h index 34f27c01cec9..58dfa2ee7c83 100644 --- a/include/linux/mlx5/mlx5_ifc_vdpa.h +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h @@ -150,6 +150,7 @@ enum { MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX = (u64)1 << 8, MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION = (u64)1 << 10, MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY = (u64)1 << 11, + MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES = (u64)1 << 12, MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY = (u64)1 << 14, }; -- cgit v1.2.3 From 444b89875fc0937ece181fa865c75c9d22649986 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 28 Jun 2024 11:08:48 -0700 Subject: ARM: spitz: Use software nodes to describe MMC GPIOs Convert Spitz to use software nodes for specifying GPIOs for the MMC. Signed-off-by: Dmitry Torokhov Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240628180852.1738922-9-dmitry.torokhov@gmail.com Signed-off-by: Arnd Bergmann --- include/linux/platform_data/mmc-pxamci.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_data/mmc-pxamci.h b/include/linux/platform_data/mmc-pxamci.h index 7e44e84e7150..652f323b5ecc 100644 --- a/include/linux/platform_data/mmc-pxamci.h +++ b/include/linux/platform_data/mmc-pxamci.h @@ -7,6 +7,7 @@ struct device; struct mmc_host; +struct property_entry; struct pxamci_platform_data { unsigned int ocr_mask; /* available voltages */ @@ -18,7 +19,8 @@ struct pxamci_platform_data { bool gpio_card_ro_invert; /* gpio ro is inverted */ }; -extern void pxa_set_mci_info(struct pxamci_platform_data *info); +extern void pxa_set_mci_info(const struct pxamci_platform_data *info, + const struct property_entry *props); extern void pxa3xx_set_mci2_info(struct pxamci_platform_data *info); extern void pxa3xx_set_mci3_info(struct pxamci_platform_data *info); -- cgit v1.2.3 From d05374dee295d771261d382f97c0c23cb15aa104 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Jul 2024 21:44:50 +0200 Subject: thermal: core: Change passive_delay and polling_delay data type It is better to use unsigned int as the data type for the passive_delay and polling_delay arguments of thermal_zone_device_register_with_trips() because they are implicitly cast to unsigned int anyway in thermal_set_delay_jiffies() and if they happen to be negative at that point, the resulting behavior may not be as desired. Update the thermal_zone_device_register_with_trips() definition accordingly. Signed-off-by: Rafael J. Wysocki Acked-by: Daniel Lezcano Link: https://patch.msgid.link/5803791.DvuYhMxLoT@rjwysocki.net Signed-off-by: Rafael J. Wysocki --- include/linux/thermal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index f732dab20368..cd0045915af5 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -221,7 +221,8 @@ struct thermal_zone_device *thermal_zone_device_register_with_trips( int num_trips, void *devdata, const struct thermal_zone_device_ops *ops, const struct thermal_zone_params *tzp, - int passive_delay, int polling_delay); + unsigned int passive_delay, + unsigned int polling_delay); struct thermal_zone_device *thermal_tripless_zone_device_register( const char *type, -- cgit v1.2.3 From 463b86fed2b25ddb5f576376bfea00134dd23030 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Jul 2024 16:39:55 +0200 Subject: thermal: helpers: Introduce thermal_trip_is_bound_to_cdev() Introduce a new helper function thermal_trip_is_bound_to_cdev() for checking whether or not a given trip point has been bound to a given cooling device. The primary user of it will be the Tegra thermal driver. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/13545762.uLZWGnKmhe@rjwysocki.net --- include/linux/thermal.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index cd0045915af5..5a0b66bd34f0 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -270,6 +270,9 @@ struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name); int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); int thermal_zone_get_slope(struct thermal_zone_device *tz); int thermal_zone_get_offset(struct thermal_zone_device *tz); +bool thermal_trip_is_bound_to_cdev(struct thermal_zone_device *tz, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev); int thermal_zone_device_enable(struct thermal_zone_device *tz); int thermal_zone_device_disable(struct thermal_zone_device *tz); -- cgit v1.2.3 From d1fbf18a0f9403df2edeffa1c7f5a2d66e82c20a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Jul 2024 16:41:03 +0200 Subject: thermal: trip: Add conversion macros for thermal trip priv field Some drivers will need to store integers in the priv field of struct thermal_trip, so add conversion macros for doing this in a consistent way and switch over the int340x_thermal driver that already does it and uses custom conversion functions to using the new macros. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/3297884.aeNJFYEL58@rjwysocki.net --- include/linux/thermal.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 5a0b66bd34f0..3bd1b361ec34 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -79,6 +79,9 @@ struct thermal_trip { #define THERMAL_TRIP_FLAG_RW (THERMAL_TRIP_FLAG_RW_TEMP | \ THERMAL_TRIP_FLAG_RW_HYST) +#define THERMAL_TRIP_PRIV_TO_INT(_val_) (uintptr_t)(_val_) +#define THERMAL_INT_TO_TRIP_PRIV(_val_) (void *)(uintptr_t)(_val_) + struct thermal_zone_device; struct thermal_zone_device_ops { -- cgit v1.2.3 From be5db7581f59621ed9cb9cbf6bebccda38263eb5 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Tue, 9 Jul 2024 12:33:40 +0800 Subject: ASoc: TAS2781: rename the tas2781_reset as tasdevice_reset Rename the tas2781_reset as tasdevice_reset in case of misunderstanding. RESET register for both tas2563 and tas2781 is same and the use of reset pin is also same. Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20240709043342.946-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index cd8ce522b78e..a43ad6dcb7c7 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -2,7 +2,7 @@ // // ALSA SoC Texas Instruments TAS2563/TAS2781 Audio Smart Amplifier // -// Copyright (C) 2022 - 2023 Texas Instruments Incorporated +// Copyright (C) 2022 - 2024 Texas Instruments Incorporated // https://www.ti.com // // The TAS2563/TAS2781 driver implements a flexible and configurable @@ -43,8 +43,8 @@ (page * 128)) + reg) /*Software Reset */ -#define TAS2781_REG_SWRESET TASDEVICE_REG(0x0, 0X0, 0x01) -#define TAS2781_REG_SWRESET_RESET BIT(0) +#define TASDEVICE_REG_SWRESET TASDEVICE_REG(0x0, 0X0, 0x01) +#define TASDEVICE_REG_SWRESET_RESET BIT(0) /*I2C Checksum */ #define TASDEVICE_I2CChecksum TASDEVICE_REG(0x0, 0x0, 0x7E) @@ -140,7 +140,7 @@ struct tasdevice_priv { void (*apply_calibration)(struct tasdevice_priv *tas_priv); }; -void tas2781_reset(struct tasdevice_priv *tas_dev); +void tasdevice_reset(struct tasdevice_priv *tas_dev); int tascodec_init(struct tasdevice_priv *tas_priv, void *codec, struct module *module, void (*cont)(const struct firmware *fw, void *context)); -- cgit v1.2.3 From ca52aa4c60f76566601b42e935b8a78f0fb4f8eb Mon Sep 17 00:00:00 2001 From: David Lechner Date: Mon, 8 Jul 2024 20:05:29 -0500 Subject: spi: add defer_optimize_message controller flag Adding spi_optimize_message() broke the spi-mux driver because it calls spi_async() from it's transfer_one_message() callback. This resulted in passing an incorrectly optimized message to the controller. For example, if the underlying controller has an optimize_message() callback, this would have not been called and can cause a crash when the underlying controller driver tries to transfer the message. Also, since the spi-mux driver swaps out the controller pointer by replacing msg->spi, __spi_unoptimize_message() was being called with a different controller than the one used in __spi_optimize_message(). This could cause a crash when attempting to free the message resources when __spi_unoptimize_message() is called in spi_finalize_current_message() since it is being called with a controller that did not allocate the resources. This is fixed by adding a defer_optimize_message flag for controllers. This flag causes all of the spi_[maybe_][un]optimize_message() calls to be a no-op (other than attaching a pointer to the spi device to the message). This allows the spi-mux driver to pass an unmodified message to spi_async() in spi_mux_transfer_one_message() after the spi device has been swapped out. This causes __spi_optimize_message() and __spi_unoptimize_message() to be called only once per message and with the correct/same controller in each case. Reported-by: Oleksij Rempel Closes: https://lore.kernel.org/linux-spi/Zn6HMrYG2b7epUxT@pengutronix.de/ Reported-by: Marc Kleine-Budde Closes: https://lore.kernel.org/linux-spi/20240628-awesome-discerning-bear-1621f9-mkl@pengutronix.de/ Fixes: 7b1d87af14d9 ("spi: add spi_optimize_message() APIs") Signed-off-by: David Lechner Link: https://patch.msgid.link/20240708-spi-mux-fix-v1-2-6c8845193128@baylibre.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 98fdef6e28f2..67b9a15a5330 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -533,6 +533,9 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @queue_empty: signal green light for opportunistically skipping the queue * for spi_sync transfers. * @must_async: disable all fast paths in the core + * @defer_optimize_message: set to true if controller cannot pre-optimize messages + * and needs to defer the optimization step until the message is actually + * being transferred * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals @@ -776,6 +779,7 @@ struct spi_controller { /* Flag for enabling opportunistic skipping of the queue in spi_sync */ bool queue_empty; bool must_async; + bool defer_optimize_message; }; static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) -- cgit v1.2.3 From c714f15860fcca02fe0fd7c3f1f1fc35b1768ac1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:39 +0800 Subject: iommufd: Add fault and response message definitions iommu_hwpt_pgfaults represent fault messages that the userspace can retrieve. Multiple iommu_hwpt_pgfaults might be put in an iopf group, with the IOMMU_PGFAULT_FLAGS_LAST_PAGE flag set only for the last iommu_hwpt_pgfault. An iommu_hwpt_page_response is a response message that the userspace should send to the kernel after finishing handling a group of fault messages. The @dev_id, @pasid, and @grpid fields in the message identify an outstanding iopf group for a device. The @cookie field, which matches the cookie field of the last fault in the group, will be used by the kernel to look up the pending message. Link: https://lore.kernel.org/r/20240702063444.105814-6-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 83 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 1dfeaa2e649e..4d89ed97b533 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -692,4 +692,87 @@ struct iommu_hwpt_invalidate { __u32 __reserved; }; #define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE) + +/** + * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault + * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is + * valid. + * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group. + */ +enum iommu_hwpt_pgfault_flags { + IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0), + IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1), +}; + +/** + * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault + * @IOMMU_PGFAULT_PERM_READ: request for read permission + * @IOMMU_PGFAULT_PERM_WRITE: request for write permission + * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the + * Execute Requested bit set in PASID TLP Prefix. + * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the + * Privileged Mode Requested bit set in PASID TLP + * Prefix. + */ +enum iommu_hwpt_pgfault_perm { + IOMMU_PGFAULT_PERM_READ = (1 << 0), + IOMMU_PGFAULT_PERM_WRITE = (1 << 1), + IOMMU_PGFAULT_PERM_EXEC = (1 << 2), + IOMMU_PGFAULT_PERM_PRIV = (1 << 3), +}; + +/** + * struct iommu_hwpt_pgfault - iommu page fault data + * @flags: Combination of enum iommu_hwpt_pgfault_flags + * @dev_id: id of the originated device + * @pasid: Process Address Space ID + * @grpid: Page Request Group Index + * @perm: Combination of enum iommu_hwpt_pgfault_perm + * @addr: Fault address + * @length: a hint of how much data the requestor is expecting to fetch. For + * example, if the PRI initiator knows it is going to do a 10MB + * transfer, it could fill in 10MB and the OS could pre-fault in + * 10MB of IOVA. It's default to 0 if there's no such hint. + * @cookie: kernel-managed cookie identifying a group of fault messages. The + * cookie number encoded in the last page fault of the group should + * be echoed back in the response message. + */ +struct iommu_hwpt_pgfault { + __u32 flags; + __u32 dev_id; + __u32 pasid; + __u32 grpid; + __u32 perm; + __u64 addr; + __u32 length; + __u32 cookie; +}; + +/** + * enum iommufd_page_response_code - Return status of fault handlers + * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables + * populated, retry the access. This is the + * "Success" defined in PCI 10.4.2.1. + * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the + * access. This is the "Invalid Request" in PCI + * 10.4.2.1. + * @IOMMUFD_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from + * this device if possible. This is the "Response + * Failure" in PCI 10.4.2.1. + */ +enum iommufd_page_response_code { + IOMMUFD_PAGE_RESP_SUCCESS = 0, + IOMMUFD_PAGE_RESP_INVALID, + IOMMUFD_PAGE_RESP_FAILURE, +}; + +/** + * struct iommu_hwpt_page_response - IOMMU page fault response + * @cookie: The kernel-managed cookie reported in the fault message. + * @code: One of response code in enum iommufd_page_response_code. + */ +struct iommu_hwpt_page_response { + __u32 cookie; + __u32 code; +}; #endif -- cgit v1.2.3 From 07838f7fd529c8a6de44b601d4b7057e6c8d36ed Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:40 +0800 Subject: iommufd: Add iommufd fault object An iommufd fault object provides an interface for delivering I/O page faults to user space. These objects are created and destroyed by user space, and they can be associated with or dissociated from hardware page table objects during page table allocation or destruction. User space interacts with the fault object through a file interface. This interface offers a straightforward and efficient way for user space to handle page faults. It allows user space to read fault messages sequentially and respond to them by writing to the same file. The file interface supports reading messages in poll mode, so it's recommended that user space applications use io_uring to enhance read and write efficiency. A fault object can be associated with any iopf-capable iommufd_hw_pgtable during the pgtable's allocation. All I/O page faults triggered by devices when accessing the I/O addresses of an iommufd_hw_pgtable are routed through the fault object to user space. Similarly, user space's responses to these page faults are routed back to the iommu device driver through the same fault object. Link: https://lore.kernel.org/r/20240702063444.105814-7-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/linux/iommu.h | 4 ++++ include/uapi/linux/iommufd.h | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 910aec80886e..73bc3aee95a1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -124,12 +124,16 @@ struct iopf_fault { struct iopf_group { struct iopf_fault last_fault; struct list_head faults; + size_t fault_count; /* list node for iommu_fault_param::faults */ struct list_head pending_node; struct work_struct work; struct iommu_attach_handle *attach_handle; /* The device's fault data parameter. */ struct iommu_fault_param *fault_param; + /* Used by handler provider to hook the group on its own lists. */ + struct list_head node; + u32 cookie; }; /** diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 4d89ed97b533..70b8a38fcd46 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -50,6 +50,7 @@ enum { IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, IOMMUFD_CMD_HWPT_INVALIDATE, + IOMMUFD_CMD_FAULT_QUEUE_ALLOC, }; /** @@ -775,4 +776,21 @@ struct iommu_hwpt_page_response { __u32 cookie; __u32 code; }; + +/** + * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC) + * @size: sizeof(struct iommu_fault_alloc) + * @flags: Must be 0 + * @out_fault_id: The ID of the new FAULT + * @out_fault_fd: The fd of the new FAULT + * + * Explicitly allocate a fault handling object. + */ +struct iommu_fault_alloc { + __u32 size; + __u32 flags; + __u32 out_fault_id; + __u32 out_fault_fd; +}; +#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC) #endif -- cgit v1.2.3 From 34765cbc679c59ea5d952d738d2d16bf4aadc497 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 14:34:42 +0800 Subject: iommufd: Associate fault object with iommufd_hw_pgtable When allocating a user iommufd_hw_pagetable, the user space is allowed to associate a fault object with the hw_pagetable by specifying the fault object ID in the page table allocation data and setting the IOMMU_HWPT_FAULT_ID_VALID flag bit. On a successful return of hwpt allocation, the user can retrieve and respond to page faults by reading and writing the file interface of the fault object. Once a fault object has been associated with a hwpt, the hwpt is iopf-capable, indicated by hwpt->fault is non NULL. Attaching, detaching, or replacing an iopf-capable hwpt to an RID or PASID will differ from those that are not iopf-capable. Link: https://lore.kernel.org/r/20240702063444.105814-9-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 70b8a38fcd46..ede2b464a761 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -357,10 +357,13 @@ struct iommu_vfio_ioas { * the parent HWPT in a nesting configuration. * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is * enforced on device attachment + * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is + * valid. */ enum iommufd_hwpt_alloc_flags { IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0, IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1, + IOMMU_HWPT_FAULT_ID_VALID = 1 << 2, }; /** @@ -412,6 +415,9 @@ enum iommu_hwpt_data_type { * @data_type: One of enum iommu_hwpt_data_type * @data_len: Length of the type specific data * @data_uptr: User pointer to the type specific data + * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of + * IOMMU_HWPT_FAULT_ID_VALID is set. + * @__reserved2: Padding to 64-bit alignment. Must be 0. * * Explicitly allocate a hardware page table object. This is the same object * type that is returned by iommufd_device_attach() and represents the @@ -442,6 +448,8 @@ struct iommu_hwpt_alloc { __u32 data_type; __u32 data_len; __aligned_u64 data_uptr; + __u32 fault_id; + __u32 __reserved2; }; #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC) -- cgit v1.2.3 From 5a5aa3c3769051c02a2210cc1b7e12a0833b76c9 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 4 Jul 2024 06:25:05 -0700 Subject: sched.h: always_inline alloc_tag_{save|restore} to fix modpost warnings Mark alloc_tag_{save|restore} as always_inline to fix the following modpost warnings: WARNING: modpost: vmlinux: section mismatch in reference: alloc_tag_save+0x1c (section: .text.unlikely) -> initcall_level_names (section: .init.data) WARNING: modpost: vmlinux: section mismatch in reference: alloc_tag_restore+0x3c (section: .text.unlikely) -> initcall_level_names (section: .init.data) The warnings happen when these functions are called from an __init function and they don't get inlined (remain in the .text section) while the value returned by get_current() points into .init.data section. Assuming get_current() always returns a valid address, this situation can happen only during init stage and accessing .init.data from .text section during that stage should pose no issues. Link: https://lkml.kernel.org/r/20240704132506.1011978-1-surenb@google.com Fixes: 22d407b164ff ("lib: add allocation tagging support for memory allocation profiling") Signed-off-by: Suren Baghdasaryan Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202407032306.gi9nZsBi-lkp@intel.com/ Cc: Kent Overstreet Cc: Chris Zankel Cc: Ingo Molnar Cc: Juri Lelli Cc: Max Filippov Cc: Peter Zijlstra Cc: Vincent Guittot Signed-off-by: Andrew Morton --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6..a5f4b48fca18 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2192,13 +2192,13 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); } extern void sched_set_stop_task(int cpu, struct task_struct *stop); #ifdef CONFIG_MEM_ALLOC_PROFILING -static inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) +static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag) { swap(current->alloc_tag, tag); return tag; } -static inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old) +static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old) { #ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n"); -- cgit v1.2.3 From 1b9469cf15976a7cb7378caaa8a1772e7901514d Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 13 Jun 2024 13:50:21 +0200 Subject: PCI: Move struct pci_devres.pinned bit to struct pci_dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bit describing whether the PCI device is currently pinned is stored in struct pci_devres. To clean up and simplify the PCI devres API, it's better if this information is stored in struct pci_dev. This will later permit simplifying pcim_enable_device(). Move the 'pinned' boolean bit to struct pci_dev. Restructure bits in struct pci_dev so the pm / pme fields are next to each other. Link: https://lore.kernel.org/r/20240613115032.29098-9-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index fb004fd4e889..0c19f0717899 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -367,10 +367,11 @@ struct pci_dev { this is D0-D3, D0 being fully functional, and D3 being off. */ u8 pm_cap; /* PM capability offset */ - unsigned int imm_ready:1; /* Supports Immediate Readiness */ unsigned int pme_support:5; /* Bitmask of states from which PME# can be generated */ unsigned int pme_poll:1; /* Poll device's PME status bit */ + unsigned int pinned:1; /* Whether this dev is pinned */ + unsigned int imm_ready:1; /* Supports Immediate Readiness */ unsigned int d1_support:1; /* Low power state D1 is supported */ unsigned int d2_support:1; /* Low power state D2 is supported */ unsigned int no_d1d2:1; /* D1 and D2 are forbidden */ -- cgit v1.2.3 From 7296f2301a057493e97b07739213c6e864f76891 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Mon, 8 Jul 2024 12:41:00 -0700 Subject: swiotlb: reduce swiotlb pool lookups With CONFIG_SWIOTLB_DYNAMIC enabled, each round-trip map/unmap pair in the swiotlb results in 6 calls to swiotlb_find_pool(). In multiple places, the pool is found and used in one function, and then must be found again in the next function that is called because only the tlb_addr is passed as an argument. These are the six call sites: dma_direct_map_page: 1. swiotlb_map -> swiotlb_tbl_map_single -> swiotlb_bounce dma_direct_unmap_page: 2. dma_direct_sync_single_for_cpu -> is_swiotlb_buffer 3. dma_direct_sync_single_for_cpu -> swiotlb_sync_single_for_cpu -> swiotlb_bounce 4. is_swiotlb_buffer 5. swiotlb_tbl_unmap_single -> swiotlb_del_transient 6. swiotlb_tbl_unmap_single -> swiotlb_release_slots Reduce the number of calls by finding the pool at a higher level, and passing it as an argument instead of searching again. A key change is for is_swiotlb_buffer() to return a pool pointer instead of a boolean, and then pass this pool pointer to subsequent swiotlb functions. There are 9 occurrences of is_swiotlb_buffer() used to test if a buffer is a swiotlb buffer before calling a swiotlb function. To reduce code duplication in getting the pool pointer and passing it as an argument, introduce inline wrappers for this pattern. The generated code is essentially unchanged. Since is_swiotlb_buffer() no longer returns a boolean, rename some functions to reflect the change: * swiotlb_find_pool() becomes __swiotlb_find_pool() * is_swiotlb_buffer() becomes swiotlb_find_pool() * is_xen_swiotlb_buffer() becomes xen_swiotlb_find_pool() With these changes, a round-trip map/unmap pair requires only 2 pool lookups (listed using the new names and wrappers): dma_direct_unmap_page: 1. dma_direct_sync_single_for_cpu -> swiotlb_find_pool 2. swiotlb_tbl_unmap_single -> swiotlb_find_pool These changes come from noticing the inefficiencies in a code review, not from performance measurements. With CONFIG_SWIOTLB_DYNAMIC, __swiotlb_find_pool() is not trivial, and it uses an RCU read lock, so avoiding the redundant calls helps performance in a hot path. When CONFIG_SWIOTLB_DYNAMIC is *not* set, the code size reduction is minimal and the perf benefits are likely negligible, but no harm is done. No functional change is intended. Signed-off-by: Michael Kelley Reviewed-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- include/linux/scatterlist.h | 2 +- include/linux/swiotlb.h | 105 ++++++++++++++++++++++++++------------------ 2 files changed, 63 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 77df3d7b18a6..e61d164622db 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -332,7 +332,7 @@ static inline void sg_dma_unmark_bus_address(struct scatterlist *sg) * Description: * Returns true if the scatterlist was marked for SWIOTLB bouncing. Not all * elements may have been bounced, so the caller would have to check - * individual SG entries with is_swiotlb_buffer(). + * individual SG entries with swiotlb_find_pool(). */ static inline bool sg_dma_is_swiotlb(struct scatterlist *sg) { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 14bc10c1bb23..3dae0f592063 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -42,24 +42,6 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, int (*remap)(void *tlb, unsigned long nslabs)); extern void __init swiotlb_update_mem_attributes(void); -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, - size_t mapping_size, - unsigned int alloc_aligned_mask, enum dma_data_direction dir, - unsigned long attrs); - -extern void swiotlb_tbl_unmap_single(struct device *hwdev, - phys_addr_t tlb_addr, - size_t mapping_size, - enum dma_data_direction dir, - unsigned long attrs); - -void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir); -void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir); -dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, - size_t size, enum dma_data_direction dir, unsigned long attrs); - #ifdef CONFIG_SWIOTLB /** @@ -143,37 +125,27 @@ struct io_tlb_mem { #endif }; -#ifdef CONFIG_SWIOTLB_DYNAMIC - -struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr); - -#else - -static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev, - phys_addr_t paddr) -{ - return &dev->dma_io_tlb_mem->defpool; -} - -#endif +struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr); /** - * is_swiotlb_buffer() - check if a physical address belongs to a swiotlb + * swiotlb_find_pool() - find swiotlb pool to which a physical address belongs * @dev: Device which has mapped the buffer. * @paddr: Physical address within the DMA buffer. * - * Check if @paddr points into a bounce buffer. + * Find the swiotlb pool that @paddr points into. * * Return: - * * %true if @paddr points into a bounce buffer - * * %false otherwise + * * pool address if @paddr points into a bounce buffer + * * NULL if @paddr does not point into a bounce buffer. As such, this function + * can be used to determine if @paddr denotes a swiotlb bounce buffer. */ -static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) +static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev, + phys_addr_t paddr) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; if (!mem) - return false; + return NULL; #ifdef CONFIG_SWIOTLB_DYNAMIC /* @@ -182,16 +154,19 @@ static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) * If a SWIOTLB address is checked on another CPU, then it was * presumably loaded by the device driver from an unspecified private * data structure. Make sure that this load is ordered before reading - * dev->dma_uses_io_tlb here and mem->pools in swiotlb_find_pool(). + * dev->dma_uses_io_tlb here and mem->pools in __swiotlb_find_pool(). * * This barrier pairs with smp_mb() in swiotlb_find_slots(). */ smp_rmb(); - return READ_ONCE(dev->dma_uses_io_tlb) && - swiotlb_find_pool(dev, paddr); + if (READ_ONCE(dev->dma_uses_io_tlb)) + return __swiotlb_find_pool(dev, paddr); #else - return paddr >= mem->defpool.start && paddr < mem->defpool.end; + if (paddr >= mem->defpool.start && paddr < mem->defpool.end) + return &mem->defpool; #endif + + return NULL; } static inline bool is_swiotlb_force_bounce(struct device *dev) @@ -219,9 +194,10 @@ static inline void swiotlb_dev_init(struct device *dev) { } -static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr) +static inline struct io_tlb_pool *swiotlb_find_pool(struct device *dev, + phys_addr_t paddr) { - return false; + return NULL; } static inline bool is_swiotlb_force_bounce(struct device *dev) { @@ -260,6 +236,49 @@ static inline phys_addr_t default_swiotlb_limit(void) } #endif /* CONFIG_SWIOTLB */ +phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys, + size_t mapping_size, unsigned int alloc_aligned_mask, + enum dma_data_direction dir, unsigned long attrs); +dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs); + +void __swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, + size_t mapping_size, enum dma_data_direction dir, + unsigned long attrs, struct io_tlb_pool *pool); +static inline void swiotlb_tbl_unmap_single(struct device *dev, + phys_addr_t addr, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr); + + if (unlikely(pool)) + __swiotlb_tbl_unmap_single(dev, addr, size, dir, attrs, pool); +} + +void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool); +static inline void swiotlb_sync_single_for_device(struct device *dev, + phys_addr_t addr, size_t size, enum dma_data_direction dir) +{ + struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr); + + if (unlikely(pool)) + __swiotlb_sync_single_for_device(dev, addr, size, dir, pool); +} + +void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr, + size_t size, enum dma_data_direction dir, + struct io_tlb_pool *pool); +static inline void swiotlb_sync_single_for_cpu(struct device *dev, + phys_addr_t addr, size_t size, enum dma_data_direction dir) +{ + struct io_tlb_pool *pool = swiotlb_find_pool(dev, addr); + + if (unlikely(pool)) + __swiotlb_sync_single_for_cpu(dev, addr, size, dir, pool); +} + extern void swiotlb_print_info(void); #ifdef CONFIG_DMA_RESTRICTED_POOL -- cgit v1.2.3 From ab7a880263c30b1675850a584c206770f5545c2f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 8 Jul 2024 10:15:45 +0200 Subject: driver core: make driver_[create|remove]_file take a const * The functions driver_create_file() and driver_remove_file() do not modify the struct device_driver structure directly, so they are safe to be marked as a constant pointer type. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/2024070844-volley-hatchling-c812@gregkh Signed-off-by: Greg Kroah-Hartman --- include/linux/device/driver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index 7738f458995f..dceb36f1c42c 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -146,9 +146,9 @@ struct driver_attribute { #define DRIVER_ATTR_WO(_name) \ struct driver_attribute driver_attr_##_name = __ATTR_WO(_name) -int __must_check driver_create_file(struct device_driver *driver, +int __must_check driver_create_file(const struct device_driver *driver, const struct driver_attribute *attr); -void driver_remove_file(struct device_driver *driver, +void driver_remove_file(const struct device_driver *driver, const struct driver_attribute *attr); int driver_set_override(struct device *dev, const char **override, -- cgit v1.2.3 From f8fb469147e7db57e3f78d46f3f427705b4a1935 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 8 Jul 2024 10:15:46 +0200 Subject: driver core: make driver_find_device() take a const * The function driver_find_device() does not modify the struct device_driver structure directly, so it is safe to be marked as a constant pointer type. As that is fixed up, also change the function signature on the inline functions that call this, which are: driver_find_device_by_name() driver_find_device_by_of_node() driver_find_device_by_devt() driver_find_next_device() driver_find_device_by_acpi_dev() Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/2024070849-broken-front-9eb5@gregkh Signed-off-by: Greg Kroah-Hartman --- include/linux/device/driver.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h index dceb36f1c42c..1fc8b68786de 100644 --- a/include/linux/device/driver.h +++ b/include/linux/device/driver.h @@ -155,7 +155,7 @@ int driver_set_override(struct device *dev, const char **override, const char *s, size_t len); int __must_check driver_for_each_device(struct device_driver *drv, struct device *start, void *data, int (*fn)(struct device *dev, void *)); -struct device *driver_find_device(struct device_driver *drv, +struct device *driver_find_device(const struct device_driver *drv, struct device *start, const void *data, int (*match)(struct device *dev, const void *data)); @@ -165,7 +165,7 @@ struct device *driver_find_device(struct device_driver *drv, * @drv: the driver we're iterating * @name: name of the device to match */ -static inline struct device *driver_find_device_by_name(struct device_driver *drv, +static inline struct device *driver_find_device_by_name(const struct device_driver *drv, const char *name) { return driver_find_device(drv, NULL, name, device_match_name); @@ -178,7 +178,7 @@ static inline struct device *driver_find_device_by_name(struct device_driver *dr * @np: of_node pointer to match. */ static inline struct device * -driver_find_device_by_of_node(struct device_driver *drv, +driver_find_device_by_of_node(const struct device_driver *drv, const struct device_node *np) { return driver_find_device(drv, NULL, np, device_match_of_node); @@ -203,13 +203,13 @@ driver_find_device_by_fwnode(struct device_driver *drv, * @drv: the driver we're iterating * @devt: devt pointer to match. */ -static inline struct device *driver_find_device_by_devt(struct device_driver *drv, +static inline struct device *driver_find_device_by_devt(const struct device_driver *drv, dev_t devt) { return driver_find_device(drv, NULL, &devt, device_match_devt); } -static inline struct device *driver_find_next_device(struct device_driver *drv, +static inline struct device *driver_find_next_device(const struct device_driver *drv, struct device *start) { return driver_find_device(drv, start, NULL, device_match_any); @@ -223,14 +223,14 @@ static inline struct device *driver_find_next_device(struct device_driver *drv, * @adev: ACPI_COMPANION device to match. */ static inline struct device * -driver_find_device_by_acpi_dev(struct device_driver *drv, +driver_find_device_by_acpi_dev(const struct device_driver *drv, const struct acpi_device *adev) { return driver_find_device(drv, NULL, adev, device_match_acpi_dev); } #else static inline struct device * -driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev) +driver_find_device_by_acpi_dev(const struct device_driver *drv, const void *adev) { return NULL; } -- cgit v1.2.3 From c9add2e607a1ca63cc84bd1a7ab04d513096f37e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 10 Jul 2024 09:34:14 +0200 Subject: zorro: make match function take a const pointer In commit d69d80484598 ("driver core: have match() callback in struct bus_type take a const *"), the match callback for busses was changed to take a const pointer to struct device_driver. Unfortunately I missed fixing up the zorro code, and was only noticed after-the-fact by the kernel test robot. Resolve this issue by properly changing the zorro_bus_match() function. Cc: Geert Uytterhoeven Fixes: d69d80484598 ("driver core: have match() callback in struct bus_type take a const *") Reported-by: kernel test robot Reviewed-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240710073413.495541-2-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- include/linux/zorro.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/zorro.h b/include/linux/zorro.h index db7416ed6057..f36c8d39553d 100644 --- a/include/linux/zorro.h +++ b/include/linux/zorro.h @@ -52,7 +52,7 @@ struct zorro_driver { struct device_driver driver; }; -#define to_zorro_driver(drv) container_of(drv, struct zorro_driver, driver) +#define to_zorro_driver(drv) container_of_const(drv, struct zorro_driver, driver) #define zorro_for_each_dev(dev) \ -- cgit v1.2.3 From 1c076f1f4d7fc7cfb45dba10b3b49d574b4c4c28 Mon Sep 17 00:00:00 2001 From: Hongzhen Luo Date: Wed, 10 Jul 2024 16:34:59 +0800 Subject: erofs: get rid of z_erofs_map_blocks_iter_* tracepoints Consolidate them under erofs_map_blocks_* for simplicity since we have many other ways to know if a given inode is compressed or not. Signed-off-by: Hongzhen Luo Reviewed-by: Gao Xiang Link: https://lore.kernel.org/r/20240710083459.208362-1-hongzhen@linux.alibaba.com Signed-off-by: Gao Xiang --- include/trace/events/erofs.h | 32 +++----------------------------- 1 file changed, 3 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index b9bbfd855f2a..57df3843e650 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -143,7 +143,8 @@ TRACE_EVENT(erofs_readpages, __entry->raw) ); -DECLARE_EVENT_CLASS(erofs__map_blocks_enter, +TRACE_EVENT(erofs_map_blocks_enter, + TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, unsigned int flags), @@ -171,21 +172,8 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_enter, __entry->flags ? show_map_flags(__entry->flags) : "NULL") ); -DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_enter, - TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, - unsigned flags), - - TP_ARGS(inode, map, flags) -); - -DEFINE_EVENT(erofs__map_blocks_enter, z_erofs_map_blocks_iter_enter, - TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, - unsigned int flags), - - TP_ARGS(inode, map, flags) -); +TRACE_EVENT(erofs_map_blocks_exit, -DECLARE_EVENT_CLASS(erofs__map_blocks_exit, TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, unsigned int flags, int ret), @@ -223,20 +211,6 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_exit, show_mflags(__entry->mflags), __entry->ret) ); -DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_exit, - TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, - unsigned flags, int ret), - - TP_ARGS(inode, map, flags, ret) -); - -DEFINE_EVENT(erofs__map_blocks_exit, z_erofs_map_blocks_iter_exit, - TP_PROTO(struct inode *inode, struct erofs_map_blocks *map, - unsigned int flags, int ret), - - TP_ARGS(inode, map, flags, ret) -); - TRACE_EVENT(erofs_destroy_inode, TP_PROTO(struct inode *inode), -- cgit v1.2.3 From 396a27e91265a6632be17bebacb6743f0b9447be Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 4 Jul 2024 15:45:00 +0200 Subject: dm: Remove max_write_zeroes_granularity The max_write_zeroes_granularity boolean of struct dm_target is used in __process_abnormal_io() but never set by any target. Remove this field and the dead code using it. Signed-off-by: Damien Le Moal Signed-off-by: Mikulas Patocka --- include/linux/device-mapper.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 3611b230d0aa..5b7e96653ec6 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -369,12 +369,6 @@ struct dm_target { */ bool max_secure_erase_granularity:1; - /* - * Set if this target requires that write_zeroes be split on - * 'max_write_zeroes_sectors' boundaries. - */ - bool max_write_zeroes_granularity:1; - /* * Set if we need to limit the number of in-flight bios when swapping. */ -- cgit v1.2.3 From 9d45db03acf9cee4f83148c403d105b1a38a0f23 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 4 Jul 2024 15:45:25 +0200 Subject: dm: Remove max_secure_erase_granularity The max_secure_erase_granularity boolean of struct dm_target is used in __process_abnormal_io() but never set by any target. Remove this field and the dead code using it. Signed-off-by: Damien Le Moal Signed-off-by: Mikulas Patocka --- include/linux/device-mapper.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 5b7e96653ec6..4ba2e73993bd 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -363,12 +363,6 @@ struct dm_target { */ bool max_discard_granularity:1; - /* - * Set if this target requires that secure_erases be split on - * 'max_secure_erase_sectors' boundaries. - */ - bool max_secure_erase_granularity:1; - /* * Set if we need to limit the number of in-flight bios when swapping. */ -- cgit v1.2.3 From a21f9edb13b0d8066775cbd5efa7261e41871182 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Thu, 4 Jul 2024 16:17:15 +0200 Subject: dm: factor out helper function from dm_get_device Factor out a helper function, dm_devt_from_path(), from dm_get_device() for use in dm targets. Signed-off-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka --- include/linux/device-mapper.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 4ba2e73993bd..384649a61bfa 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -179,6 +179,11 @@ int dm_get_device(struct dm_target *ti, const char *path, blk_mode_t mode, struct dm_dev **result); void dm_put_device(struct dm_target *ti, struct dm_dev *d); +/* + * Helper function for getting devices + */ +int dm_devt_from_path(const char *path, dev_t *dev_p); + /* * Information about a target type */ -- cgit v1.2.3 From 2bb6b10ebe5d42c119827b57ee8123175b7ecc3d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 9 Jul 2024 08:49:56 -0700 Subject: usb: gadget: Use u16 types for 16-bit fields Since the beginning of time, struct usb_ep::maxpacket was a bitfield, and when new 16-bit members were added, the convention was followed: 1da177e4c3f41 (Linus Torvalds 2005-04-16 236) unsigned maxpacket:16; e117e742d3106 (Robert Baldyga 2013-12-13 237) unsigned maxpacket_limit:16; a59d6b91cbca5 (Tatyana Brokhman 2011-06-28 238) unsigned max_streams:16; However, there is no need for this as a simple u16 can be used instead, simplifying the struct and the resulting compiler binary output. Switch to u16 for all three, and rearrange struct slightly to minimize holes. No change in the final size of the struct results; the 2 byte gap is just moved to the end, as seen with pahole: - /* XXX 2 bytes hole, try to pack */ ... /* size: 72, cachelines: 2, members: 15 */ ... + /* padding: 2 */ Changing this simplifies future introspection[1] of maxpacket's type during allocations: drivers/usb/gadget/function/f_tcm.c:330:24: error: 'typeof' applied to a bit-field 330 | fu->cmd.buf = kmalloc(fu->ep_out->maxpacket, GFP_KERNEL); Link: https://lore.kernel.org/all/202407090928.6UaOAZAJ-lkp@intel.com [1] Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20240709154953.work.953-kees@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/gadget.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 56dda8e1562d..df33333650a0 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -229,18 +229,18 @@ struct usb_ep { const char *name; const struct usb_ep_ops *ops; + const struct usb_endpoint_descriptor *desc; + const struct usb_ss_ep_comp_descriptor *comp_desc; struct list_head ep_list; struct usb_ep_caps caps; bool claimed; bool enabled; - unsigned maxpacket:16; - unsigned maxpacket_limit:16; - unsigned max_streams:16; unsigned mult:2; unsigned maxburst:5; u8 address; - const struct usb_endpoint_descriptor *desc; - const struct usb_ss_ep_comp_descriptor *comp_desc; + u16 maxpacket; + u16 maxpacket_limit; + u16 max_streams; }; /*-------------------------------------------------------------------------*/ -- cgit v1.2.3 From a5f81642a7228489292f842a106e33c558121e8b Mon Sep 17 00:00:00 2001 From: Kerem Karabay Date: Sat, 6 Jul 2024 12:03:23 +0000 Subject: USB: core: add 'shutdown' callback to usb_driver Currently there is no standardized method for USB drivers to handle shutdown events. This patch simplifies running code on shutdown for USB devices by adding a shutdown callback to usb_driver. Signed-off-by: Kerem Karabay Signed-off-by: Aditya Garg Link: https://lore.kernel.org/r/7AAC1BF4-8B60-448D-A3C1-B7E80330BE42@live.com Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 1913a13833f2..832997a9da0a 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1171,6 +1171,7 @@ extern ssize_t usb_show_dynids(struct usb_dynids *dynids, char *buf); * post_reset method is called. * @post_reset: Called by usb_reset_device() after the device * has been reset + * @shutdown: Called at shut-down time to quiesce the device. * @id_table: USB drivers use ID table to support hotplugging. * Export this with MODULE_DEVICE_TABLE(usb,...). This must be set * or your driver's probe function will never get called. @@ -1222,6 +1223,8 @@ struct usb_driver { int (*pre_reset)(struct usb_interface *intf); int (*post_reset)(struct usb_interface *intf); + void (*shutdown)(struct usb_interface *intf); + const struct usb_device_id *id_table; const struct attribute_group **dev_groups; -- cgit v1.2.3 From b70f12e962bc73a091a7b853f24ae2049613c684 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 26 Apr 2024 08:44:51 +0200 Subject: kbuild: verify asm-generic header list In order to integrate the system call header generation with generating the asm-generic wrappers, restrict the generated headers to those that actually exist in include/asm-generic/. The path is already known, so add these as a dependency. The asm-generic/bugs.h header was removed in commit 61235b24b9cb ("init: Remove check_bugs() leftovers"), which now causes a build failure, so drop it from the list. Signed-off-by: Arnd Bergmann --- include/asm-generic/Kbuild | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index b20fa25a7e8d..df7ed81c4589 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -9,7 +9,6 @@ mandatory-y += archrandom.h mandatory-y += barrier.h mandatory-y += bitops.h mandatory-y += bug.h -mandatory-y += bugs.h mandatory-y += cacheflush.h mandatory-y += cfi.h mandatory-y += checksum.h -- cgit v1.2.3 From 505d66d1abfb90853e24ab6cbdf83b611473d6fc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 8 May 2024 17:13:34 +0200 Subject: clone3: drop __ARCH_WANT_SYS_CLONE3 macro When clone3() was introduced, it was not obvious how each architecture deals with setting up the stack and keeping the register contents in a fork()-like system call, so this was left for the architecture maintainers to implement, with __ARCH_WANT_SYS_CLONE3 defined by those that already implement it. Five years later, we still have a few architectures left that are missing clone3(), and the macro keeps getting in the way as it's fundamentally different from all the other __ARCH_WANT_SYS_* macros that are meant to provide backwards-compatibility with applications using older syscalls that are no longer provided by default. Address this by reversing the polarity of the macro, adding an __ARCH_BROKEN_SYS_CLONE3 macro to all architectures that don't already provide the syscall, and remove __ARCH_WANT_SYS_CLONE3 from all the other ones. Acked-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/unistd.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index d4cc26932ff4..5bf6148cac2b 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -776,12 +776,8 @@ __SYSCALL(__NR_fsmount, sys_fsmount) __SYSCALL(__NR_fspick, sys_fspick) #define __NR_pidfd_open 434 __SYSCALL(__NR_pidfd_open, sys_pidfd_open) - -#ifdef __ARCH_WANT_SYS_CLONE3 #define __NR_clone3 435 __SYSCALL(__NR_clone3, sys_clone3) -#endif - #define __NR_close_range 436 __SYSCALL(__NR_close_range, sys_close_range) #define __NR_openat2 437 -- cgit v1.2.3 From 5f67eef6dff39421215e9134f1eaae51b67a73b7 Mon Sep 17 00:00:00 2001 From: Vamsi Attunuru Date: Sat, 6 Jul 2024 08:30:09 -0700 Subject: misc: mrvl-cn10k-dpi: add Octeon CN10K DPI administrative driver Adds a misc driver for Marvell CN10K DPI(DMA Engine) device's physical function which initializes DPI DMA hardware's global configuration and enables hardware mailbox channels between physical function (PF) and it's virtual functions (VF). VF device drivers (User space drivers) use this hw mailbox to communicate any required device configuration on it's respective VF device. Accordingly, this DPI PF driver provisions the VF device resources. At the hardware level, the DPI physical function (PF) acts as a management interface to setup the VF device resources, VF devices are only provisioned to handle or control the actual DMA Engine's data transfer capabilities. Signed-off-by: Vamsi Attunuru Reviewed-by: Srujana Challa Link: https://lore.kernel.org/r/20240706153009.3775333-1-vattunuru@marvell.com Signed-off-by: Greg Kroah-Hartman --- include/uapi/misc/mrvl_cn10k_dpi.h | 39 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 include/uapi/misc/mrvl_cn10k_dpi.h (limited to 'include') diff --git a/include/uapi/misc/mrvl_cn10k_dpi.h b/include/uapi/misc/mrvl_cn10k_dpi.h new file mode 100644 index 000000000000..8db902eaa907 --- /dev/null +++ b/include/uapi/misc/mrvl_cn10k_dpi.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Marvell Octeon CN10K DPI driver + * + * Copyright (C) 2024 Marvell. + * + */ + +#ifndef __MRVL_CN10K_DPI_H__ +#define __MRVL_CN10K_DPI_H__ + +#include + +#define DPI_MAX_ENGINES 6 + +struct dpi_mps_mrrs_cfg { + __u16 max_read_req_sz; /* Max read request size */ + __u16 max_payload_sz; /* Max payload size */ + __u16 port; /* Ebus port */ + __u16 reserved; /* Reserved */ +}; + +struct dpi_engine_cfg { + __u64 fifo_mask; /* FIFO size mask in KBytes */ + __u16 molr[DPI_MAX_ENGINES]; /* Max outstanding load requests */ + __u16 update_molr; /* '1' to update engine MOLR */ + __u16 reserved; /* Reserved */ +}; + +/* DPI ioctl numbers */ +#define DPI_MAGIC_NUM 0xB8 + +/* Set MPS & MRRS parameters */ +#define DPI_MPS_MRRS_CFG _IOW(DPI_MAGIC_NUM, 1, struct dpi_mps_mrrs_cfg) + +/* Set Engine FIFO configuration */ +#define DPI_ENGINE_CFG _IOW(DPI_MAGIC_NUM, 2, struct dpi_engine_cfg) + +#endif /* __MRVL_CN10K_DPI_H__ */ -- cgit v1.2.3 From af46fe8c41de5b79358c3007e3854dda9c61c7dc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 10 Jul 2024 09:44:52 +0200 Subject: dio: Have dio_bus_match() callback take a const * MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/dio/dio-driver.c:128:11: error: initialization of ‘int (*)(struct device *, const struct device_driver *)’ from incompatible pointer type ‘int (*)(struct device *, struct device_driver *)’ [-Werror=incompatible-pointer-types] 128 | .match = dio_bus_match, | ^~~~~~~~~~~~~ drivers/dio/dio-driver.c:128:11: note: (near initialization for ‘dio_bus_type.match’) Reported-by: noreply@ellerman.id.au Fixes: d69d804845985c29 ("driver core: have match() callback in struct bus_type take a const *") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240710074452.2841173-1-geert@linux-m68k.org [ added dio.h change - gregkh ] Signed-off-by: Greg Kroah-Hartman --- include/linux/dio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dio.h b/include/linux/dio.h index 2b5923909f96..464331c4c4a7 100644 --- a/include/linux/dio.h +++ b/include/linux/dio.h @@ -93,7 +93,7 @@ struct dio_driver { struct device_driver driver; }; -#define to_dio_driver(drv) container_of(drv, struct dio_driver, driver) +#define to_dio_driver(drv) container_of_const(drv, struct dio_driver, driver) /* DIO/DIO-II boards all have the following 8bit registers. * These are offsets from the base of the device. -- cgit v1.2.3 From 29f1c1ae6d2fff3bf4f89d265f4a1a7c8ab78a8e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 29 Jun 2024 22:12:09 -0400 Subject: closures: fix closure_sync + closure debugging originally, stack closures were only used synchronously, and with the original implementation of closure_sync() the ref never hit 0; thus, closure_put_after_sub() assumes that if the ref hits 0 it's on the debug list, in debug mode. that's no longer true with the current implementation of closure_sync, so we need a new magic so closure_debug_destroy() doesn't pop an assert. Signed-off-by: Kent Overstreet --- include/linux/closure.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/closure.h b/include/linux/closure.h index 59b8c06b11ff..2af44427107d 100644 --- a/include/linux/closure.h +++ b/include/linux/closure.h @@ -159,6 +159,7 @@ struct closure { #ifdef CONFIG_DEBUG_CLOSURES #define CLOSURE_MAGIC_DEAD 0xc054dead #define CLOSURE_MAGIC_ALIVE 0xc054a11e +#define CLOSURE_MAGIC_STACK 0xc05451cc unsigned int magic; struct list_head all; @@ -323,12 +324,18 @@ static inline void closure_init_stack(struct closure *cl) { memset(cl, 0, sizeof(struct closure)); atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); +#ifdef CONFIG_DEBUG_CLOSURES + cl->magic = CLOSURE_MAGIC_STACK; +#endif } static inline void closure_init_stack_release(struct closure *cl) { memset(cl, 0, sizeof(struct closure)); atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); +#ifdef CONFIG_DEBUG_CLOSURES + cl->magic = CLOSURE_MAGIC_STACK; +#endif } /** -- cgit v1.2.3 From 33c651a3fba9a3d380cb0e35ea207e048d39bed9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 7 Jun 2024 18:00:13 +0200 Subject: pwm: Make use of a symbol namespace for the core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define all pwm core's symbols in the namespace "PWM". The necessary module import statement is just added to the main header, this way every file that knows about the public functions automatically has this namespace available. Thanks to Biju Das for pointing out a cut'n'paste failure in my initial patch. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20240607160012.1206874-2-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 60b92c2c75ef..812c550de60c 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -4,9 +4,12 @@ #include #include +#include #include #include +MODULE_IMPORT_NS(PWM); + struct pwm_chip; /** -- cgit v1.2.3 From d6f66e292676db976c4d42df2631427236c36fdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 7 Jun 2024 10:44:17 +0200 Subject: pwm: Make pwm_request_from_chip() private to the core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last user of this function outside of core.c is gone, so it can be made static. Signed-off-by: Uwe Kleine-König Reviewed-by: Tzung-Bi Shih Link: https://lore.kernel.org/r/20240607084416.897777-8-u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 812c550de60c..2e225f3b4282 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -410,10 +410,6 @@ void pwmchip_remove(struct pwm_chip *chip); int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner); #define devm_pwmchip_add(dev, chip) __devm_pwmchip_add(dev, chip, THIS_MODULE) -struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, - unsigned int index, - const char *label); - struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *chip, const struct of_phandle_args *args); struct pwm_device *of_pwm_single_xlate(struct pwm_chip *chip, @@ -508,14 +504,6 @@ static inline int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip) return -EINVAL; } -static inline struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip, - unsigned int index, - const char *label) -{ - might_sleep(); - return ERR_PTR(-ENODEV); -} - static inline struct pwm_device *pwm_get(struct device *dev, const char *consumer) { -- cgit v1.2.3 From a96d3659c9437673dbef5c05cba31a3c0b5a0a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 7 Jun 2024 12:42:31 +0200 Subject: pwm: Remove wrong implementation details from pwm_ops's documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When .get_state() is called is an implementation detail that implementors and users shouldn't care about and rely on. Additionally it's wrong, because with PWM_DEBUG enabled it is called more often. Just drop the wrong statement. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/611ba758d7e9fb2695e96b23cb7ceeefb6ba8513.1717756902.git.u.kleine-koenig@baylibre.com Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 2e225f3b4282..75ad0d2fd949 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -252,9 +252,7 @@ struct pwm_capture { * @free: optional hook for freeing a PWM * @capture: capture and report PWM signal * @apply: atomically apply a new PWM config - * @get_state: get the current PWM state. This function is only - * called once per PWM device when the PWM chip is - * registered. + * @get_state: get the current PWM state. */ struct pwm_ops { int (*request)(struct pwm_chip *chip, struct pwm_device *pwm); -- cgit v1.2.3 From da804fa9bc718e4210ec27cc0457ecc1eb073a14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 14 Jun 2024 17:39:00 +0200 Subject: pwm: Drop pwm_apply_state() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function is not supposed to be used any more since commit c748a6d77c06 ("pwm: Rename pwm_apply_state() to pwm_apply_might_sleep()") that is included in v6.8-rc1. Two kernel releases should be enough for everyone to adapt, so drop the old function that was introduced as a compatibility stub for the transition. Signed-off-by: Uwe Kleine-König --- include/linux/pwm.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 75ad0d2fd949..f8c2dc12dbd3 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -563,13 +563,6 @@ static inline void pwm_apply_args(struct pwm_device *pwm) pwm_apply_might_sleep(pwm, &state); } -/* only for backwards-compatibility, new code should not use this */ -static inline int pwm_apply_state(struct pwm_device *pwm, - const struct pwm_state *state) -{ - return pwm_apply_might_sleep(pwm, state); -} - struct pwm_lookup { struct list_head list; const char *provider; -- cgit v1.2.3 From 2cb13dec8c5e5e104fd2f71c2dee761d6ed9a333 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 20 Jun 2024 15:53:34 +0200 Subject: cache: add __cacheline_group_{begin, end}_aligned() (+ couple more) __cacheline_group_begin(), unfortunately, doesn't align the group anyhow. If it is wanted, then you need to do something like __cacheline_group_begin(grp) __aligned(ALIGN) which isn't really convenient nor compact. Add the _aligned() counterparts to align the groups automatically to either the specified alignment (optional) or ``SMP_CACHE_BYTES``. Note that the actual struct layout will then be (on x64 with 64-byte CL): struct x { u32 y; // offset 0, size 4, padding 56 __cacheline_group_begin__grp; // offset 64, size 0 u32 z; // offset 64, size 4, padding 4 __cacheline_group_end__grp; // offset 72, size 0 __cacheline_group_pad__grp; // offset 72, size 0, padding 56 u32 w; // offset 128 }; The end marker is aligned to long, so that you can assert the struct size more strictly, but the offset of the next field in the structure will be aligned to the group alignment, so that the next field won't fall into the group it's not intended to. Add __LARGEST_ALIGN definition and LARGEST_ALIGN() macro. __LARGEST_ALIGN is the value to which the compilers align fields when __aligned_largest is specified. Sometimes, it might be needed to get this value outside of variable definitions. LARGEST_ALIGN() is macro which just aligns a value to __LARGEST_ALIGN. Also add SMP_CACHE_ALIGN(), similar to L1_CACHE_ALIGN(), but using ``SMP_CACHE_BYTES`` instead of ``L1_CACHE_BYTES`` as the former also accounts L2, needed in some cases. Signed-off-by: Alexander Lobakin Reviewed-by: Przemek Kitszel Signed-off-by: Tony Nguyen --- include/linux/cache.h | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'include') diff --git a/include/linux/cache.h b/include/linux/cache.h index 0ecb17bb6883..ca2a05682a54 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -13,6 +13,32 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES #endif +/** + * SMP_CACHE_ALIGN - align a value to the L2 cacheline size + * @x: value to align + * + * On some architectures, L2 ("SMP") CL size is bigger than L1, and sometimes, + * this needs to be accounted. + * + * Return: aligned value. + */ +#ifndef SMP_CACHE_ALIGN +#define SMP_CACHE_ALIGN(x) ALIGN(x, SMP_CACHE_BYTES) +#endif + +/* + * ``__aligned_largest`` aligns a field to the value most optimal for the + * target architecture to perform memory operations. Get the actual value + * to be able to use it anywhere else. + */ +#ifndef __LARGEST_ALIGN +#define __LARGEST_ALIGN sizeof(struct { long x; } __aligned_largest) +#endif + +#ifndef LARGEST_ALIGN +#define LARGEST_ALIGN(x) ALIGN(x, __LARGEST_ALIGN) +#endif + /* * __read_mostly is used to keep rarely changing variables out of frequently * updated cachelines. Its use should be reserved for data that is used @@ -95,6 +121,39 @@ __u8 __cacheline_group_end__##GROUP[0] #endif +/** + * __cacheline_group_begin_aligned - declare an aligned group start + * @GROUP: name of the group + * @...: optional group alignment + * + * The following block inside a struct: + * + * __cacheline_group_begin_aligned(grp); + * field a; + * field b; + * __cacheline_group_end_aligned(grp); + * + * will always be aligned to either the specified alignment or + * ``SMP_CACHE_BYTES``. + */ +#define __cacheline_group_begin_aligned(GROUP, ...) \ + __cacheline_group_begin(GROUP) \ + __aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES) + +/** + * __cacheline_group_end_aligned - declare an aligned group end + * @GROUP: name of the group + * @...: optional alignment (same as was in __cacheline_group_begin_aligned()) + * + * Note that the end marker is aligned to sizeof(long) to allow more precise + * size assertion. It also declares a padding at the end to avoid next field + * falling into this cacheline. + */ +#define __cacheline_group_end_aligned(GROUP, ...) \ + __cacheline_group_end(GROUP) __aligned(sizeof(long)); \ + struct { } __cacheline_group_pad__##GROUP \ + __aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES) + #ifndef CACHELINE_ASSERT_GROUP_MEMBER #define CACHELINE_ASSERT_GROUP_MEMBER(TYPE, GROUP, MEMBER) \ BUILD_BUG_ON(!(offsetof(TYPE, MEMBER) >= \ -- cgit v1.2.3 From 39daa09d34ada1bc7227d68def63e0a2105b5496 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 20 Jun 2024 15:53:35 +0200 Subject: page_pool: use __cacheline_group_{begin, end}_aligned() Instead of doing __cacheline_group_begin() __aligned(), use the new __cacheline_group_{begin,end}_aligned(), so that it will take care of the group alignment itself. Also replace open-coded `4 * sizeof(long)` in two places with a definition. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/page_pool/types.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index b70bcc14ceda..50569fed7868 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -129,6 +129,16 @@ struct page_pool_stats { }; #endif +/* The whole frag API block must stay within one cacheline. On 32-bit systems, + * sizeof(long) == sizeof(int), so that the block size is ``3 * sizeof(long)``. + * On 64-bit systems, the actual size is ``2 * sizeof(long) + sizeof(int)``. + * The closest pow-2 to both of them is ``4 * sizeof(long)``, so just use that + * one for simplicity. + * Having it aligned to a cacheline boundary may be excessive and doesn't bring + * any good. + */ +#define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long)) + struct page_pool { struct page_pool_params_fast p; @@ -142,19 +152,11 @@ struct page_pool { bool system:1; /* This is a global percpu pool */ #endif - /* The following block must stay within one cacheline. On 32-bit - * systems, sizeof(long) == sizeof(int), so that the block size is - * ``3 * sizeof(long)``. On 64-bit systems, the actual size is - * ``2 * sizeof(long) + sizeof(int)``. The closest pow-2 to both of - * them is ``4 * sizeof(long)``, so just use that one for simplicity. - * Having it aligned to a cacheline boundary may be excessive and - * doesn't bring any good. - */ - __cacheline_group_begin(frag) __aligned(4 * sizeof(long)); + __cacheline_group_begin_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN); long frag_users; netmem_ref frag_page; unsigned int frag_offset; - __cacheline_group_end(frag); + __cacheline_group_end_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN); struct delayed_work release_dw; void (*disconnect)(void *pool); -- cgit v1.2.3 From 62c884256ea1f898b00f20729a306a4eeb1840b1 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 20 Jun 2024 15:53:36 +0200 Subject: libeth: add cacheline / struct layout assertion helpers Add helpers to assert struct field layout, a bit more crazy and networking-specific than in . They assume you have 3 CL-aligned groups (read-mostly, read-write, cold) in a struct you want to assert, and nothing besides them. For 64-bit with 64-byte cachelines, the assertions are as strict as possible, as the size can then be easily predicted. For the rest, make sure they don't cross the specified bound. Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/cache.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 include/net/libeth/cache.h (limited to 'include') diff --git a/include/net/libeth/cache.h b/include/net/libeth/cache.h new file mode 100644 index 000000000000..bdb0c043ce61 --- /dev/null +++ b/include/net/libeth/cache.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2024 Intel Corporation */ + +#ifndef __LIBETH_CACHE_H +#define __LIBETH_CACHE_H + +#include + +/** + * libeth_cacheline_group_assert - make sure cacheline group size is expected + * @type: type of the structure containing the group + * @grp: group name inside the struct + * @sz: expected group size + */ +#if defined(CONFIG_64BIT) && SMP_CACHE_BYTES == 64 +#define libeth_cacheline_group_assert(type, grp, sz) \ + static_assert(offsetof(type, __cacheline_group_end__##grp) - \ + offsetofend(type, __cacheline_group_begin__##grp) == \ + (sz)) +#define __libeth_cacheline_struct_assert(type, sz) \ + static_assert(sizeof(type) == (sz)) +#else /* !CONFIG_64BIT || SMP_CACHE_BYTES != 64 */ +#define libeth_cacheline_group_assert(type, grp, sz) \ + static_assert(offsetof(type, __cacheline_group_end__##grp) - \ + offsetofend(type, __cacheline_group_begin__##grp) <= \ + (sz)) +#define __libeth_cacheline_struct_assert(type, sz) \ + static_assert(sizeof(type) <= (sz)) +#endif /* !CONFIG_64BIT || SMP_CACHE_BYTES != 64 */ + +#define __libeth_cls1(sz1) SMP_CACHE_ALIGN(sz1) +#define __libeth_cls2(sz1, sz2) (SMP_CACHE_ALIGN(sz1) + SMP_CACHE_ALIGN(sz2)) +#define __libeth_cls3(sz1, sz2, sz3) \ + (SMP_CACHE_ALIGN(sz1) + SMP_CACHE_ALIGN(sz2) + SMP_CACHE_ALIGN(sz3)) +#define __libeth_cls(...) \ + CONCATENATE(__libeth_cls, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__) + +/** + * libeth_cacheline_struct_assert - make sure CL-based struct size is expected + * @type: type of the struct + * @...: from 1 to 3 CL group sizes (read-mostly, read-write, cold) + * + * When a struct contains several CL groups, it's difficult to predict its size + * on different architectures. The macro instead takes sizes of all of the + * groups the structure contains and generates the final struct size. + */ +#define libeth_cacheline_struct_assert(type, ...) \ + __libeth_cacheline_struct_assert(type, __libeth_cls(__VA_ARGS__)); \ + static_assert(__alignof(type) >= SMP_CACHE_BYTES) + +/** + * libeth_cacheline_set_assert - make sure CL-based struct layout is expected + * @type: type of the struct + * @ro: expected size of the read-mostly group + * @rw: expected size of the read-write group + * @c: expected size of the cold group + * + * Check that each group size is expected and then do final struct size check. + */ +#define libeth_cacheline_set_assert(type, ro, rw, c) \ + libeth_cacheline_group_assert(type, read_mostly, ro); \ + libeth_cacheline_group_assert(type, read_write, rw); \ + libeth_cacheline_group_assert(type, cold, c); \ + libeth_cacheline_struct_assert(type, ro, rw, c) + +#endif /* __LIBETH_CACHE_H */ -- cgit v1.2.3 From 584e86e14c59d36688633002613792923620d8c0 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 10 Jul 2024 11:36:38 +0100 Subject: firmware: cs_dsp: Make wmfw and bin filename arguments const char * The wmfw_filename and bin_filename strings passed into cs_dsp_power_up() and cs_dsp_adsp1_power_up() should be const char *. Signed-off-by: Richard Fitzgerald Reviewed-by: Charles Keepax Link: https://lore.kernel.org/r/20240710103640.78197-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/firmware/cirrus/cs_dsp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index 82687e07a7c2..8078dc377948 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -213,13 +213,13 @@ int cs_dsp_adsp2_init(struct cs_dsp *dsp); int cs_dsp_halo_init(struct cs_dsp *dsp); int cs_dsp_adsp1_power_up(struct cs_dsp *dsp, - const struct firmware *wmfw_firmware, char *wmfw_filename, - const struct firmware *coeff_firmware, char *coeff_filename, + const struct firmware *wmfw_firmware, const char *wmfw_filename, + const struct firmware *coeff_firmware, const char *coeff_filename, const char *fw_name); void cs_dsp_adsp1_power_down(struct cs_dsp *dsp); int cs_dsp_power_up(struct cs_dsp *dsp, - const struct firmware *wmfw_firmware, char *wmfw_filename, - const struct firmware *coeff_firmware, char *coeff_filename, + const struct firmware *wmfw_firmware, const char *wmfw_filename, + const struct firmware *coeff_firmware, const char *coeff_filename, const char *fw_name); void cs_dsp_power_down(struct cs_dsp *dsp); int cs_dsp_run(struct cs_dsp *dsp); -- cgit v1.2.3 From dc0e5ca8856dc6a97e3b117879dfb2b52bda06b6 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 10 Jul 2024 11:36:40 +0100 Subject: firmware: cs_dsp: Rename fw_ver to wmfw_ver Rename the confusingly named struct member fw_ver to wmfw_ver. It contains the wmfw format version of the loaded wmfw file. This commit also contains an update to wm_adsp for the new name. Signed-off-by: Richard Fitzgerald Reviewed-by: Charles Keepax Link: https://lore.kernel.org/r/20240710103640.78197-5-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- include/linux/firmware/cirrus/cs_dsp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h index 8078dc377948..2e649810169f 100644 --- a/include/linux/firmware/cirrus/cs_dsp.h +++ b/include/linux/firmware/cirrus/cs_dsp.h @@ -167,7 +167,7 @@ struct cs_dsp { const struct cs_dsp_region *mem; int num_mems; - int fw_ver; + int wmfw_ver; bool booted; bool running; -- cgit v1.2.3 From 3c1ff93b4deea502cd8b0869839557cab2a28b71 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 9 Jul 2024 18:56:20 -0700 Subject: regmap: Implement regmap_multi_reg_read() regmap_multi_reg_read() is similar to regmap_bilk_read() but reads from an array of non-sequential registers. Signed-off-by: Guenter Roeck Link: https://lore.kernel.org/r/20240710015622.1960522-2-linux@roeck-us.net Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index a6bc2980a98b..9c1ddbf82719 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1237,6 +1237,8 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg, void *val, size_t val_len); int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, size_t val_count); +int regmap_multi_reg_read(struct regmap *map, unsigned int *reg, void *val, + size_t val_count); int regmap_update_bits_base(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val, bool *change, bool async, bool force); -- cgit v1.2.3 From 5aaac1aece4e4db9d620791a33f7a4173c660e65 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Thu, 20 Jun 2024 15:53:45 +0200 Subject: libeth: support different types of buffers for Rx Unlike previous generations, idpf requires more buffer types for optimal performance. This includes: header buffers, short buffers, and no-overhead buffers (w/o headroom and tailroom, for TCP zerocopy when the header split is enabled). Introduce libeth Rx buffer type and calculate page_pool params accordingly. All the HW-related details like buffer alignment are still accounted. For the header buffers, pick 256 bytes as in most places in the kernel (have you ever seen frames with bigger headers?). Reviewed-by: Przemek Kitszel Signed-off-by: Alexander Lobakin Signed-off-by: Tony Nguyen --- include/net/libeth/rx.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/libeth/rx.h b/include/net/libeth/rx.h index f29ea3e34c6c..43574bd6612f 100644 --- a/include/net/libeth/rx.h +++ b/include/net/libeth/rx.h @@ -17,6 +17,8 @@ #define LIBETH_MAX_HEADROOM LIBETH_SKB_HEADROOM /* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */ #define LIBETH_RX_LL_LEN (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN) +/* Maximum supported L2-L4 header length */ +#define LIBETH_MAX_HEAD roundup_pow_of_two(max(MAX_HEADER, 256)) /* Always use order-0 pages */ #define LIBETH_RX_PAGE_ORDER 0 @@ -43,6 +45,18 @@ struct libeth_fqe { u32 truesize; } __aligned_largest; +/** + * enum libeth_fqe_type - enum representing types of Rx buffers + * @LIBETH_FQE_MTU: buffer size is determined by MTU + * @LIBETH_FQE_SHORT: buffer size is smaller than MTU, for short frames + * @LIBETH_FQE_HDR: buffer size is ```LIBETH_MAX_HEAD```-sized, for headers + */ +enum libeth_fqe_type { + LIBETH_FQE_MTU = 0U, + LIBETH_FQE_SHORT, + LIBETH_FQE_HDR, +}; + /** * struct libeth_fq - structure representing a buffer (fill) queue * @fp: hotpath part of the structure @@ -50,6 +64,8 @@ struct libeth_fqe { * @fqes: array of Rx buffers * @truesize: size to allocate per buffer, w/overhead * @count: number of descriptors/buffers the queue has + * @type: type of the buffers this queue has + * @hsplit: flag whether header split is enabled * @buf_len: HW-writeable length per each buffer * @nid: ID of the closest NUMA node with memory */ @@ -63,6 +79,9 @@ struct libeth_fq { ); /* Cold fields */ + enum libeth_fqe_type type:2; + bool hsplit:1; + u32 buf_len; int nid; }; -- cgit v1.2.3 From 9fa001cf3bb0598aad09d15b2896f7db9ef87637 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 1 Jul 2024 18:59:31 +0000 Subject: mm: memcg: drop obsolete cache line padding in struct mem_cgroup After the grouping of the cgroup v1-related fields and the corresponding reorganization of the struct mem_cgroup, the existing cache line padding doesn't make much sense anymore. Let's drop it for now and put back to new places, if necessary. Link: https://lkml.kernel.org/r/20240701185932.704807-1-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Suggested-by: Shakeel Butt Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d0c9365ff039..8b5b3ddeba05 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -220,8 +220,6 @@ struct mem_cgroup { /* handle for "memory.swap.events" */ struct cgroup_file swap_events_file; - CACHELINE_PADDING(_pad1_); - /* memory.stat */ struct memcg_vmstats *vmstats; @@ -305,8 +303,6 @@ struct mem_cgroup { bool tcpmem_active; int tcpmem_pressure; - CACHELINE_PADDING(_pad2_); - /* * set > 0 if pages under this cgroup are moving to other cgroup. */ -- cgit v1.2.3 From 6df13230b612af81ce04f20bb37a02e58ef71925 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 1 Jul 2024 18:59:32 +0000 Subject: mm: memcg: add cache line padding to mem_cgroup_per_node Memcg v1-specific fields serve a buffer function between read-mostly and update often parts of the mem_cgroup_per_node structure. If CONFIG_MEMCG_V1 is not set and these fields are not present, an explicit cacheline padding is needed. Link: https://lkml.kernel.org/r/20240701185932.704807-2-roman.gushchin@linux.dev Signed-off-by: Roman Gushchin Suggested-by: Shakeel Butt Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8b5b3ddeba05..60418934827c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -95,14 +95,16 @@ struct mem_cgroup_per_node { #ifdef CONFIG_MEMCG_V1 /* * Memcg-v1 only stuff in middle as buffer between read mostly fields - * and update often fields to avoid false sharing. Once v1 stuff is - * moved in a separate struct, an explicit padding is needed. + * and update often fields to avoid false sharing. If v1 stuff is + * not present, an explicit padding is needed. */ struct rb_node tree_node; /* RB tree node */ unsigned long usage_in_excess;/* Set to the value by which */ /* the soft limit is exceeded*/ bool on_tree; +#else + CACHELINE_PADDING(_pad1_); #endif /* Fields which get updated often at the end. */ -- cgit v1.2.3 From 3a3b7fec3974f954600844e41d773c00857ef48a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 1 Jul 2024 11:31:15 -0400 Subject: mm: remove CONFIG_MEMCG_KMEM CONFIG_MEMCG_KMEM used to be a user-visible option for whether slab tracking is enabled. It has been default-enabled and equivalent to CONFIG_MEMCG for almost a decade. We've only grown more kernel memory accounting sites since, and there is no imaginable cgroup usecase going forward that wants to track user pages but not the multitude of user-drivable kernel allocations. Link: https://lkml.kernel.org/r/20240701153148.452230-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Roman Gushchin Acked-by: Michal Hocko Acked-by: Shakeel Butt Acked-by: David Hildenbrand Cc: Muchun Song Signed-off-by: Andrew Morton --- include/linux/bpf.h | 4 ++-- include/linux/list_lru.h | 2 +- include/linux/memcontrol.h | 22 +++++----------------- include/linux/sched.h | 3 +-- include/linux/slab.h | 12 ++++++------ include/trace/events/kmem.h | 4 ++-- 6 files changed, 17 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5e694a308081..b8637555c9c2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -275,7 +275,7 @@ struct bpf_map { u32 btf_value_type_id; u32 btf_vmlinux_value_type_id; struct btf *btf; -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG struct obj_cgroup *objcg; #endif char name[BPF_OBJ_NAME_LEN]; @@ -2252,7 +2252,7 @@ struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array); -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, int node); void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags); diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h index 792b67ceb631..5099a8ccd5f4 100644 --- a/include/linux/list_lru.h +++ b/include/linux/list_lru.h @@ -50,7 +50,7 @@ struct list_lru_node { struct list_lru { struct list_lru_node *node; -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG struct list_head list; int shrinker_id; bool memcg_aware; diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 60418934827c..7e2eb091049a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -195,7 +195,7 @@ struct mem_cgroup { /* Range enforcement for interrupt charges */ struct work_struct high_work; -#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) +#ifdef CONFIG_ZSWAP unsigned long zswap_max; /* @@ -236,7 +236,6 @@ struct mem_cgroup { */ unsigned long socket_pressure; -#ifdef CONFIG_MEMCG_KMEM int kmemcg_id; /* * memcg->objcg is wiped out as a part of the objcg repaprenting @@ -247,7 +246,6 @@ struct mem_cgroup { struct obj_cgroup *orig_objcg; /* list of inherited objcgs, protected by objcg_lock */ struct list_head objcg_list; -#endif struct memcg_vmstats_percpu __percpu *vmstats_percpu; @@ -532,7 +530,6 @@ retry: return memcg; } -#ifdef CONFIG_MEMCG_KMEM /* * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set. * @folio: Pointer to the folio. @@ -548,15 +545,6 @@ static inline bool folio_memcg_kmem(struct folio *folio) return folio->memcg_data & MEMCG_DATA_KMEM; } - -#else -static inline bool folio_memcg_kmem(struct folio *folio) -{ - return false; -} - -#endif - static inline bool PageMemcgKmem(struct page *page) { return folio_memcg_kmem(page_folio(page)); @@ -1488,7 +1476,7 @@ static inline void split_page_memcg(struct page *head, int old_order, int new_or * if MEMCG_DATA_OBJEXTS is set. */ struct slabobj_ext { -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG struct obj_cgroup *objcg; #endif #ifdef CONFIG_MEM_ALLOC_PROFILING @@ -1663,7 +1651,7 @@ static inline void set_shrinker_bit(struct mem_cgroup *memcg, } #endif -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG bool mem_cgroup_kmem_disabled(void); int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge_page(struct page *page, int order); @@ -1806,9 +1794,9 @@ static inline void count_objcg_event(struct obj_cgroup *objcg, { } -#endif /* CONFIG_MEMCG_KMEM */ +#endif /* CONFIG_MEMCG */ -#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) +#if defined(CONFIG_MEMCG) && defined(CONFIG_ZSWAP) bool obj_cgroup_may_zswap(struct obj_cgroup *objcg); void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size); void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size); diff --git a/include/linux/sched.h b/include/linux/sched.h index a7770c566c4d..82da65131a6b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1457,9 +1457,8 @@ struct task_struct { /* Used by memcontrol for targeted memcg charge: */ struct mem_cgroup *active_memcg; -#endif -#ifdef CONFIG_MEMCG_KMEM + /* Cache for current->cgroups->memcg->objcg lookups: */ struct obj_cgroup *objcg; #endif diff --git a/include/linux/slab.h b/include/linux/slab.h index 7247e217e21b..a332dd2fa6cd 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -41,7 +41,7 @@ enum _slab_flag_bits { #ifdef CONFIG_FAILSLAB _SLAB_FAILSLAB, #endif -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG _SLAB_ACCOUNT, #endif #ifdef CONFIG_KASAN_GENERIC @@ -171,7 +171,7 @@ enum _slab_flag_bits { # define SLAB_FAILSLAB __SLAB_FLAG_UNUSED #endif /* Account to memcg */ -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG # define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) #else # define SLAB_ACCOUNT __SLAB_FLAG_UNUSED @@ -407,7 +407,7 @@ enum kmalloc_cache_type { #ifndef CONFIG_ZONE_DMA KMALLOC_DMA = KMALLOC_NORMAL, #endif -#ifndef CONFIG_MEMCG_KMEM +#ifndef CONFIG_MEMCG KMALLOC_CGROUP = KMALLOC_NORMAL, #endif KMALLOC_RANDOM_START = KMALLOC_NORMAL, @@ -420,7 +420,7 @@ enum kmalloc_cache_type { #ifdef CONFIG_ZONE_DMA KMALLOC_DMA, #endif -#ifdef CONFIG_MEMCG_KMEM +#ifdef CONFIG_MEMCG KMALLOC_CGROUP, #endif NR_KMALLOC_TYPES @@ -435,7 +435,7 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1]; #define KMALLOC_NOT_NORMAL_BITS \ (__GFP_RECLAIMABLE | \ (IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \ - (IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0)) + (IS_ENABLED(CONFIG_MEMCG) ? __GFP_ACCOUNT : 0)) extern unsigned long random_kmalloc_seed; @@ -463,7 +463,7 @@ static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigne */ if (IS_ENABLED(CONFIG_ZONE_DMA) && (flags & __GFP_DMA)) return KMALLOC_DMA; - if (!IS_ENABLED(CONFIG_MEMCG_KMEM) || (flags & __GFP_RECLAIMABLE)) + if (!IS_ENABLED(CONFIG_MEMCG) || (flags & __GFP_RECLAIMABLE)) return KMALLOC_RECLAIM; else return KMALLOC_CGROUP; diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 8a829e0f6e55..b37eb0a7060f 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -36,7 +36,7 @@ TRACE_EVENT(kmem_cache_alloc, __entry->bytes_alloc = s->size; __entry->gfp_flags = (__force unsigned long)gfp_flags; __entry->node = node; - __entry->accounted = IS_ENABLED(CONFIG_MEMCG_KMEM) ? + __entry->accounted = IS_ENABLED(CONFIG_MEMCG) ? ((gfp_flags & __GFP_ACCOUNT) || (s->flags & SLAB_ACCOUNT)) : false; ), @@ -87,7 +87,7 @@ TRACE_EVENT(kmalloc, __entry->bytes_alloc, show_gfp_flags(__entry->gfp_flags), __entry->node, - (IS_ENABLED(CONFIG_MEMCG_KMEM) && + (IS_ENABLED(CONFIG_MEMCG) && (__entry->gfp_flags & (__force unsigned long)__GFP_ACCOUNT)) ? "true" : "false") ); -- cgit v1.2.3 From 259043e3b730e0aa6408bff27af7edf7a5c9101c Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sun, 30 Jun 2024 11:22:31 +1200 Subject: mm: zswap: fix zswap_never_enabled() for CONFIG_ZSWAP==N If CONFIG_ZSWAP is set to N, it means zswap cannot be enabled. zswap_never_enabled() should return true. The only effect of this issue is that with Barry's latest large folio swapin patches for zram ("mm: support mTHP swap-in for zRAM-like swapfile"), we will always fallback to order-0 swapin, even mistakenly when !CONFIG_ZSWAP. Basically this bug makes Barry's in progress patches not work at all. The API was created to inform the mm core that zswap has never been enabled, allowing the mm core to perform mTHP swap-in. This is a transitional solution until zswap supports mTHP. If zswap has been enabled, performing mTHP swap-in will result in corrupted data. You may find the answer in the mTHP swap-in series: https://lore.kernel.org/linux-mm/CAJD7tkZ4FQr6HZpduOdvmqgg_-whuZYE-Bz5O2t6yzw6Yg+v1A@mail.gmail.com/ Link: https://lkml.kernel.org/r/20240629232231.42394-1-21cnbao@gmail.com Fixes: 0300e17d67c3 ("mm: zswap: add zswap_never_enabled()") Signed-off-by: Barry Song Reviewed-by: Chengming Zhou Acked-by: Yosry Ahmed Acked-by: Chris Li Acked-by: David Hildenbrand Reviewed-by: Nhat Pham Cc: Johannes Weiner Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- include/linux/zswap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/zswap.h b/include/linux/zswap.h index bf83ae5e285d..6cecb4a4f68b 100644 --- a/include/linux/zswap.h +++ b/include/linux/zswap.h @@ -68,7 +68,7 @@ static inline bool zswap_is_enabled(void) static inline bool zswap_never_enabled(void) { - return false; + return true; } #endif -- cgit v1.2.3 From a43fe27d650375cd9e5ea915c538f6f9eabd185e Mon Sep 17 00:00:00 2001 From: Xiao Wang Date: Fri, 21 Jun 2024 13:47:07 +0800 Subject: riscv: Optimize crc32 with Zbc extension As suggested by the B-ext spec, the Zbc (carry-less multiplication) instructions can be used to accelerate CRC calculations. Currently, the crc32 is the most widely used crc function inside kernel, so this patch focuses on the optimization of just the crc32 APIs. Compared with the current table-lookup based optimization, Zbc based optimization can also achieve large stride during CRC calculation loop, meantime, it avoids the memory access latency of the table-lookup based implementation and it reduces memory footprint. If Zbc feature is not supported in a runtime environment, then the table-lookup based implementation would serve as fallback via alternative mechanism. By inspecting the vmlinux built by gcc v12.2.0 with default optimization level (-O2), we can see below instruction count change for each 8-byte stride in the CRC32 loop: rv64: crc32_be (54->31), crc32_le (54->13), __crc32c_le (54->13) rv32: crc32_be (50->32), crc32_le (50->16), __crc32c_le (50->16) The compile target CPU is little endian, extra effort is needed for byte swapping for the crc32_be API, thus, the instruction count change is not as significant as that in the *_le cases. This patch is tested on QEMU VM with the kernel CRC32 selftest for both rv64 and rv32. Running the CRC32 selftest on a real hardware (SpacemiT K1) with Zbc extension shows 65% and 125% performance improvement respectively on crc32_test() and crc32c_test(). Signed-off-by: Xiao Wang Reviewed-by: Charlie Jenkins Link: https://lore.kernel.org/r/20240621054707.1847548-1-xiao.w.wang@intel.com Signed-off-by: Palmer Dabbelt --- include/linux/crc32.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 9e8a032c1788..87f788c0d607 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -9,7 +9,9 @@ #include u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len); +u32 __pure crc32_le_base(u32 crc, unsigned char const *p, size_t len); u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len); +u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len); /** * crc32_le_combine - Combine two crc32 check values into one. For two @@ -37,6 +39,7 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) } u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len); +u32 __pure __crc32c_le_base(u32 crc, unsigned char const *p, size_t len); /** * __crc32c_le_combine - Combine two crc32c check values into one. For two -- cgit v1.2.3 From bed6b0317441d82c32506750ccd868d83850e6f4 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Jun 2024 11:16:03 +0800 Subject: f2fs: clean up addrs_per_{inode,block}() Introduce a new help addrs_per_page() to wrap common code from addrs_per_inode() and addrs_per_block() for cleanup. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 41d1d71c36ff..01bee2b289c2 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -259,15 +259,14 @@ struct node_footer { #define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ get_extra_isize(inode)) #define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */ -#define ADDRS_PER_INODE(inode) addrs_per_inode(inode) +#define ADDRS_PER_INODE(inode) addrs_per_page(inode, true) /* Address Pointers in a Direct Block */ #define DEF_ADDRS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_BLOCK(inode) addrs_per_block(inode) +#define ADDRS_PER_BLOCK(inode) addrs_per_page(inode, false) /* Node IDs in an Indirect Block */ #define NIDS_PER_BLOCK ((F2FS_BLKSIZE - sizeof(struct node_footer)) / sizeof(__le32)) -#define ADDRS_PER_PAGE(page, inode) \ - (IS_INODE(page) ? ADDRS_PER_INODE(inode) : ADDRS_PER_BLOCK(inode)) +#define ADDRS_PER_PAGE(page, inode) (addrs_per_page(inode, IS_INODE(page))) #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) -- cgit v1.2.3 From 1037885b309cfca5b770137209e9b51d1b50cc27 Mon Sep 17 00:00:00 2001 From: Drew Fustini Date: Sun, 23 Jun 2024 19:12:31 -0700 Subject: dt-bindings: clock: Document T-Head TH1520 AP_SUBSYS controller Document bindings for the T-Head TH1520 AP sub-system clock controller. Link: https://openbeagle.org/beaglev-ahead/beaglev-ahead/-/blob/main/docs/TH1520%20System%20User%20Manual.pdf Co-developed-by: Yangtao Li Signed-off-by: Yangtao Li Reviewed-by: Conor Dooley Signed-off-by: Drew Fustini Link: https://lore.kernel.org/r/20240623-th1520-clk-v2-1-ad8d6432d9fb@tenstorrent.com Signed-off-by: Stephen Boyd --- include/dt-bindings/clock/thead,th1520-clk-ap.h | 96 +++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 include/dt-bindings/clock/thead,th1520-clk-ap.h (limited to 'include') diff --git a/include/dt-bindings/clock/thead,th1520-clk-ap.h b/include/dt-bindings/clock/thead,th1520-clk-ap.h new file mode 100644 index 000000000000..a199784b3512 --- /dev/null +++ b/include/dt-bindings/clock/thead,th1520-clk-ap.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (C) 2023 Vivo Communication Technology Co. Ltd. + * Authors: Yangtao Li + */ + +#ifndef _DT_BINDINGS_CLK_TH1520_H_ +#define _DT_BINDINGS_CLK_TH1520_H_ + +#define CLK_CPU_PLL0 0 +#define CLK_CPU_PLL1 1 +#define CLK_GMAC_PLL 2 +#define CLK_VIDEO_PLL 3 +#define CLK_DPU0_PLL 4 +#define CLK_DPU1_PLL 5 +#define CLK_TEE_PLL 6 +#define CLK_C910_I0 7 +#define CLK_C910 8 +#define CLK_BROM 9 +#define CLK_BMU 10 +#define CLK_AHB2_CPUSYS_HCLK 11 +#define CLK_APB3_CPUSYS_PCLK 12 +#define CLK_AXI4_CPUSYS2_ACLK 13 +#define CLK_AON2CPU_A2X 14 +#define CLK_X2X_CPUSYS 15 +#define CLK_AXI_ACLK 16 +#define CLK_CPU2AON_X2H 17 +#define CLK_PERI_AHB_HCLK 18 +#define CLK_CPU2PERI_X2H 19 +#define CLK_PERI_APB_PCLK 20 +#define CLK_PERI2APB_PCLK 21 +#define CLK_PERISYS_APB1_HCLK 22 +#define CLK_PERISYS_APB2_HCLK 23 +#define CLK_PERISYS_APB3_HCLK 24 +#define CLK_PERISYS_APB4_HCLK 25 +#define CLK_OSC12M 26 +#define CLK_OUT1 27 +#define CLK_OUT2 28 +#define CLK_OUT3 29 +#define CLK_OUT4 30 +#define CLK_APB_PCLK 31 +#define CLK_NPU 32 +#define CLK_NPU_AXI 33 +#define CLK_VI 34 +#define CLK_VI_AHB 35 +#define CLK_VO_AXI 36 +#define CLK_VP_APB 37 +#define CLK_VP_AXI 38 +#define CLK_CPU2VP 39 +#define CLK_VENC 40 +#define CLK_DPU0 41 +#define CLK_DPU1 42 +#define CLK_EMMC_SDIO 43 +#define CLK_GMAC1 44 +#define CLK_PADCTRL1 45 +#define CLK_DSMART 46 +#define CLK_PADCTRL0 47 +#define CLK_GMAC_AXI 48 +#define CLK_GPIO3 49 +#define CLK_GMAC0 50 +#define CLK_PWM 51 +#define CLK_QSPI0 52 +#define CLK_QSPI1 53 +#define CLK_SPI 54 +#define CLK_UART0_PCLK 55 +#define CLK_UART1_PCLK 56 +#define CLK_UART2_PCLK 57 +#define CLK_UART3_PCLK 58 +#define CLK_UART4_PCLK 59 +#define CLK_UART5_PCLK 60 +#define CLK_GPIO0 61 +#define CLK_GPIO1 62 +#define CLK_GPIO2 63 +#define CLK_I2C0 64 +#define CLK_I2C1 65 +#define CLK_I2C2 66 +#define CLK_I2C3 67 +#define CLK_I2C4 68 +#define CLK_I2C5 69 +#define CLK_SPINLOCK 70 +#define CLK_DMA 71 +#define CLK_MBOX0 72 +#define CLK_MBOX1 73 +#define CLK_MBOX2 74 +#define CLK_MBOX3 75 +#define CLK_WDT0 76 +#define CLK_WDT1 77 +#define CLK_TIMER0 78 +#define CLK_TIMER1 79 +#define CLK_SRAM0 80 +#define CLK_SRAM1 81 +#define CLK_SRAM2 82 +#define CLK_SRAM3 83 +#define CLK_PLL_GMAC_100M 84 +#define CLK_UART_SCLK 85 +#endif -- cgit v1.2.3 From 93ef12d92f65facf8e02ad6578a898a190aa6cf4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Jul 2024 14:15:57 -0700 Subject: scsi: ufs: core: Initialize struct uic_command once Instead of first zero-initializing struct uic_command and next initializing it memberwise, initialize all members at once. Reviewed-by: Daejun Park Reviewed-by: Avri Altman Reviewed-by: Manivannan Sadhasivam Reviewed-by: Peter Wang Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240708211716.2827751-3-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 9e0581115b34..d4d63507d090 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -73,8 +73,8 @@ enum ufs_event_type { * @done: UIC command completion */ struct uic_command { - u32 command; - u32 argument1; + const u32 command; + const u32 argument1; u32 argument2; u32 argument3; int cmd_active; -- cgit v1.2.3 From b53eb9a050d76fc695258e04384e71b3fbb1e7d5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Jul 2024 14:15:59 -0700 Subject: scsi: ufs: core: Rename the MASK_TRANSFER_REQUESTS_SLOTS constant Rename this constant to prepare for the introduction of the MASK_TRANSFER_REQUESTS_SLOTS_MCQ constant. The acronym "SDB" stands for "single doorbell" (mode). Reviewed-by: Peter Wang Reviewed-by: Manivannan Sadhasivam Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240708211716.2827751-5-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- include/ufs/ufshci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index c50f92bf2e1d..8d0cc73537c6 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -67,7 +67,7 @@ enum { /* Controller capability masks */ enum { - MASK_TRANSFER_REQUESTS_SLOTS = 0x0000001F, + MASK_TRANSFER_REQUESTS_SLOTS_SDB = 0x0000001F, MASK_NUMBER_OUTSTANDING_RTT = 0x0000FF00, MASK_TASK_MANAGEMENT_REQUEST_SLOTS = 0x00070000, MASK_EHSLUTRD_SUPPORTED = 0x00400000, -- cgit v1.2.3 From 0fca3318e550d48e9a6c844763bf9eab91c30f07 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Jul 2024 14:16:01 -0700 Subject: scsi: ufs: core: Inline is_mcq_enabled() Improve code readability by inlining is_mcq_enabled(). Cc: Peter Wang Cc: Manivannan Sadhasivam Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240708211716.2827751-7-bvanassche@acm.org Reviewed-by: Manivannan Sadhasivam Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index d4d63507d090..c0e28a512b3c 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1132,11 +1132,6 @@ struct ufs_hw_queue { #define MCQ_QCFG_SIZE 0x40 -static inline bool is_mcq_enabled(struct ufs_hba *hba) -{ - return hba->mcq_enabled; -} - static inline unsigned int ufshcd_mcq_opr_offset(struct ufs_hba *hba, enum ufshcd_mcq_opr opr, int idx) { -- cgit v1.2.3 From af568c7e8292bd96f5a4f3d81dcc099bcfe7cb73 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 8 Jul 2024 14:16:05 -0700 Subject: scsi: ufs: mcq: Make .get_hba_mac() optional UFSHCI controllers that are compliant with the UFSHCI 4.0 standard report the maximum number of supported commands in the controller capabilities register. Use that value if .get_hba_mac == NULL. Reviewed-by: Peter Wang Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240708211716.2827751-11-bvanassche@acm.org Reviewed-by: Manivannan Sadhasivam Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 4 +++- include/ufs/ufshci.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index c0e28a512b3c..6518997930f3 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -325,7 +325,9 @@ struct ufs_pwr_mode_info { * @event_notify: called to notify important events * @reinit_notify: called to notify reinit of UFSHCD during max gear switch * @mcq_config_resource: called to configure MCQ platform resources - * @get_hba_mac: called to get vendor specific mac value, mandatory for mcq mode + * @get_hba_mac: reports maximum number of outstanding commands supported by + * the controller. Should be implemented for UFSHCI 4.0 or later + * controllers that are not compliant with the UFSHCI 4.0 specification. * @op_runtime_config: called to config Operation and runtime regs Pointers * @get_outstanding_cqs: called to get outstanding completion queues * @config_esi: called to config Event Specific Interrupt diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 8d0cc73537c6..38fe97971a65 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -68,6 +68,7 @@ enum { /* Controller capability masks */ enum { MASK_TRANSFER_REQUESTS_SLOTS_SDB = 0x0000001F, + MASK_TRANSFER_REQUESTS_SLOTS_MCQ = 0x000000FF, MASK_NUMBER_OUTSTANDING_RTT = 0x0000FF00, MASK_TASK_MANAGEMENT_REQUEST_SLOTS = 0x00070000, MASK_EHSLUTRD_SUPPORTED = 0x00400000, -- cgit v1.2.3 From c2a90eee29f41630225c9a64d26c425e1d50b401 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 8 Jul 2024 16:53:25 -0700 Subject: scsi: ufs: core: Add UFSHCD_QUIRK_CUSTOM_CRYPTO_PROFILE Add UFSHCD_QUIRK_CUSTOM_CRYPTO_PROFILE which lets UFS host drivers initialize the blk_crypto_profile themselves rather than have it be initialized by ufshcd-core according to the UFSHCI standard. This is needed to support inline encryption on the "Exynos" UFS controller which has a nonstandard interface. Reviewed-by: Bart Van Assche Reviewed-by: Peter Griffin Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20240708235330.103590-2-ebiggers@kernel.org Reviewed-by: Alim Akhtar Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 9e0581115b34..4f8982e52ac4 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -645,6 +645,15 @@ enum ufshcd_quirks { * thus need this quirk to skip related flow. */ UFSHCD_QUIRK_MCQ_BROKEN_RTC = 1 << 21, + + /* + * This quirk needs to be enabled if the host controller supports inline + * encryption but it needs to initialize the crypto capabilities in a + * nonstandard way and/or needs to override blk_crypto_ll_ops. If + * enabled, the standard code won't initialize the blk_crypto_profile; + * ufs_hba_variant_ops::init() must do it instead. + */ + UFSHCD_QUIRK_CUSTOM_CRYPTO_PROFILE = 1 << 22, }; enum ufshcd_caps { -- cgit v1.2.3 From e95881e0081a30e132b5ca087f1e07fc08608a7e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 8 Jul 2024 16:53:27 -0700 Subject: scsi: ufs: core: Add UFSHCD_QUIRK_BROKEN_CRYPTO_ENABLE Add UFSHCD_QUIRK_BROKEN_CRYPTO_ENABLE which tells the UFS core to not use the crypto enable bit defined by the UFS specification. This is needed to support inline encryption on the "Exynos" UFS controller. Reviewed-by: Bart Van Assche Reviewed-by: Peter Griffin Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20240708235330.103590-4-ebiggers@kernel.org Reviewed-by: Alim Akhtar Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 4f8982e52ac4..e07104f355c0 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -654,6 +654,13 @@ enum ufshcd_quirks { * ufs_hba_variant_ops::init() must do it instead. */ UFSHCD_QUIRK_CUSTOM_CRYPTO_PROFILE = 1 << 22, + + /* + * This quirk needs to be enabled if the host controller supports inline + * encryption but does not support the CRYPTO_GENERAL_ENABLE bit, i.e. + * host controller initialization fails if that bit is set. + */ + UFSHCD_QUIRK_BROKEN_CRYPTO_ENABLE = 1 << 23, }; enum ufshcd_caps { -- cgit v1.2.3 From 8ecea3da1567e0648b5d37a6faec73fc9c8571ba Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 8 Jul 2024 16:53:28 -0700 Subject: scsi: ufs: core: Add fill_crypto_prdt variant op Add a variant op to allow host drivers to initialize nonstandard crypto-related fields in the PRDT. This is needed to support inline encryption on the "Exynos" UFS controller. Note that this will be used together with the support for overriding the PRDT entry size that was already added by commit ada1e653a5ea ("scsi: ufs: core: Allow UFS host drivers to override the sg entry size"). Reviewed-by: Bart Van Assche Reviewed-by: Peter Griffin Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20240708235330.103590-5-ebiggers@kernel.org Reviewed-by: Alim Akhtar Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index e07104f355c0..fb791d1a6d00 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -322,6 +322,7 @@ struct ufs_pwr_mode_info { * @device_reset: called to issue a reset pulse on the UFS device * @config_scaling_param: called to configure clock scaling parameters * @program_key: program or evict an inline encryption key + * @fill_crypto_prdt: initialize crypto-related fields in the PRDT * @event_notify: called to notify important events * @reinit_notify: called to notify reinit of UFSHCD during max gear switch * @mcq_config_resource: called to configure MCQ platform resources @@ -367,6 +368,9 @@ struct ufs_hba_variant_ops { struct devfreq_simple_ondemand_data *data); int (*program_key)(struct ufs_hba *hba, const union ufs_crypto_cfg_entry *cfg, int slot); + int (*fill_crypto_prdt)(struct ufs_hba *hba, + const struct bio_crypt_ctx *crypt_ctx, + void *prdt, unsigned int num_segments); void (*event_notify)(struct ufs_hba *hba, enum ufs_event_type evt, void *data); void (*reinit_notify)(struct ufs_hba *); -- cgit v1.2.3 From 4c45dba50a3750a0834353c4187e7896b158bc0c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 8 Jul 2024 16:53:29 -0700 Subject: scsi: ufs: core: Add UFSHCD_QUIRK_KEYS_IN_PRDT Since the nonstandard inline encryption support on Exynos SoCs requires that raw cryptographic keys be copied into the PRDT, it is desirable to zeroize those keys after each request to keep them from being left in memory. Therefore, add a quirk bit that enables the zeroization. We could instead do the zeroization unconditionally. However, using a quirk bit avoids adding the zeroization overhead to standard devices. Reviewed-by: Bart Van Assche Reviewed-by: Peter Griffin Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20240708235330.103590-6-ebiggers@kernel.org Reviewed-by: Alim Akhtar Signed-off-by: Martin K. Petersen --- include/ufs/ufshcd.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index fb791d1a6d00..80accfbe48aa 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -665,6 +665,14 @@ enum ufshcd_quirks { * host controller initialization fails if that bit is set. */ UFSHCD_QUIRK_BROKEN_CRYPTO_ENABLE = 1 << 23, + + /* + * This quirk needs to be enabled if the host controller driver copies + * cryptographic keys into the PRDT in order to send them to hardware, + * and therefore the PRDT should be zeroized after each request (as per + * the standard best practice for managing keys). + */ + UFSHCD_QUIRK_KEYS_IN_PRDT = 1 << 24, }; enum ufshcd_caps { -- cgit v1.2.3 From a93c2e5fe766825d6264823fd1dca9aae747cf5d Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 6 Jul 2024 13:20:01 +0200 Subject: i2c: reword i2c_algorithm according to newest specification Start changing the wording of the I2C main header wrt. the newest I2C v7 and SMBus 3.2 specifications and replace "master/slave" with more appropriate terms. The first step renames the members of struct i2c_algorithm. Once all in-tree users are converted, the anonymous union will go away again. All this work will also pave the way for finally seperating the monolithic header into more fine-grained headers like "i2c/clients.h" etc. Signed-off-by: Wolfram Sang Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti --- include/linux/i2c.h | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 424acb98c7c2..9d45b7b912dd 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -548,10 +548,18 @@ struct i2c_algorithm { * master_xfer should return the number of messages successfully * processed, or a negative value on error */ - int (*master_xfer)(struct i2c_adapter *adap, struct i2c_msg *msgs, - int num); - int (*master_xfer_atomic)(struct i2c_adapter *adap, + union { + int (*xfer)(struct i2c_adapter *adap, struct i2c_msg *msgs, + int num); + int (*master_xfer)(struct i2c_adapter *adap, struct i2c_msg *msgs, + int num); + }; + union { + int (*xfer_atomic)(struct i2c_adapter *adap, struct i2c_msg *msgs, int num); + int (*master_xfer_atomic)(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num); + }; int (*smbus_xfer)(struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int size, union i2c_smbus_data *data); @@ -563,8 +571,14 @@ struct i2c_algorithm { u32 (*functionality)(struct i2c_adapter *adap); #if IS_ENABLED(CONFIG_I2C_SLAVE) - int (*reg_slave)(struct i2c_client *client); - int (*unreg_slave)(struct i2c_client *client); + union { + int (*reg_target)(struct i2c_client *client); + int (*reg_slave)(struct i2c_client *client); + }; + union { + int (*unreg_target)(struct i2c_client *client); + int (*unreg_slave)(struct i2c_client *client); + }; #endif }; -- cgit v1.2.3 From 4aa7b5d1784f510c0f42afc1d74efb41947221d7 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 30 Apr 2024 07:53:04 +0930 Subject: btrfs: remove extent_map::orig_start member Since we have extent_map::offset, the old extent_map::orig_start is just extent_map::start - extent_map::offset for non-hole/inline extents. And since the new extent_map::offset is already verified by validate_extent_map() while the old orig_start is not, let's just remove the old member from all call sites. Reviewed-by: Johannes Thumshirn Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index fadf406b5260..44016b6dda60 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -291,7 +291,6 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, __field( u64, ino ) __field( u64, start ) __field( u64, len ) - __field( u64, orig_start ) __field( u64, block_start ) __field( u64, block_len ) __field( u32, flags ) @@ -303,21 +302,18 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, __entry->ino = btrfs_ino(inode); __entry->start = map->start; __entry->len = map->len; - __entry->orig_start = map->orig_start; __entry->block_start = map->block_start; __entry->block_len = map->block_len; __entry->flags = map->flags; __entry->refs = refcount_read(&map->refs); ), - TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu " - "orig_start=%llu block_start=%llu(%s) " - "block_len=%llu flags=%s refs=%u", + TP_printk_btrfs( +"root=%llu(%s) ino=%llu start=%llu len=%llu block_start=%llu(%s) block_len=%llu flags=%s refs=%u", show_root_type(__entry->root_objectid), __entry->ino, __entry->start, __entry->len, - __entry->orig_start, show_map_type(__entry->block_start), __entry->block_len, show_map_flags(__entry->flags), -- cgit v1.2.3 From e28b851ed9b232c3b84cb8d0fedbdfa8ca881386 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 30 Apr 2024 07:53:05 +0930 Subject: btrfs: remove extent_map::block_len member The extent_map::block_len is either extent_map::len (non-compressed extent) or extent_map::disk_num_bytes (compressed extent). Since we already have sanity checks to do the cross-checks between the new and old members, we can drop the old extent_map::block_len now. For most call sites, they can manually select extent_map::len or extent_map::disk_num_bytes, since most if not all of them have checked if the extent is compressed. Reviewed-by: Johannes Thumshirn Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 44016b6dda60..69bef548818c 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -292,7 +292,6 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, __field( u64, start ) __field( u64, len ) __field( u64, block_start ) - __field( u64, block_len ) __field( u32, flags ) __field( int, refs ) ), @@ -303,19 +302,17 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, __entry->start = map->start; __entry->len = map->len; __entry->block_start = map->block_start; - __entry->block_len = map->block_len; __entry->flags = map->flags; __entry->refs = refcount_read(&map->refs); ), TP_printk_btrfs( -"root=%llu(%s) ino=%llu start=%llu len=%llu block_start=%llu(%s) block_len=%llu flags=%s refs=%u", +"root=%llu(%s) ino=%llu start=%llu len=%llu block_start=%llu(%s) flags=%s refs=%u", show_root_type(__entry->root_objectid), __entry->ino, __entry->start, __entry->len, show_map_type(__entry->block_start), - __entry->block_len, show_map_flags(__entry->flags), __entry->refs) ); -- cgit v1.2.3 From c77a8c61002e91d859e118008fd495efbe1d9373 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 30 Apr 2024 07:53:06 +0930 Subject: btrfs: remove extent_map::block_start member The member extent_map::block_start can be calculated from extent_map::disk_bytenr + extent_map::offset for regular extents. And otherwise just extent_map::disk_bytenr. And this is already validated by the validate_extent_map(). Now we can remove the member. However there is a special case in btrfs_create_dio_extent() where we for NOCOW/PREALLOC ordered extents cannot directly use the resulting btrfs_file_extent, as btrfs_split_ordered_extent() cannot handle them yet. So for that call site, we pass file_extent->disk_bytenr + file_extent->num_bytes as disk_bytenr for the ordered extent, and 0 for offset. Reviewed-by: Johannes Thumshirn Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 69bef548818c..eeb56975bee7 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -291,7 +291,6 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, __field( u64, ino ) __field( u64, start ) __field( u64, len ) - __field( u64, block_start ) __field( u32, flags ) __field( int, refs ) ), @@ -301,18 +300,15 @@ TRACE_EVENT_CONDITION(btrfs_get_extent, __entry->ino = btrfs_ino(inode); __entry->start = map->start; __entry->len = map->len; - __entry->block_start = map->block_start; __entry->flags = map->flags; __entry->refs = refcount_read(&map->refs); ), - TP_printk_btrfs( -"root=%llu(%s) ino=%llu start=%llu len=%llu block_start=%llu(%s) flags=%s refs=%u", + TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu flags=%s refs=%u", show_root_type(__entry->root_objectid), __entry->ino, __entry->start, __entry->len, - show_map_type(__entry->block_start), show_map_flags(__entry->flags), __entry->refs) ); @@ -2608,7 +2604,6 @@ TRACE_EVENT(btrfs_extent_map_shrinker_remove_em, __field( u64, root_id ) __field( u64, start ) __field( u64, len ) - __field( u64, block_start ) __field( u32, flags ) ), @@ -2617,15 +2612,12 @@ TRACE_EVENT(btrfs_extent_map_shrinker_remove_em, __entry->root_id = inode->root->root_key.objectid; __entry->start = em->start; __entry->len = em->len; - __entry->block_start = em->block_start; __entry->flags = em->flags; ), - TP_printk_btrfs( -"ino=%llu root=%llu(%s) start=%llu len=%llu block_start=%llu(%s) flags=%s", + TP_printk_btrfs("ino=%llu root=%llu(%s) start=%llu len=%llu flags=%s", __entry->ino, show_root_type(__entry->root_id), __entry->start, __entry->len, - show_map_type(__entry->block_start), show_map_flags(__entry->flags)) ); -- cgit v1.2.3 From 87128f520a6b7573f8086f2c89b9381ee7e85e51 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 10 Jun 2024 06:22:56 +0930 Subject: btrfs: uapi: record temporary super flags used by btrfstune [BUG] There is a bug report that a canceled checksum conversion (still experimental feature) results in unexpected super block flags: csum_type 0 (crc32c) csum_size 4 csum 0x14973811 [match] bytenr 65536 flags 0x1000000001 ( WRITTEN | CHANGING_FSID_V2 ) magic _BHRfS_M [match] While for a filesystem with ongoing checksum conversion it should have either CHANGING_DATA_CSUM or CHANGING_META_CSUM. [CAUSE] It turns out that, due to btrfs-progs keeps its own extra flags inside its own ctree.h headers, not the shared uapi headers, we have conflicting super flags: kernel-shared/uapi/btrfs_tree.h:#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34) kernel-shared/uapi/btrfs_tree.h:#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35) kernel-shared/uapi/btrfs_tree.h:#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36) kernel-shared/ctree.h:#define BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM (1ULL << 36) kernel-shared/ctree.h:#define BTRFS_SUPER_FLAG_CHANGING_META_CSUM (1ULL << 37) Note that CHANGING_FSID_V2 is conflicting with CHANGING_DATA_CSUM. [FIX] The proper fix would be done inside btrfs-progs, but to keep everything properly recorded, we should have everything inside the same uapi header. Copy all the new flags into uapi header, and change the value for CHANGING_DATA_CSUM and CHANGING_META_CSUM, while keep the value of CHANGING_BG_TREE untouched. Thankfully checksum change is still only experimental and all those CHANGING_* flags are transient (only for btrfs-progs to resume the conversion, and kernel will reject them all), the damage is still minor. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index d24e8e121507..c7636331e566 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -777,6 +777,14 @@ struct btrfs_stripe_extent { #define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35) #define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36) +/* + * Those are temporaray flags utilized by btrfs-progs to do offline conversion. + * They are rejected by kernel. + * But still keep them all here to avoid conflicts. + */ +#define BTRFS_SUPER_FLAG_CHANGING_BG_TREE (1ULL << 38) +#define BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM (1ULL << 39) +#define BTRFS_SUPER_FLAG_CHANGING_META_CSUM (1ULL << 40) /* * items in the extent btree are used to record the objectid of the -- cgit v1.2.3 From 2422547e99f99f952d1a37f26b63289f749d5fcd Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 13 May 2024 18:01:43 +0200 Subject: btrfs: remove raid-stripe-tree encoding field from stripe_extent Remove the encoding field from 'struct btrfs_stripe_extent'. It was originally intended to encode the RAID type as well as if we're a data or a parity stripe. But the RAID type can be inferred form the block-group and the data vs. parity differentiation can be done easier with adding a new key type for parity stripes in the RAID stripe tree. Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- include/uapi/linux/btrfs_tree.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index c7636331e566..fc29d273845d 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -747,21 +747,9 @@ struct btrfs_raid_stride { __le64 physical; } __attribute__ ((__packed__)); -/* The stripe_extent::encoding, 1:1 mapping of enum btrfs_raid_types. */ -#define BTRFS_STRIPE_RAID0 1 -#define BTRFS_STRIPE_RAID1 2 -#define BTRFS_STRIPE_DUP 3 -#define BTRFS_STRIPE_RAID10 4 -#define BTRFS_STRIPE_RAID5 5 -#define BTRFS_STRIPE_RAID6 6 -#define BTRFS_STRIPE_RAID1C3 7 -#define BTRFS_STRIPE_RAID1C4 8 - struct btrfs_stripe_extent { - __u8 encoding; - __u8 reserved[7]; /* An array of raid strides this stripe is composed of. */ - struct btrfs_raid_stride strides[]; + __DECLARE_FLEX_ARRAY(struct btrfs_raid_stride, strides); } __attribute__ ((__packed__)); #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0) -- cgit v1.2.3 From 63347fe031e3bd738a2a16aee8eba889376e49a8 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 3 Jul 2024 09:48:01 -0700 Subject: drm/xe/uapi: Rename xe perf layer as xe observation layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Xe, the perf layer allows capture of HW counter streams. These HW counters are generally performance related but don't have to be necessarily so. Also, the name "perf" is a carryover from i915 and is not preferred. Here we propose the name "observation" for this common layer which allows capture of different types of these counter streams. v2: Rename observability layer to observation layer (Lucas/Rodrigo) v3: Rename sysctl file to "observation_paranoid" (Jose) Fixes: 52c2e956dceb ("drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream types") Fixes: fe8929bdf835 ("drm/xe/perf/uapi: Add perf_stream_paranoid sysctl") Acked-by: Lucas De Marchi Acked-by: Rodrigo Vivi Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Acked-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240703164801.2561423-1-ashutosh.dixit@intel.com (cherry picked from commit 8169b2097d88d99d7e4a72e20e4b549efe9eb8d7) Signed-off-by: Rodrigo Vivi --- include/uapi/drm/xe_drm.h | 102 +++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 12eaa8532b5c..33544ef78d3e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -80,7 +80,7 @@ extern "C" { * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY * - &DRM_IOCTL_XE_EXEC * - &DRM_IOCTL_XE_WAIT_USER_FENCE - * - &DRM_IOCTL_XE_PERF + * - &DRM_IOCTL_XE_OBSERVATION */ /* @@ -101,7 +101,7 @@ extern "C" { #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 #define DRM_XE_EXEC 0x09 #define DRM_XE_WAIT_USER_FENCE 0x0a -#define DRM_XE_PERF 0x0b +#define DRM_XE_OBSERVATION 0x0b /* Must be kept compact -- no holes */ @@ -116,7 +116,7 @@ extern "C" { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) -#define DRM_IOCTL_XE_PERF DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param) +#define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param) /** * DOC: Xe IOCTL Extensions @@ -1376,66 +1376,67 @@ struct drm_xe_wait_user_fence { }; /** - * enum drm_xe_perf_type - Perf stream types + * enum drm_xe_observation_type - Observation stream types */ -enum drm_xe_perf_type { - /** @DRM_XE_PERF_TYPE_OA: OA perf stream type */ - DRM_XE_PERF_TYPE_OA, +enum drm_xe_observation_type { + /** @DRM_XE_OBSERVATION_TYPE_OA: OA observation stream type */ + DRM_XE_OBSERVATION_TYPE_OA, }; /** - * enum drm_xe_perf_op - Perf stream ops + * enum drm_xe_observation_op - Observation stream ops */ -enum drm_xe_perf_op { - /** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */ - DRM_XE_PERF_OP_STREAM_OPEN, +enum drm_xe_observation_op { + /** @DRM_XE_OBSERVATION_OP_STREAM_OPEN: Open an observation stream */ + DRM_XE_OBSERVATION_OP_STREAM_OPEN, - /** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */ - DRM_XE_PERF_OP_ADD_CONFIG, + /** @DRM_XE_OBSERVATION_OP_ADD_CONFIG: Add observation stream config */ + DRM_XE_OBSERVATION_OP_ADD_CONFIG, - /** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf stream config */ - DRM_XE_PERF_OP_REMOVE_CONFIG, + /** @DRM_XE_OBSERVATION_OP_REMOVE_CONFIG: Remove observation stream config */ + DRM_XE_OBSERVATION_OP_REMOVE_CONFIG, }; /** - * struct drm_xe_perf_param - Input of &DRM_XE_PERF + * struct drm_xe_observation_param - Input of &DRM_XE_OBSERVATION * - * The perf layer enables multiplexing perf counter streams of multiple - * types. The actual params for a particular stream operation are supplied - * via the @param pointer (use __copy_from_user to get these params). + * The observation layer enables multiplexing observation streams of + * multiple types. The actual params for a particular stream operation are + * supplied via the @param pointer (use __copy_from_user to get these + * params). */ -struct drm_xe_perf_param { +struct drm_xe_observation_param { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; - /** @perf_type: Perf stream type, of enum @drm_xe_perf_type */ - __u64 perf_type; - /** @perf_op: Perf op, of enum @drm_xe_perf_op */ - __u64 perf_op; + /** @observation_type: observation stream type, of enum @drm_xe_observation_type */ + __u64 observation_type; + /** @observation_op: observation stream op, of enum @drm_xe_observation_op */ + __u64 observation_op; /** @param: Pointer to actual stream params */ __u64 param; }; /** - * enum drm_xe_perf_ioctls - Perf fd ioctl's + * enum drm_xe_observation_ioctls - Observation stream fd ioctl's * - * Information exchanged between userspace and kernel for perf fd ioctl's - * is stream type specific + * Information exchanged between userspace and kernel for observation fd + * ioctl's is stream type specific */ -enum drm_xe_perf_ioctls { - /** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */ - DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0), +enum drm_xe_observation_ioctls { + /** @DRM_XE_OBSERVATION_IOCTL_ENABLE: Enable data capture for an observation stream */ + DRM_XE_OBSERVATION_IOCTL_ENABLE = _IO('i', 0x0), - /** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */ - DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1), + /** @DRM_XE_OBSERVATION_IOCTL_DISABLE: Disable data capture for a observation stream */ + DRM_XE_OBSERVATION_IOCTL_DISABLE = _IO('i', 0x1), - /** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */ - DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2), + /** @DRM_XE_OBSERVATION_IOCTL_CONFIG: Change observation stream configuration */ + DRM_XE_OBSERVATION_IOCTL_CONFIG = _IO('i', 0x2), - /** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */ - DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3), + /** @DRM_XE_OBSERVATION_IOCTL_STATUS: Return observation stream status */ + DRM_XE_OBSERVATION_IOCTL_STATUS = _IO('i', 0x3), - /** @DRM_XE_PERF_IOCTL_INFO: Return stream info */ - DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), + /** @DRM_XE_OBSERVATION_IOCTL_INFO: Return observation stream info */ + DRM_XE_OBSERVATION_IOCTL_INFO = _IO('i', 0x4), }; /** @@ -1546,12 +1547,12 @@ enum drm_xe_oa_format_type { * Stream params are specified as a chain of @drm_xe_ext_set_property * struct's, with @property values from enum @drm_xe_oa_property_id and * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. - * @param field in struct @drm_xe_perf_param points to the first + * @param field in struct @drm_xe_observation_param points to the first * @drm_xe_ext_set_property struct. * - * Exactly the same mechanism is also used for stream reconfiguration using - * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of - * properties below can be specified for stream reconfiguration. + * Exactly the same mechanism is also used for stream reconfiguration using the + * @DRM_XE_OBSERVATION_IOCTL_CONFIG observation stream fd ioctl, though only a + * subset of properties below can be specified for stream reconfiguration. */ enum drm_xe_oa_property_id { #define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 @@ -1571,11 +1572,11 @@ enum drm_xe_oa_property_id { /** * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA - * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG. + * reports, previously added via @DRM_XE_OBSERVATION_OP_ADD_CONFIG. */ DRM_XE_OA_PROPERTY_OA_METRIC_SET, - /** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */ + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: OA counter report format */ DRM_XE_OA_PROPERTY_OA_FORMAT, /* * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, @@ -1596,13 +1597,13 @@ enum drm_xe_oa_property_id { /** * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA - * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE). + * stream in a DISABLED state (see @DRM_XE_OBSERVATION_IOCTL_ENABLE). */ DRM_XE_OA_PROPERTY_OA_DISABLED, /** * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific - * @exec_queue_id. Perf queries can be executed on this exec queue. + * @exec_queue_id. OA queries can be executed on this exec queue. */ DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, @@ -1622,7 +1623,7 @@ enum drm_xe_oa_property_id { /** * struct drm_xe_oa_config - OA metric configuration * - * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A + * Multiple OA configs can be added using @DRM_XE_OBSERVATION_OP_ADD_CONFIG. A * particular config can be specified when opening an OA stream using * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. */ @@ -1645,8 +1646,9 @@ struct drm_xe_oa_config { /** * struct drm_xe_oa_stream_status - OA stream status returned from - * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to - * query stream status in response to EIO errno from perf fd read(). + * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl. Userspace can + * call the ioctl to query stream status in response to EIO errno from + * observation fd read(). */ struct drm_xe_oa_stream_status { /** @extensions: Pointer to the first extension struct, if any */ @@ -1665,7 +1667,7 @@ struct drm_xe_oa_stream_status { /** * struct drm_xe_oa_stream_info - OA stream info returned from - * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl + * @DRM_XE_OBSERVATION_IOCTL_INFO observation stream fd ioctl */ struct drm_xe_oa_stream_info { /** @extensions: Pointer to the first extension struct, if any */ -- cgit v1.2.3 From bcea31e2d1c7a34aeeae9458f38833d5a8409cf7 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 10 Jul 2024 11:37:18 +0200 Subject: x86/xen: eliminate some private header files Under arch/x86/xen there is one large private header file xen-ops.h containing most of the Xen-private x86 related declarations, and then there are several small headers with a handful of declarations each. Merge the small headers into xen-ops.h. While doing that, move the declaration of xen_fifo_events from xen-ops.h into include/xen/events.h where it should have been from the beginning. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Message-ID: <20240710093718.14552-3-jgross@suse.com> Signed-off-by: Juergen Gross --- include/xen/events.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/xen/events.h b/include/xen/events.h index 3b07409f8032..de5da58a0205 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -144,4 +144,6 @@ static inline void xen_evtchn_close(evtchn_port_t port) BUG(); } +extern bool xen_fifo_events; + #endif /* _XEN_EVENTS_H */ -- cgit v1.2.3 From 4484940514295b75389f94787f8e179ba6255353 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 8 Jul 2024 15:42:45 +0100 Subject: btrfs: avoid races when tracking progress for extent map shrinking We store the progress (root and inode numbers) of the extent map shrinker in fs_info without any synchronization but we can have multiple tasks calling into the shrinker during memory allocations when there's enough memory pressure for example. This can result in a task A reading fs_info->extent_map_shrinker_last_ino after another task B updates it, and task A reading fs_info->extent_map_shrinker_last_root before task B updates it, making task A see an odd state that isn't necessarily harmful but may make it skip certain inode ranges or do more work than necessary by going over the same inodes again. These unprotected accesses would also trigger warnings from tools like KCSAN. So add a lock to protect access to these progress fields. Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index d2d94d7c3fb5..0067e8443d38 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2556,9 +2556,10 @@ TRACE_EVENT(btrfs_extent_map_shrinker_count, TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter, - TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_to_scan, long nr), + TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_to_scan, long nr, + u64 last_root_id, u64 last_ino), - TP_ARGS(fs_info, nr_to_scan, nr), + TP_ARGS(fs_info, nr_to_scan, nr, last_root_id, last_ino), TP_STRUCT__entry_btrfs( __field( long, nr_to_scan ) @@ -2570,8 +2571,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter, TP_fast_assign_btrfs(fs_info, __entry->nr_to_scan = nr_to_scan; __entry->nr = nr; - __entry->last_root_id = fs_info->extent_map_shrinker_last_root; - __entry->last_ino = fs_info->extent_map_shrinker_last_ino; + __entry->last_root_id = last_root_id; + __entry->last_ino = last_ino; ), TP_printk_btrfs("nr_to_scan=%ld nr=%ld last_root=%llu(%s) last_ino=%llu", @@ -2581,9 +2582,10 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter, TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit, - TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr), + TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr, + u64 last_root_id, u64 last_ino), - TP_ARGS(fs_info, nr_dropped, nr), + TP_ARGS(fs_info, nr_dropped, nr, last_root_id, last_ino), TP_STRUCT__entry_btrfs( __field( long, nr_dropped ) @@ -2595,8 +2597,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit, TP_fast_assign_btrfs(fs_info, __entry->nr_dropped = nr_dropped; __entry->nr = nr; - __entry->last_root_id = fs_info->extent_map_shrinker_last_root; - __entry->last_ino = fs_info->extent_map_shrinker_last_ino; + __entry->last_root_id = last_root_id; + __entry->last_ino = last_ino; ), TP_printk_btrfs("nr_dropped=%ld nr=%ld last_root=%llu(%s) last_ino=%llu", -- cgit v1.2.3 From 5207c393d3e7dda9aff813d6b3e2264370d241be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 5 Jul 2024 15:28:28 +0200 Subject: drm/xe: Use write-back caching mode for system memory on DGFX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The caching mode for buffer objects with VRAM as a possible placement was forced to write-combined, regardless of placement. However, write-combined system memory is expensive to allocate and even though it is pooled, the pool is expensive to shrink, since it involves global CPU TLB flushes. Moreover write-combined system memory from TTM is only reliably available on x86 and DGFX doesn't have an x86 restriction. So regardless of the cpu caching mode selected for a bo, internally use write-back caching mode for system memory on DGFX. Coherency is maintained, but user-space clients may perceive a difference in cpu access speeds. v2: - Update RB- and Ack tags. - Rephrase wording in xe_drm.h (Matt Roper) v3: - Really rephrase wording. Signed-off-by: Thomas Hellström Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode") Cc: Pallavi Mishra Cc: Matthew Auld Cc: dri-devel@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Effie Yu Cc: Matthew Brost Cc: Maarten Lankhorst Cc: Jose Souza Cc: Michal Mrozek Cc: # v6.8+ Acked-by: Matthew Auld Acked-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode") Acked-by: Michal Mrozek Acked-by: Effie Yu #On chat Link: https://patchwork.freedesktop.org/patch/msgid/20240705132828.27714-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 01e0cfc994be484ddcb9e121e353e51d8bb837c0) Signed-off-by: Lucas De Marchi --- include/uapi/drm/xe_drm.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 1446c3bae515..d425b83181df 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -776,7 +776,13 @@ struct drm_xe_gem_create { #define DRM_XE_GEM_CPU_CACHING_WC 2 /** * @cpu_caching: The CPU caching mode to select for this object. If - * mmaping the object the mode selected here will also be used. + * mmaping the object the mode selected here will also be used. The + * exception is when mapping system memory (including data evicted + * to system) on discrete GPUs. The caching mode selected will + * then be overridden to DRM_XE_GEM_CPU_CACHING_WB, and coherency + * between GPU- and CPU is guaranteed. The caching mode of + * existing CPU-mappings will be updated transparently to + * user-space clients. */ __u16 cpu_caching; /** @pad: MBZ */ -- cgit v1.2.3 From b4e891901ed5409f4c33d12fd92011be925ef5b3 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Wed, 12 Jun 2024 17:00:38 +0100 Subject: fixmap: Remove unused set_fixmap_offset_io() The macro set_fixmap_offset_io() was added in commit f774b7d10e21 ("arm64: fixmap: fix missing sub-page offset for earlyprintk") but then commit 8ef0ed95ee04 ("arm64: remove arch specific earlyprintk") removed the file causing the only user to be removed when the two commits were merged. Since this has never been used again since the v3.15 release remove it. Signed-off-by: Steven Price Signed-off-by: Arnd Bergmann --- include/asm-generic/fixmap.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h index 8cc7b09c1bc7..29cab7947980 100644 --- a/include/asm-generic/fixmap.h +++ b/include/asm-generic/fixmap.h @@ -97,8 +97,5 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) #define set_fixmap_io(idx, phys) \ __set_fixmap(idx, phys, FIXMAP_PAGE_IO) -#define set_fixmap_offset_io(idx, phys) \ - __set_fixmap_offset(idx, phys, FIXMAP_PAGE_IO) - #endif /* __ASSEMBLY__ */ #endif /* __ASM_GENERIC_FIXMAP_H */ -- cgit v1.2.3 From a808878308a8041ae10a151d69e2d22f94cae9f4 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 14 Apr 2024 11:05:25 +0300 Subject: driver core: auxiliary bus: show auxiliary device IRQs PCI subfunctions (SF) are anchored on the auxiliary bus. PCI physical and virtual functions are anchored on the PCI bus. The irq information of each such function is visible to users via sysfs directory "msi_irqs" containing files for each irq entry. However, for PCI SFs such information is unavailable. Due to this users have no visibility on IRQs used by the SFs. Secondly, an SF can be multi function device supporting rdma, netdevice and more. Without irq information at the bus level, the user is unable to view or use the affinity of the SF IRQs. Hence to match to the equivalent PCI PFs and VFs, add "irqs" directory, for supporting auxiliary devices, containing file for each irq entry. For example: $ ls /sys/bus/auxiliary/devices/mlx5_core.sf.1/irqs/ 50 51 52 53 54 55 56 57 58 Cc: Simon Horman Reviewed-by: Przemek Kitszel Reviewed-by: Parav Pandit Reviewed-by: Greg Kroah-Hartman Signed-off-by: Shay Drory Signed-off-by: Saeed Mahameed --- v9-v10: - remove Przemek RB - add name field to auxiliary_irq_info (Greg and Przemek) - handle bogus IRQ in auxiliary_device_sysfs_irq_remove (Greg) v8-v9: - add Przemek RB - use guard() in auxiliary_irq_dir_prepare (Paolo) v7-v8: - use cleanup.h for info and name fields (Greg) - correct error flow in auxiliary_irq_dir_prepare (Przemek) - add documentation for new fields of auxiliary_device (Simon) v6-v7: - dynamically creating irqs directory when first irq file created (Greg) - removed irqs flag and simplified the dev_add() API (Greg) - move sysfs related new code to a new auxiliary_sysfs.c file (Greg) v5-v6: - removed concept of shared and exclusive and hence global xarray (Greg) v4-v5: - restore global mutex and replace refcount_t with simple integer (Greg) v3->4: - remove global mutex (Przemek) v2->v3: - fix function declaration in case SYSFS isn't defined v1->v2: - move #ifdefs from drivers/base/auxiliary.c to include/linux/auxiliary_bus.h (Greg) - use EXPORT_SYMBOL_GPL instead of EXPORT_SYMBOL (Greg) - Fix kzalloc(ref) to kzalloc(*ref) (Simon) - Add return description in auxiliary_device_sysfs_irq_add() kdoc (Simon) - Fix auxiliary_irq_mode_show doc (kernel test boot) --- include/linux/auxiliary_bus.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index de21d9d24a95..3ba4487c9cd9 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -58,6 +58,9 @@ * in * @name: Match name found by the auxiliary device driver, * @id: unique identitier if multiple devices of the same name are exported, + * @irqs: irqs xarray contains irq indices which are used by the device, + * @lock: Synchronize irq sysfs creation, + * @irq_dir_exists: whether "irqs" directory exists, * * An auxiliary_device represents a part of its parent device's functionality. * It is given a name that, combined with the registering drivers @@ -139,6 +142,11 @@ struct auxiliary_device { struct device dev; const char *name; u32 id; + struct { + struct xarray irqs; + struct mutex lock; /* Synchronize irq sysfs creation */ + bool irq_dir_exists; + } sysfs; }; /** @@ -212,8 +220,24 @@ int auxiliary_device_init(struct auxiliary_device *auxdev); int __auxiliary_device_add(struct auxiliary_device *auxdev, const char *modname); #define auxiliary_device_add(auxdev) __auxiliary_device_add(auxdev, KBUILD_MODNAME) +#ifdef CONFIG_SYSFS +int auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq); +void auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev, + int irq); +#else /* CONFIG_SYSFS */ +static inline int +auxiliary_device_sysfs_irq_add(struct auxiliary_device *auxdev, int irq) +{ + return 0; +} + +static inline void +auxiliary_device_sysfs_irq_remove(struct auxiliary_device *auxdev, int irq) {} +#endif + static inline void auxiliary_device_uninit(struct auxiliary_device *auxdev) { + mutex_destroy(&auxdev->sysfs.lock); put_device(&auxdev->dev); } -- cgit v1.2.3 From ad78e05d654567e0a96f91d5db198469ddc2d4fb Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 13 Jun 2024 13:50:25 +0200 Subject: PCI: Add managed pcim_iomap_range() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only managed mapping function currently is pcim_iomap() which doesn't allow for mapping an area starting at a certain offset, which many drivers want. Add pcim_iomap_range() as an exported function. Link: https://lore.kernel.org/r/20240613115032.29098-13-pstanner@redhat.com Signed-off-by: Philipp Stanner Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0c19f0717899..98893a89bb5b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2303,6 +2303,8 @@ int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name); int pcim_iomap_regions_request_all(struct pci_dev *pdev, int mask, const char *name); void pcim_iounmap_regions(struct pci_dev *pdev, int mask); +void __iomem *pcim_iomap_range(struct pci_dev *pdev, int bar, + unsigned long offset, unsigned long len); extern int pci_pci_problems; #define PCIPCI_FAIL 1 /* No PCI PCI DMA */ -- cgit v1.2.3 From 861f96a785149a0062cce6578e0fa7cb95435a7e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 10 Jul 2024 16:33:39 +0800 Subject: iommufd: Remove IOMMUFD_PAGE_RESP_FAILURE The response code of IOMMUFD_PAGE_RESP_FAILURE was defined to be equivalent to the "Response Failure" in PCI spec, section 10.4.2.1. This response code indicates that one or more pages within the associated request group have encountered or caused an unrecoverable error. Therefore, this response disables the PRI at the function. Modern I/O virtualization technologies, like SR-IOV, share PRI among the assignable device units. Therefore, a response failure on one unit might cause I/O failure on other units. Remove this response code so that user space can only respond with SUCCESS or INVALID. The VMM is recommended to emulate a failure response as a PRI reset, or PRI disable and changing to a non-PRI domain. Fixes: c714f15860fc ("iommufd: Add fault and response message definitions") Link: https://lore.kernel.org/r/20240710083341.44617-2-baolu.lu@linux.intel.com Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index ede2b464a761..e31385b75d0b 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -765,14 +765,10 @@ struct iommu_hwpt_pgfault { * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the * access. This is the "Invalid Request" in PCI * 10.4.2.1. - * @IOMMUFD_PAGE_RESP_FAILURE: General error. Drop all subsequent faults from - * this device if possible. This is the "Response - * Failure" in PCI 10.4.2.1. */ enum iommufd_page_response_code { IOMMUFD_PAGE_RESP_SUCCESS = 0, IOMMUFD_PAGE_RESP_INVALID, - IOMMUFD_PAGE_RESP_FAILURE, }; /** -- cgit v1.2.3 From 8341eee81c794db0d8dd503c2b0ea2f55eba7334 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Wed, 10 Jul 2024 19:10:04 +0200 Subject: net: psample: fix flag being set in wrong skb A typo makes PSAMPLE_ATTR_SAMPLE_RATE netlink flag be added to the wrong sk_buff. Fix the error and make the input sk_buff pointer "const" so that it doesn't happen again. Acked-by: Eelco Chaudron Fixes: 7b1b2b60c63f ("net: psample: allow using rate as probability") Signed-off-by: Adrian Moreno Reviewed-by: Ido Schimmel Reviewed-by: Antoine Tenart Link: https://patch.msgid.link/20240710171004.2164034-1-amorenoz@redhat.com Signed-off-by: Jakub Kicinski --- include/net/psample.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/psample.h b/include/net/psample.h index c52e9ebd88dd..5071b5fc2b59 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -38,13 +38,15 @@ struct sk_buff; #if IS_ENABLED(CONFIG_PSAMPLE) -void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, - u32 sample_rate, const struct psample_metadata *md); +void psample_sample_packet(struct psample_group *group, + const struct sk_buff *skb, u32 sample_rate, + const struct psample_metadata *md); #else static inline void psample_sample_packet(struct psample_group *group, - struct sk_buff *skb, u32 sample_rate, + const struct sk_buff *skb, + u32 sample_rate, const struct psample_metadata *md) { } -- cgit v1.2.3 From 13cabc47f8ae69d24653f32c28399d493fde0a56 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 10 Jul 2024 04:30:28 -0700 Subject: netdevice: define and allocate &net_device _properly_ In fact, this structure contains a flexible array at the end, but historically its size, alignment etc., is calculated manually. There are several instances of the structure embedded into other structures, but also there's ongoing effort to remove them and we could in the meantime declare &net_device properly. Declare the array explicitly, use struct_size() and store the array size inside the structure, so that __counted_by() can be applied. Don't use PTR_ALIGN(), as SLUB itself tries its best to ensure the allocated buffer is aligned to what the user expects. Also, change its alignment from %NETDEV_ALIGN to the cacheline size as per several suggestions on the netdev ML. bloat-o-meter for vmlinux: free_netdev 445 440 -5 netdev_freemem 24 - -24 alloc_netdev_mqs 1481 1450 -31 On x86_64 with several NICs of different vendors, I was never able to get a &net_device pointer not aligned to the cacheline size after the change. Signed-off-by: Alexander Lobakin Signed-off-by: Breno Leitao Reviewed-by: Przemek Kitszel Reviewed-by: Eric Dumazet Reviewed-by: Kees Cook Link: https://patch.msgid.link/20240710113036.2125584-1-leitao@debian.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 93558645c6d0..607009150b5f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1819,7 +1819,8 @@ enum netdev_reg_state { * @priv_flags: Like 'flags' but invisible to userspace, * see if.h for the definitions * @gflags: Global flags ( kept as legacy ) - * @padded: How much padding added by alloc_netdev() + * @priv_len: Size of the ->priv flexible array + * @priv: Flexible array containing private data * @operstate: RFC2863 operstate * @link_mode: Mapping policy to operstate * @if_port: Selectable AUI, TP, ... @@ -2199,10 +2200,10 @@ struct net_device { unsigned short neigh_priv_len; unsigned short dev_id; unsigned short dev_port; - unsigned short padded; + int irq; + u32 priv_len; spinlock_t addr_list_lock; - int irq; struct netdev_hw_addr_list uc; struct netdev_hw_addr_list mc; @@ -2406,7 +2407,10 @@ struct net_device { /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */ struct dim_irq_moder *irq_moder; -}; + + u8 priv[] ____cacheline_aligned + __counted_by(priv_len); +} ____cacheline_aligned; #define to_net_dev(d) container_of(d, struct net_device, dev) /* @@ -2596,7 +2600,7 @@ void dev_net_set(struct net_device *dev, struct net *net) */ static inline void *netdev_priv(const struct net_device *dev) { - return (char *)dev + ALIGN(sizeof(struct net_device), NETDEV_ALIGN); + return (void *)dev->priv; } /* Set the sysfs physical device reference for the network logical device @@ -3127,7 +3131,6 @@ static inline void unregister_netdevice(struct net_device *dev) int netdev_refcnt_read(const struct net_device *dev); void free_netdev(struct net_device *dev); -void netdev_freemem(struct net_device *dev); void init_dummy_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, -- cgit v1.2.3 From 06dcc4c9baa9e92896f00d02ffa760c0988b4371 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 12 Jul 2024 09:30:10 +0900 Subject: firewire: core: move copy_port_status() helper function to TP_fast_assign() block It would be possible to put any statement in TP_fast_assign(). This commit obsoletes the helper function and put its statements to TP_fast_assign() for the code simplicity. Link: https://lore.kernel.org/r/20240712003010.87341-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/trace/events/firewire.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index bac9d98e88e5..b108176deb22 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -392,9 +392,6 @@ TRACE_EVENT(bus_reset_handle, #define PHY_PACKET_SELF_ID_GET_INITIATED_RESET(quads) \ ((((const u32 *)quads)[0] & SELF_ID_ZERO_INITIATED_RESET_MASK) >> SELF_ID_ZERO_INITIATED_RESET_SHIFT) -void copy_port_status(u8 *port_status, unsigned int port_capacity, const u32 *self_id_sequence, - unsigned int quadlet_count); - TRACE_EVENT(self_id_sequence, TP_PROTO(unsigned int card_index, const u32 *self_id_sequence, unsigned int quadlet_count, unsigned int generation), TP_ARGS(card_index, self_id_sequence, quadlet_count, generation), @@ -407,8 +404,16 @@ TRACE_EVENT(self_id_sequence, TP_fast_assign( __entry->card_index = card_index; __entry->generation = generation; - copy_port_status(__get_dynamic_array(port_status), __get_dynamic_array_len(port_status), - self_id_sequence, quadlet_count); + { + u8 *port_status = __get_dynamic_array(port_status); + unsigned int port_index; + + for (port_index = 0; port_index < __get_dynamic_array_len(port_status); ++port_index) { + port_status[port_index] = + self_id_sequence_get_port_status(self_id_sequence, + quadlet_count, port_index); + } + } memcpy(__get_dynamic_array(self_id_sequence), self_id_sequence, __get_dynamic_array_len(self_id_sequence)); ), -- cgit v1.2.3 From 1a7b7326d587c9a5e8ff067e70d6aaf0333f4bb3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 12 Jul 2024 07:51:58 +0200 Subject: vmlinux.lds.h: catch .bss..L* sections into BSS") Commit 9a427556fb8e ("vmlinux.lds.h: catch compound literals into data and BSS") added catches for .data..L* and .rodata..L* but missed .bss..L* Since commit 5431fdd2c181 ("ptrace: Convert ptrace_attach() to use lock guards") the following appears at build: LD .tmp_vmlinux.kallsyms1 powerpc64-linux-ld: warning: orphan section `.bss..Lubsan_data33' from `kernel/ptrace.o' being placed in section `.bss..Lubsan_data33' NM .tmp_vmlinux.kallsyms1.syms KSYMS .tmp_vmlinux.kallsyms1.S AS .tmp_vmlinux.kallsyms1.S LD .tmp_vmlinux.kallsyms2 powerpc64-linux-ld: warning: orphan section `.bss..Lubsan_data33' from `kernel/ptrace.o' being placed in section `.bss..Lubsan_data33' NM .tmp_vmlinux.kallsyms2.syms KSYMS .tmp_vmlinux.kallsyms2.S AS .tmp_vmlinux.kallsyms2.S LD vmlinux powerpc64-linux-ld: warning: orphan section `.bss..Lubsan_data33' from `kernel/ptrace.o' being placed in section `.bss..Lubsan_data33' Lets add .bss..L* to BSS_MAIN macro to catch those sections into BSS. Fixes: 9a427556fb8e ("vmlinux.lds.h: catch compound literals into data and BSS") Signed-off-by: Christophe Leroy Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202404031349.nmKhyuUG-lkp@intel.com/ Signed-off-by: Arnd Bergmann --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5703526d6ebf..70bf1004076b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -103,7 +103,7 @@ #define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* -#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral* +#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..L* .bss..compoundliteral* #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* #else #define TEXT_MAIN .text -- cgit v1.2.3 From 887c4cf5594a073fd60c0df84150eb06d78c6406 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 11 Jul 2024 10:11:12 -0700 Subject: efi: Rename efi_early_memdesc_ptr() to efi_memdesc_ptr() The "early" part of the helper's name isn't accurate[1]. Drop it in preparation for adding a new (not early) usage. Suggested-by: Ard Biesheuvel Link: https://lore.kernel.org/lkml/CAMj1kXEyDjH0uu3Z4eBesV3PEnKGi5ArXXMp7R-hn8HdRytiPg@mail.gmail.com [1] Signed-off-by: Kees Cook Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 3a6c04a9f9aa..67b2fa80c8b9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -784,7 +784,7 @@ extern int efi_memattr_apply_permissions(struct mm_struct *mm, efi_memattr_perm_setter fn); /* - * efi_early_memdesc_ptr - get the n-th EFI memmap descriptor + * efi_memdesc_ptr - get the n-th EFI memmap descriptor * @map: the start of efi memmap * @desc_size: the size of space for each EFI memmap descriptor * @n: the index of efi memmap descriptor @@ -802,7 +802,7 @@ extern int efi_memattr_apply_permissions(struct mm_struct *mm, * during bootup since for_each_efi_memory_desc_xxx() is available after the * kernel initializes the EFI subsystem to set up struct efi_memory_map. */ -#define efi_early_memdesc_ptr(map, desc_size, n) \ +#define efi_memdesc_ptr(map, desc_size, n) \ (efi_memory_desc_t *)((void *)(map) + ((n) * (desc_size))) /* Iterate through an efi_memory_map */ -- cgit v1.2.3 From 4a2ebb082297f41803742729642961532e54079e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 11 Jul 2024 10:11:13 -0700 Subject: efi: Replace efi_memory_attributes_table_t 0-sized array with flexible array While efi_memory_attributes_table_t::entry isn't used directly as an array, it is used as a base for pointer arithmetic. The type is wrong as it's not technically an array of efi_memory_desc_t's; they could be larger. Regardless, leave the type unchanged and remove the old style "0" array size. Additionally replace the open-coded entry offset code with the existing efi_memdesc_ptr() helper. Signed-off-by: Kees Cook Signed-off-by: Ard Biesheuvel --- include/linux/efi.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/efi.h b/include/linux/efi.h index 67b2fa80c8b9..6bf3c4fe8511 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -608,7 +608,11 @@ typedef struct { u32 num_entries; u32 desc_size; u32 flags; - efi_memory_desc_t entry[0]; + /* + * There are @num_entries following, each of size @desc_size bytes, + * including an efi_memory_desc_t header. See efi_memdesc_ptr(). + */ + efi_memory_desc_t entry[]; } efi_memory_attributes_table_t; typedef struct { -- cgit v1.2.3 From 79cf9c6ee44e0af7e1383365d29c64eece6665bb Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 28 Jun 2024 15:29:42 +0200 Subject: media: uapi: pisp_be_config: Drop BIT() from uAPI The pisp_be_config.h uAPI header file contains a bit-field definition that uses the BIT() helper macro. As the BIT() identifier is not defined in userspace, drop it from the uAPI header. Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI") Signed-off-by: Jacopo Mondi Reviewed-by: Tomi Valkeinen Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/media/raspberrypi/pisp_be_config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/media/raspberrypi/pisp_be_config.h b/include/uapi/linux/media/raspberrypi/pisp_be_config.h index 1684ae068d4f..27d0cc417d6b 100644 --- a/include/uapi/linux/media/raspberrypi/pisp_be_config.h +++ b/include/uapi/linux/media/raspberrypi/pisp_be_config.h @@ -146,7 +146,7 @@ struct pisp_be_dpc_config { */ struct pisp_be_geq_config { __u16 offset; -#define PISP_BE_GEQ_SHARPER BIT(15) +#define PISP_BE_GEQ_SHARPER (1U << 15) #define PISP_BE_GEQ_SLOPE ((1 << 10) - 1) /* top bit is the "sharper" flag, slope value is bottom 10 bits */ __u16 slope_sharper; -- cgit v1.2.3 From 1991a09e6d7c1dff5efea65b5b31082332f2e64e Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 28 Jun 2024 15:29:43 +0200 Subject: media: uapi: pisp_common: Add 32 bpp format test Add definition and test for 32-bits image formats to the pisp_common.h uAPI header. Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI") Signed-off-by: Jacopo Mondi Acked-by: David Plowman Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/media/raspberrypi/pisp_common.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/media/raspberrypi/pisp_common.h b/include/uapi/linux/media/raspberrypi/pisp_common.h index b2522e29c976..74d096188233 100644 --- a/include/uapi/linux/media/raspberrypi/pisp_common.h +++ b/include/uapi/linux/media/raspberrypi/pisp_common.h @@ -72,6 +72,8 @@ enum pisp_image_format { PISP_IMAGE_FORMAT_SHIFT_8 = 0x00080000, PISP_IMAGE_FORMAT_SHIFT_MASK = 0x000f0000, + PISP_IMAGE_FORMAT_BPP_32 = 0x00100000, + PISP_IMAGE_FORMAT_UNCOMPRESSED = 0x00000000, PISP_IMAGE_FORMAT_COMPRESSION_MODE_1 = 0x01000000, PISP_IMAGE_FORMAT_COMPRESSION_MODE_2 = 0x02000000, @@ -134,6 +136,7 @@ enum pisp_image_format { PISP_IMAGE_FORMAT_PLANARITY_PLANAR) #define PISP_IMAGE_FORMAT_wallpaper(fmt) \ ((fmt) & PISP_IMAGE_FORMAT_WALLPAPER_ROLL) +#define PISP_IMAGE_FORMAT_bpp_32(fmt) ((fmt) & PISP_IMAGE_FORMAT_BPP_32) #define PISP_IMAGE_FORMAT_HOG(fmt) \ ((fmt) & \ (PISP_IMAGE_FORMAT_HOG_SIGNED | PISP_IMAGE_FORMAT_HOG_UNSIGNED)) -- cgit v1.2.3 From f5cee94f2dfe5b7625452b831f7629369ce68a7b Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 28 Jun 2024 15:29:44 +0200 Subject: media: uapi: pisp_common: Capitalize all macros The macro used to inspect an image format characteristic use a mixture of capitalized and non-capitalized letters, which is rather unusual for the Linux kernel style. Capitalize all identifiers. Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI") Signed-off-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/media/raspberrypi/pisp_common.h | 38 +++++++++++----------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/media/raspberrypi/pisp_common.h b/include/uapi/linux/media/raspberrypi/pisp_common.h index 74d096188233..cbdccfed1261 100644 --- a/include/uapi/linux/media/raspberrypi/pisp_common.h +++ b/include/uapi/linux/media/raspberrypi/pisp_common.h @@ -92,51 +92,51 @@ enum pisp_image_format { PISP_IMAGE_FORMAT_THREE_CHANNEL }; -#define PISP_IMAGE_FORMAT_bps_8(fmt) \ +#define PISP_IMAGE_FORMAT_BPS_8(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_8) -#define PISP_IMAGE_FORMAT_bps_10(fmt) \ +#define PISP_IMAGE_FORMAT_BPS_10(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_10) -#define PISP_IMAGE_FORMAT_bps_12(fmt) \ +#define PISP_IMAGE_FORMAT_BPS_12(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_12) -#define PISP_IMAGE_FORMAT_bps_16(fmt) \ +#define PISP_IMAGE_FORMAT_BPS_16(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_16) -#define PISP_IMAGE_FORMAT_bps(fmt) \ +#define PISP_IMAGE_FORMAT_BPS(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) ? \ 8 + (2 << (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) - 1)) : 8) -#define PISP_IMAGE_FORMAT_shift(fmt) \ +#define PISP_IMAGE_FORMAT_SHIFT(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_SHIFT_MASK) / PISP_IMAGE_FORMAT_SHIFT_1) -#define PISP_IMAGE_FORMAT_three_channel(fmt) \ +#define PISP_IMAGE_FORMAT_THREE_CHANNEL(fmt) \ ((fmt) & PISP_IMAGE_FORMAT_THREE_CHANNEL) -#define PISP_IMAGE_FORMAT_single_channel(fmt) \ +#define PISP_IMAGE_FORMAT_SINGLE_CHANNEL(fmt) \ (!((fmt) & PISP_IMAGE_FORMAT_THREE_CHANNEL)) -#define PISP_IMAGE_FORMAT_compressed(fmt) \ +#define PISP_IMAGE_FORMAT_COMPRESSED(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_COMPRESSION_MASK) != \ PISP_IMAGE_FORMAT_UNCOMPRESSED) -#define PISP_IMAGE_FORMAT_sampling_444(fmt) \ +#define PISP_IMAGE_FORMAT_SAMPLING_444(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == \ PISP_IMAGE_FORMAT_SAMPLING_444) -#define PISP_IMAGE_FORMAT_sampling_422(fmt) \ +#define PISP_IMAGE_FORMAT_SAMPLING_422(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == \ PISP_IMAGE_FORMAT_SAMPLING_422) -#define PISP_IMAGE_FORMAT_sampling_420(fmt) \ +#define PISP_IMAGE_FORMAT_SAMPLING_420(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == \ PISP_IMAGE_FORMAT_SAMPLING_420) -#define PISP_IMAGE_FORMAT_order_normal(fmt) \ +#define PISP_IMAGE_FORMAT_ORDER_NORMAL(fmt) \ (!((fmt) & PISP_IMAGE_FORMAT_ORDER_SWAPPED)) -#define PISP_IMAGE_FORMAT_order_swapped(fmt) \ +#define PISP_IMAGE_FORMAT_ORDER_SWAPPED(fmt) \ ((fmt) & PISP_IMAGE_FORMAT_ORDER_SWAPPED) -#define PISP_IMAGE_FORMAT_interleaved(fmt) \ +#define PISP_IMAGE_FORMAT_INTERLEAVED(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == \ PISP_IMAGE_FORMAT_PLANARITY_INTERLEAVED) -#define PISP_IMAGE_FORMAT_semiplanar(fmt) \ +#define PISP_IMAGE_FORMAT_SEMIPLANAR(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == \ PISP_IMAGE_FORMAT_PLANARITY_SEMI_PLANAR) -#define PISP_IMAGE_FORMAT_planar(fmt) \ +#define PISP_IMAGE_FORMAT_PLANAR(fmt) \ (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == \ PISP_IMAGE_FORMAT_PLANARITY_PLANAR) -#define PISP_IMAGE_FORMAT_wallpaper(fmt) \ +#define PISP_IMAGE_FORMAT_WALLPAPER(fmt) \ ((fmt) & PISP_IMAGE_FORMAT_WALLPAPER_ROLL) -#define PISP_IMAGE_FORMAT_bpp_32(fmt) ((fmt) & PISP_IMAGE_FORMAT_BPP_32) +#define PISP_IMAGE_FORMAT_BPP_32(fmt) ((fmt) & PISP_IMAGE_FORMAT_BPP_32) #define PISP_IMAGE_FORMAT_HOG(fmt) \ ((fmt) & \ (PISP_IMAGE_FORMAT_HOG_SIGNED | PISP_IMAGE_FORMAT_HOG_UNSIGNED)) -- cgit v1.2.3 From 639065c621df9bad9d94373084e1c568f81d34e0 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 28 Jun 2024 15:29:45 +0200 Subject: media: uapi: pisp_be_config: Re-sort pisp_be_tiles_config The order of the members of pisp_be_tiles_config is relevant as the driver logic assumes 'config' to be at offset 0. Re-sort the member to match the driver's expectations. Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI") Signed-off-by: Jacopo Mondi Acked-by: Naushir Patuck Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/uapi/linux/media/raspberrypi/pisp_be_config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/media/raspberrypi/pisp_be_config.h b/include/uapi/linux/media/raspberrypi/pisp_be_config.h index 27d0cc417d6b..f8650ca92bf8 100644 --- a/include/uapi/linux/media/raspberrypi/pisp_be_config.h +++ b/include/uapi/linux/media/raspberrypi/pisp_be_config.h @@ -919,9 +919,9 @@ struct pisp_tile { * @config: PiSP Back End configuration */ struct pisp_be_tiles_config { + struct pisp_be_config config; struct pisp_tile tiles[PISP_BACK_END_NUM_TILES]; __u32 num_tiles; - struct pisp_be_config config; } __attribute__((packed)); #endif /* _UAPI_PISP_BE_CONFIG_H_ */ -- cgit v1.2.3 From 1c2c57bd439ecaffc728139a0a00701bee886d0a Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Fri, 28 Jun 2024 15:29:46 +0200 Subject: media: uapi: pisp_be_config: Add extra config fields Complete the pisp_be_config strcture by adding fields that even if not written to the HW are relevant to complete the uAPI and put it in par with the BSP driver. Fixes: c6c49bac8770 ("media: uapi: Add Raspberry Pi PiSP Back End uAPI") Signed-off-by: Jacopo Mondi Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- .../uapi/linux/media/raspberrypi/pisp_be_config.h | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/media/raspberrypi/pisp_be_config.h b/include/uapi/linux/media/raspberrypi/pisp_be_config.h index f8650ca92bf8..cbeb714f4d61 100644 --- a/include/uapi/linux/media/raspberrypi/pisp_be_config.h +++ b/include/uapi/linux/media/raspberrypi/pisp_be_config.h @@ -716,6 +716,13 @@ struct pisp_be_hog_buffer_config { /** * struct pisp_be_config - RaspberryPi PiSP Back End Processing configuration * + * @input_buffer: Input buffer addresses + * @tdn_input_buffer: TDN input buffer addresses + * @stitch_input_buffer: Stitch input buffer addresses + * @tdn_output_buffer: TDN output buffer addresses + * @stitch_output_buffer: Stitch output buffer addresses + * @output_buffer: Output buffers addresses + * @hog_buffer: HOG buffer addresses * @global: Global PiSP configuration * @input_format: Input image format * @decompress: Decompress configuration @@ -753,8 +760,30 @@ struct pisp_be_hog_buffer_config { * @resample: Resampling configuration * @output_format: Output format configuration * @hog: HOG configuration + * @axi: AXI bus configuration + * @lsc_extra: LSC extra info + * @cac_extra: CAC extra info + * @downscale_extra: Downscaler extra info + * @resample_extra: Resample extra info + * @crop: Crop configuration + * @hog_format: HOG format info + * @dirty_flags_bayer: Bayer enable dirty flags + * (:c:type:`pisp_be_bayer_enable`) + * @dirty_flags_rgb: RGB enable dirty flags + * (:c:type:`pisp_be_rgb_enable`) + * @dirty_flags_extra: Extra dirty flags */ struct pisp_be_config { + /* I/O configuration: */ + struct pisp_be_input_buffer_config input_buffer; + struct pisp_be_tdn_input_buffer_config tdn_input_buffer; + struct pisp_be_stitch_input_buffer_config stitch_input_buffer; + struct pisp_be_tdn_output_buffer_config tdn_output_buffer; + struct pisp_be_stitch_output_buffer_config stitch_output_buffer; + struct pisp_be_output_buffer_config + output_buffer[PISP_BACK_END_NUM_OUTPUTS]; + struct pisp_be_hog_buffer_config hog_buffer; + /* Processing configuration: */ struct pisp_be_global_config global; struct pisp_image_format_config input_format; struct pisp_decompress_config decompress; @@ -793,6 +822,18 @@ struct pisp_be_config { struct pisp_be_output_format_config output_format[PISP_BACK_END_NUM_OUTPUTS]; struct pisp_be_hog_config hog; + struct pisp_be_axi_config axi; + /* Non-register fields: */ + struct pisp_be_lsc_extra lsc_extra; + struct pisp_be_cac_extra cac_extra; + struct pisp_be_downscale_extra + downscale_extra[PISP_BACK_END_NUM_OUTPUTS]; + struct pisp_be_resample_extra resample_extra[PISP_BACK_END_NUM_OUTPUTS]; + struct pisp_be_crop_config crop; + struct pisp_image_format_config hog_format; + __u32 dirty_flags_bayer; /* these use pisp_be_bayer_enable */ + __u32 dirty_flags_rgb; /* use pisp_be_rgb_enable */ + __u32 dirty_flags_extra; /* these use pisp_be_dirty_t */ } __attribute__((packed)); /** -- cgit v1.2.3 From 0728c810873e1a94e8b767f9809af940c9307d60 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Jul 2024 16:42:33 +0200 Subject: thermal: trip: Pass trip pointer to .set_trip_temp() thermal zone callback Out of several drivers implementing the .set_trip_temp() thermal zone operation, three don't actually use the trip ID argument passed to it, two call __thermal_zone_get_trip() to get a struct thermal_trip corresponding to the given trip ID, and the other use the trip ID as an index into their own data structures with the assumption that it will always match the ordering of entries in the trips table passed to the core during thermal zone registration, which is fragile and not really guaranteed. Even though the trip IDs used by the core are in fact their indices in the trips table passed to it by the thermal zone creator, that is purely a matter of convenience and should not be relied on for correctness. For this reason, modify trip_point_temp_store() to pass a (const) trip pointer to .set_trip_temp() and adjust the drivers implementing it accordingly. This helps to simplify the drivers invoking __thermal_zone_get_trip() from their .set_trip_temp() callback functions because they will not need to do it now and the other drivers can store their internal trip indices in the priv field in struct thermal_trip and their .set_trip_temp() callback functions can get those indices from there. The intel_quark_dts thermal driver can instead use the trip type to determine the requisite trip index. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/8392906.T7Z3S40VBb@rjwysocki.net [ rjw: Add missing colon and 2 empty code lines ] [ rjw: Add missing change in imx_thermal.c and adjust the changelog ] [ rjw: Drop an unused local variable ] Signed-off-by: Rafael J. Wysocki --- include/linux/thermal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 3bd1b361ec34..cdd9c722cda2 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -93,7 +93,8 @@ struct thermal_zone_device_ops { int (*set_trips) (struct thermal_zone_device *, int, int); int (*change_mode) (struct thermal_zone_device *, enum thermal_device_mode); - int (*set_trip_temp) (struct thermal_zone_device *, int, int); + int (*set_trip_temp) (struct thermal_zone_device *, + const struct thermal_trip *, int); int (*get_crit_temp) (struct thermal_zone_device *, int *); int (*set_emul_temp) (struct thermal_zone_device *, int); int (*get_trend) (struct thermal_zone_device *, -- cgit v1.2.3 From 5b674baa596e624fde8bf62b9a3d8a26eef399b2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 2 Jul 2024 16:44:27 +0200 Subject: thermal: trip: Fold __thermal_zone_get_trip() into its caller Because __thermal_zone_get_trip() is only called by thermal_zone_get_trip() now, fold the former into the latter. Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/22339769.EfDdHjke4D@rjwysocki.net --- include/linux/thermal.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index cdd9c722cda2..25fbf960b474 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -202,8 +202,6 @@ static inline void devm_thermal_of_zone_unregister(struct device *dev, } #endif -int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, - struct thermal_trip *trip); int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id, struct thermal_trip *trip); int for_each_thermal_trip(struct thermal_zone_device *tz, -- cgit v1.2.3 From 4bdc3eaa102b6bedb0800f76f53eca516d5cf20c Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Wed, 10 Jul 2024 16:35:21 +1200 Subject: clocksource/drivers/realtek: Add timer driver for rtl-otto platforms The timer/counter block on the Realtek SoCs provides up to 5 timers. It also includes a watchdog timer which is handled by the realtek_otto_wdt.c driver. One timer will be used per CPU as a local clock event generator. An additional timer will be used as an overal stable clocksource. Signed-off-by: Markus Stockhausen Signed-off-by: Sander Vanheule Signed-off-by: Chris Packham Link: https://lore.kernel.org/r/20240710043524.1535151-8-chris.packham@alliedtelesis.co.nz Signed-off-by: Daniel Lezcano --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 7a5785f405b6..56b744dc1317 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -171,6 +171,7 @@ enum cpuhp_state { CPUHP_AP_ARMADA_TIMER_STARTING, CPUHP_AP_MIPS_GIC_TIMER_STARTING, CPUHP_AP_ARC_TIMER_STARTING, + CPUHP_AP_REALTEK_TIMER_STARTING, CPUHP_AP_RISCV_TIMER_STARTING, CPUHP_AP_CLINT_TIMER_STARTING, CPUHP_AP_CSKY_TIMER_STARTING, -- cgit v1.2.3 From 75ed63a5ab5d1d2872c735bc7edf8fef0e2fa2ea Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Wed, 10 Jul 2024 14:42:37 +0800 Subject: ASoC: tas2781: Add new Kontrol to set tas2563 digital Volume Requriment from customer to add new kcontrol to set tas2563 digital Volume Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20240710064238.1480-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781-tlv.h | 262 +++++++++++++++++++++++++++++++++++++++++++- include/sound/tas2781.h | 1 + 2 files changed, 262 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h index 1dc59005d241..99c41bfc7827 100644 --- a/include/sound/tas2781-tlv.h +++ b/include/sound/tas2781-tlv.h @@ -2,7 +2,7 @@ // // ALSA SoC Texas Instruments TAS2781 Audio Smart Amplifier // -// Copyright (C) 2022 - 2023 Texas Instruments Incorporated +// Copyright (C) 2022 - 2024 Texas Instruments Incorporated // https://www.ti.com // // The TAS2781 driver implements a flexible and configurable @@ -17,5 +17,265 @@ static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0); static const DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); +static const DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1); +/* pow(10, db/20) * pow(2,30) */ +static const unsigned char tas2563_dvc_table[][4] = { + { 0X00, 0X00, 0X00, 0X00 }, /* -121.5db */ + { 0X00, 0X00, 0X03, 0XBC }, /* -121.0db */ + { 0X00, 0X00, 0X03, 0XF5 }, /* -120.5db */ + { 0X00, 0X00, 0X04, 0X31 }, /* -120.0db */ + { 0X00, 0X00, 0X04, 0X71 }, /* -119.5db */ + { 0X00, 0X00, 0X04, 0XB4 }, /* -119.0db */ + { 0X00, 0X00, 0X04, 0XFC }, /* -118.5db */ + { 0X00, 0X00, 0X05, 0X47 }, /* -118.0db */ + { 0X00, 0X00, 0X05, 0X97 }, /* -117.5db */ + { 0X00, 0X00, 0X05, 0XEC }, /* -117.0db */ + { 0X00, 0X00, 0X06, 0X46 }, /* -116.5db */ + { 0X00, 0X00, 0X06, 0XA5 }, /* -116.0db */ + { 0X00, 0X00, 0X07, 0X0A }, /* -115.5db */ + { 0X00, 0X00, 0X07, 0X75 }, /* -115.0db */ + { 0X00, 0X00, 0X07, 0XE6 }, /* -114.5db */ + { 0X00, 0X00, 0X08, 0X5E }, /* -114.0db */ + { 0X00, 0X00, 0X08, 0XDD }, /* -113.5db */ + { 0X00, 0X00, 0X09, 0X63 }, /* -113.0db */ + { 0X00, 0X00, 0X09, 0XF2 }, /* -112.5db */ + { 0X00, 0X00, 0X0A, 0X89 }, /* -112.0db */ + { 0X00, 0X00, 0X0B, 0X28 }, /* -111.5db */ + { 0X00, 0X00, 0X0B, 0XD2 }, /* -111.0db */ + { 0X00, 0X00, 0X0C, 0X85 }, /* -110.5db */ + { 0X00, 0X00, 0X0D, 0X43 }, /* -110.0db */ + { 0X00, 0X00, 0X0E, 0X0C }, /* -109.5db */ + { 0X00, 0X00, 0X0E, 0XE1 }, /* -109.0db */ + { 0X00, 0X00, 0X0F, 0XC3 }, /* -108.5db */ + { 0X00, 0X00, 0X10, 0XB2 }, /* -108.0db */ + { 0X00, 0X00, 0X11, 0XAF }, /* -107.5db */ + { 0X00, 0X00, 0X12, 0XBC }, /* -107.0db */ + { 0X00, 0X00, 0X13, 0XD8 }, /* -106.5db */ + { 0X00, 0X00, 0X15, 0X05 }, /* -106.0db */ + { 0X00, 0X00, 0X16, 0X44 }, /* -105.5db */ + { 0X00, 0X00, 0X17, 0X96 }, /* -105.0db */ + { 0X00, 0X00, 0X18, 0XFB }, /* -104.5db */ + { 0X00, 0X00, 0X1A, 0X76 }, /* -104.0db */ + { 0X00, 0X00, 0X1C, 0X08 }, /* -103.5db */ + { 0X00, 0X00, 0X1D, 0XB1 }, /* -103.0db */ + { 0X00, 0X00, 0X1F, 0X73 }, /* -102.5db */ + { 0X00, 0X00, 0X21, 0X51 }, /* -102.0db */ + { 0X00, 0X00, 0X23, 0X4A }, /* -101.5db */ + { 0X00, 0X00, 0X25, 0X61 }, /* -101.0db */ + { 0X00, 0X00, 0X27, 0X98 }, /* -100.5db */ + { 0X00, 0X00, 0X29, 0XF1 }, /* -100.0db */ + { 0X00, 0X00, 0X2C, 0X6D }, /* -99.5db */ + { 0X00, 0X00, 0X2F, 0X0F }, /* -99.0db */ + { 0X00, 0X00, 0X31, 0XD9 }, /* -98.5db */ + { 0X00, 0X00, 0X34, 0XCD }, /* -98.0db */ + { 0X00, 0X00, 0X37, 0XEE }, /* -97.5db */ + { 0X00, 0X00, 0X3B, 0X3F }, /* -97.0db */ + { 0X00, 0X00, 0X3E, 0XC1 }, /* -96.5db */ + { 0X00, 0X00, 0X42, 0X79 }, /* -96.0db */ + { 0X00, 0X00, 0X46, 0X6A }, /* -95.5db */ + { 0X00, 0X00, 0X4A, 0X96 }, /* -95.0db */ + { 0X00, 0X00, 0X4F, 0X01 }, /* -94.5db */ + { 0X00, 0X00, 0X53, 0XAF }, /* -94.0db */ + { 0X00, 0X00, 0X58, 0XA5 }, /* -93.5db */ + { 0X00, 0X00, 0X5D, 0XE6 }, /* -93.0db */ + { 0X00, 0X00, 0X63, 0X76 }, /* -92.5db */ + { 0X00, 0X00, 0X69, 0X5B }, /* -92.0db */ + { 0X00, 0X00, 0X6F, 0X99 }, /* -91.5db */ + { 0X00, 0X00, 0X76, 0X36 }, /* -91.0db */ + { 0X00, 0X00, 0X7D, 0X37 }, /* -90.5db */ + { 0X00, 0X00, 0X84, 0XA2 }, /* -90.0db */ + { 0X00, 0X00, 0X8C, 0X7E }, /* -89.5db */ + { 0X00, 0X00, 0X94, 0XD1 }, /* -89.0db */ + { 0X00, 0X00, 0X9D, 0XA3 }, /* -88.5db */ + { 0X00, 0X00, 0XA6, 0XFA }, /* -88.0db */ + { 0X00, 0X00, 0XB0, 0XDF }, /* -87.5db */ + { 0X00, 0X00, 0XBB, 0X5A }, /* -87.0db */ + { 0X00, 0X00, 0XC6, 0X74 }, /* -86.5db */ + { 0X00, 0X00, 0XD2, 0X36 }, /* -86.0db */ + { 0X00, 0X00, 0XDE, 0XAB }, /* -85.5db */ + { 0X00, 0X00, 0XEB, 0XDC }, /* -85.0db */ + { 0X00, 0X00, 0XF9, 0XD6 }, /* -84.5db */ + { 0X00, 0X01, 0X08, 0XA4 }, /* -84.0db */ + { 0X00, 0X01, 0X18, 0X52 }, /* -83.5db */ + { 0X00, 0X01, 0X28, 0XEF }, /* -83.0db */ + { 0X00, 0X01, 0X3A, 0X87 }, /* -82.5db */ + { 0X00, 0X01, 0X4D, 0X2A }, /* -82.0db */ + { 0X00, 0X01, 0X60, 0XE8 }, /* -81.5db */ + { 0X00, 0X01, 0X75, 0XD1 }, /* -81.0db */ + { 0X00, 0X01, 0X8B, 0XF7 }, /* -80.5db */ + { 0X00, 0X01, 0XA3, 0X6E }, /* -80.0db */ + { 0X00, 0X01, 0XBC, 0X48 }, /* -79.5db */ + { 0X00, 0X01, 0XD6, 0X9B }, /* -79.0db */ + { 0X00, 0X01, 0XF2, 0X7E }, /* -78.5db */ + { 0X00, 0X02, 0X10, 0X08 }, /* -78.0db */ + { 0X00, 0X02, 0X2F, 0X51 }, /* -77.5db */ + { 0X00, 0X02, 0X50, 0X76 }, /* -77.0db */ + { 0X00, 0X02, 0X73, 0X91 }, /* -76.5db */ + { 0X00, 0X02, 0X98, 0XC0 }, /* -76.0db */ + { 0X00, 0X02, 0XC0, 0X24 }, /* -75.5db */ + { 0X00, 0X02, 0XE9, 0XDD }, /* -75.0db */ + { 0X00, 0X03, 0X16, 0X0F }, /* -74.5db */ + { 0X00, 0X03, 0X44, 0XDF }, /* -74.0db */ + { 0X00, 0X03, 0X76, 0X76 }, /* -73.5db */ + { 0X00, 0X03, 0XAA, 0XFC }, /* -73.0db */ + { 0X00, 0X03, 0XE2, 0XA0 }, /* -72.5db */ + { 0X00, 0X04, 0X1D, 0X8F }, /* -72.0db */ + { 0X00, 0X04, 0X5B, 0XFD }, /* -71.5db */ + { 0X00, 0X04, 0X9E, 0X1D }, /* -71.0db */ + { 0X00, 0X04, 0XE4, 0X29 }, /* -70.5db */ + { 0X00, 0X05, 0X2E, 0X5A }, /* -70.0db */ + { 0X00, 0X05, 0X7C, 0XF2 }, /* -69.5db */ + { 0X00, 0X05, 0XD0, 0X31 }, /* -69.0db */ + { 0X00, 0X06, 0X28, 0X60 }, /* -68.5db */ + { 0X00, 0X06, 0X85, 0XC8 }, /* -68.0db */ + { 0X00, 0X06, 0XE8, 0XB9 }, /* -67.5db */ + { 0X00, 0X07, 0X51, 0X86 }, /* -67.0db */ + { 0X00, 0X07, 0XC0, 0X8A }, /* -66.5db */ + { 0X00, 0X08, 0X36, 0X21 }, /* -66.0db */ + { 0X00, 0X08, 0XB2, 0XB0 }, /* -65.5db */ + { 0X00, 0X09, 0X36, 0XA1 }, /* -65.0db */ + { 0X00, 0X09, 0XC2, 0X63 }, /* -64.5db */ + { 0X00, 0X0A, 0X56, 0X6D }, /* -64.0db */ + { 0X00, 0X0A, 0XF3, 0X3C }, /* -63.5db */ + { 0X00, 0X0B, 0X99, 0X56 }, /* -63.0db */ + { 0X00, 0X0C, 0X49, 0X48 }, /* -62.5db */ + { 0X00, 0X0D, 0X03, 0XA7 }, /* -62.0db */ + { 0X00, 0X0D, 0XC9, 0X11 }, /* -61.5db */ + { 0X00, 0X0E, 0X9A, 0X2D }, /* -61.0db */ + { 0X00, 0X0F, 0X77, 0XAD }, /* -60.5db */ + { 0X00, 0X10, 0X62, 0X4D }, /* -60.0db */ + { 0X00, 0X11, 0X5A, 0XD5 }, /* -59.5db */ + { 0X00, 0X12, 0X62, 0X16 }, /* -59.0db */ + { 0X00, 0X13, 0X78, 0XF0 }, /* -58.5db */ + { 0X00, 0X14, 0XA0, 0X50 }, /* -58.0db */ + { 0X00, 0X15, 0XD9, 0X31 }, /* -57.5db */ + { 0X00, 0X17, 0X24, 0X9C }, /* -57.0db */ + { 0X00, 0X18, 0X83, 0XAA }, /* -56.5db */ + { 0X00, 0X19, 0XF7, 0X86 }, /* -56.0db */ + { 0X00, 0X1B, 0X81, 0X6A }, /* -55.5db */ + { 0X00, 0X1D, 0X22, 0XA4 }, /* -55.0db */ + { 0X00, 0X1E, 0XDC, 0X98 }, /* -54.5db */ + { 0X00, 0X20, 0XB0, 0XBC }, /* -54.0db */ + { 0X00, 0X22, 0XA0, 0X9D }, /* -53.5db */ + { 0X00, 0X24, 0XAD, 0XE0 }, /* -53.0db */ + { 0X00, 0X26, 0XDA, 0X43 }, /* -52.5db */ + { 0X00, 0X29, 0X27, 0X9D }, /* -52.0db */ + { 0X00, 0X2B, 0X97, 0XE3 }, /* -51.5db */ + { 0X00, 0X2E, 0X2D, 0X27 }, /* -51.0db */ + { 0X00, 0X30, 0XE9, 0X9A }, /* -50.5db */ + { 0X00, 0X33, 0XCF, 0X8D }, /* -50.0db */ + { 0X00, 0X36, 0XE1, 0X78 }, /* -49.5db */ + { 0X00, 0X3A, 0X21, 0XF3 }, /* -49.0db */ + { 0X00, 0X3D, 0X93, 0XC3 }, /* -48.5db */ + { 0X00, 0X41, 0X39, 0XD3 }, /* -48.0db */ + { 0X00, 0X45, 0X17, 0X3B }, /* -47.5db */ + { 0X00, 0X49, 0X2F, 0X44 }, /* -47.0db */ + { 0X00, 0X4D, 0X85, 0X66 }, /* -46.5db */ + { 0X00, 0X52, 0X1D, 0X50 }, /* -46.0db */ + { 0X00, 0X56, 0XFA, 0XE8 }, /* -45.5db */ + { 0X00, 0X5C, 0X22, 0X4E }, /* -45.0db */ + { 0X00, 0X61, 0X97, 0XE1 }, /* -44.5db */ + { 0X00, 0X67, 0X60, 0X44 }, /* -44.0db */ + { 0X00, 0X6D, 0X80, 0X60 }, /* -43.5db */ + { 0X00, 0X73, 0XFD, 0X65 }, /* -43.0db */ + { 0X00, 0X7A, 0XDC, 0XD7 }, /* -42.5db */ + { 0X00, 0X82, 0X24, 0X8A }, /* -42.0db */ + { 0X00, 0X89, 0XDA, 0XAB }, /* -41.5db */ + { 0X00, 0X92, 0X05, 0XC6 }, /* -41.0db */ + { 0X00, 0X9A, 0XAC, 0XC8 }, /* -40.5db */ + { 0X00, 0XA3, 0XD7, 0X0A }, /* -40.0db */ + { 0X00, 0XAD, 0X8C, 0X52 }, /* -39.5db */ + { 0X00, 0XB7, 0XD4, 0XDD }, /* -39.0db */ + { 0X00, 0XC2, 0XB9, 0X65 }, /* -38.5db */ + { 0X00, 0XCE, 0X43, 0X28 }, /* -38.0db */ + { 0X00, 0XDA, 0X7B, 0XF1 }, /* -37.5db */ + { 0X00, 0XE7, 0X6E, 0X1E }, /* -37.0db */ + { 0X00, 0XF5, 0X24, 0XAC }, /* -36.5db */ + { 0X01, 0X03, 0XAB, 0X3D }, /* -36.0db */ + { 0X01, 0X13, 0X0E, 0X24 }, /* -35.5db */ + { 0X01, 0X23, 0X5A, 0X71 }, /* -35.0db */ + { 0X01, 0X34, 0X9D, 0XF8 }, /* -34.5db */ + { 0X01, 0X46, 0XE7, 0X5D }, /* -34.0db */ + { 0X01, 0X5A, 0X46, 0X27 }, /* -33.5db */ + { 0X01, 0X6E, 0XCA, 0XC5 }, /* -33.0db */ + { 0X01, 0X84, 0X86, 0X9F }, /* -32.5db */ + { 0X01, 0X9B, 0X8C, 0X27 }, /* -32.0db */ + { 0X01, 0XB3, 0XEE, 0XE5 }, /* -31.5db */ + { 0X01, 0XCD, 0XC3, 0X8C }, /* -31.0db */ + { 0X01, 0XE9, 0X20, 0X05 }, /* -30.5db */ + { 0X02, 0X06, 0X1B, 0X89 }, /* -30.0db */ + { 0X02, 0X24, 0XCE, 0XB0 }, /* -29.5db */ + { 0X02, 0X45, 0X53, 0X85 }, /* -29.0db */ + { 0X02, 0X67, 0XC5, 0XA2 }, /* -28.5db */ + { 0X02, 0X8C, 0X42, 0X3F }, /* -28.0db */ + { 0X02, 0XB2, 0XE8, 0X55 }, /* -27.5db */ + { 0X02, 0XDB, 0XD8, 0XAD }, /* -27.0db */ + { 0X03, 0X07, 0X36, 0X05 }, /* -26.5db */ + { 0X03, 0X35, 0X25, 0X29 }, /* -26.0db */ + { 0X03, 0X65, 0XCD, 0X13 }, /* -25.5db */ + { 0X03, 0X99, 0X57, 0X0C }, /* -25.0db */ + { 0X03, 0XCF, 0XEE, 0XCF }, /* -24.5db */ + { 0X04, 0X09, 0XC2, 0XB0 }, /* -24.0db */ + { 0X04, 0X47, 0X03, 0XC1 }, /* -23.5db */ + { 0X04, 0X87, 0XE5, 0XFB }, /* -23.0db */ + { 0X04, 0XCC, 0XA0, 0X6D }, /* -22.5db */ + { 0X05, 0X15, 0X6D, 0X68 }, /* -22.0db */ + { 0X05, 0X62, 0X8A, 0XB3 }, /* -21.5db */ + { 0X05, 0XB4, 0X39, 0XBC }, /* -21.0db */ + { 0X06, 0X0A, 0XBF, 0XD4 }, /* -20.5db */ + { 0X06, 0X66, 0X66, 0X66 }, /* -20.0db */ + { 0X06, 0XC7, 0X7B, 0X36 }, /* -19.5db */ + { 0X07, 0X2E, 0X50, 0XA6 }, /* -19.0db */ + { 0X07, 0X9B, 0X3D, 0XF6 }, /* -18.5db */ + { 0X08, 0X0E, 0X9F, 0X96 }, /* -18.0db */ + { 0X08, 0X88, 0XD7, 0X6D }, /* -17.5db */ + { 0X09, 0X0A, 0X4D, 0X2F }, /* -17.0db */ + { 0X09, 0X93, 0X6E, 0XB8 }, /* -16.5db */ + { 0X0A, 0X24, 0XB0, 0X62 }, /* -16.0db */ + { 0X0A, 0XBE, 0X8D, 0X70 }, /* -15.5db */ + { 0X0B, 0X61, 0X88, 0X71 }, /* -15.0db */ + { 0X0C, 0X0E, 0X2B, 0XB0 }, /* -14.5db */ + { 0X0C, 0XC5, 0X09, 0XAB }, /* -14.0db */ + { 0X0D, 0X86, 0XBD, 0X8D }, /* -13.5db */ + { 0X0E, 0X53, 0XEB, 0XB3 }, /* -13.0db */ + { 0X0F, 0X2D, 0X42, 0X38 }, /* -12.5db */ + { 0X10, 0X13, 0X79, 0X87 }, /* -12.0db */ + { 0X11, 0X07, 0X54, 0XF9 }, /* -11.5db */ + { 0X12, 0X09, 0XA3, 0X7A }, /* -11.0db */ + { 0X13, 0X1B, 0X40, 0X39 }, /* -10.5db */ + { 0X14, 0X3D, 0X13, 0X62 }, /* -10.0db */ + { 0X15, 0X70, 0X12, 0XE1 }, /* -9.5db */ + { 0X16, 0XB5, 0X43, 0X37 }, /* -9.0db */ + { 0X18, 0X0D, 0XB8, 0X54 }, /* -8.5db */ + { 0X19, 0X7A, 0X96, 0X7F }, /* -8.0db */ + { 0X1A, 0XFD, 0X13, 0X54 }, /* -7.5db */ + { 0X1C, 0X96, 0X76, 0XC6 }, /* -7.0db */ + { 0X1E, 0X48, 0X1C, 0X37 }, /* -6.5db */ + { 0X20, 0X13, 0X73, 0X9E }, /* -6.0db */ + { 0X21, 0XFA, 0X02, 0XBF }, /* -5.5db */ + { 0X23, 0XFD, 0X66, 0X78 }, /* -5.0db */ + { 0X26, 0X1F, 0X54, 0X1C }, /* -4.5db */ + { 0X28, 0X61, 0X9A, 0XE9 }, /* -4.0db */ + { 0X2A, 0XC6, 0X25, 0X91 }, /* -3.5db */ + { 0X2D, 0X4E, 0XFB, 0XD5 }, /* -3.0db */ + { 0X2F, 0XFE, 0X44, 0X48 }, /* -2.5db */ + { 0X32, 0XD6, 0X46, 0X17 }, /* -2.0db */ + { 0X35, 0XD9, 0X6B, 0X02 }, /* -1.5db */ + { 0X39, 0X0A, 0X41, 0X5F }, /* -1.0db */ + { 0X3C, 0X6B, 0X7E, 0X4F }, /* -0.5db */ + { 0X40, 0X00, 0X00, 0X00 }, /* 0.0db */ + { 0X43, 0XCA, 0XD0, 0X22 }, /* 0.5db */ + { 0X47, 0XCF, 0X26, 0X7D }, /* 1.0db */ + { 0X4C, 0X10, 0X6B, 0XA5 }, /* 1.5db */ + { 0X50, 0X92, 0X3B, 0XE3 }, /* 2.0db */ + { 0X55, 0X58, 0X6A, 0X46 }, /* 2.5db */ + { 0X5A, 0X67, 0X03, 0XDF }, /* 3.0db */ + { 0X5F, 0XC2, 0X53, 0X32 }, /* 3.5db */ + { 0X65, 0X6E, 0XE3, 0XDB }, /* 4.0db */ + { 0X6B, 0X71, 0X86, 0X68 }, /* 4.5db */ + { 0X71, 0XCF, 0X54, 0X71 }, /* 5.0db */ + { 0X78, 0X8D, 0XB4, 0XE9 }, /* 5.5db */ + { 0XFF, 0XFF, 0XFF, 0XFF }, /* 6.0db */ +}; #endif diff --git a/include/sound/tas2781.h b/include/sound/tas2781.h index a43ad6dcb7c7..18161d02a96f 100644 --- a/include/sound/tas2781.h +++ b/include/sound/tas2781.h @@ -50,6 +50,7 @@ #define TASDEVICE_I2CChecksum TASDEVICE_REG(0x0, 0x0, 0x7E) /* Volume control */ +#define TAS2563_DVC_LVL TASDEVICE_REG(0x00, 0x02, 0x0C) #define TAS2781_DVC_LVL TASDEVICE_REG(0x0, 0x0, 0x1A) #define TAS2781_AMP_LEVEL TASDEVICE_REG(0x0, 0x0, 0x03) #define TAS2781_AMP_LEVEL_MASK GENMASK(5, 1) -- cgit v1.2.3 From 27e6a24a4cf3d25421c0f6ebb7c39f45fc14d20f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 11 Jul 2024 13:56:54 -0400 Subject: mm, virt: merge AS_UNMOVABLE and AS_INACCESSIBLE The flags AS_UNMOVABLE and AS_INACCESSIBLE were both added just for guest_memfd; AS_UNMOVABLE is already in existing versions of Linux, while AS_INACCESSIBLE was acked for inclusion in 6.11. But really, they are the same thing: only guest_memfd uses them, at least for now, and guest_memfd pages are unmovable because they should not be accessed by the CPU. So merge them into one; use the AS_INACCESSIBLE name which is more comprehensive. At the same time, this fixes an embarrassing bug where AS_INACCESSIBLE was used as a bit mask, despite it being just a bit index. The bug was mostly benign, because AS_INACCESSIBLE's bit representation (1010) corresponded to setting AS_UNEVICTABLE (which is already set) and AS_ENOSPC (except no async writes can happen on the guest_memfd). So the AS_INACCESSIBLE flag simply had no effect. Fixes: 1d23040caa8b ("KVM: guest_memfd: Use AS_INACCESSIBLE when creating guest_memfd inode") Fixes: c72ceafbd12c ("mm: Introduce AS_INACCESSIBLE for encrypted/confidential memory") Cc: linux-mm@kvack.org Acked-by: Vlastimil Babka Acked-by: David Hildenbrand Tested-by: Michael Roth Reviewed-by: Michael Roth Signed-off-by: Paolo Bonzini --- include/linux/pagemap.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ce7bac8f81da..e05585eda771 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -208,8 +208,8 @@ enum mapping_flags { AS_RELEASE_ALWAYS, /* Call ->release_folio(), even if no private data */ AS_STABLE_WRITES, /* must wait for writeback before modifying folio contents */ - AS_UNMOVABLE, /* The mapping cannot be moved, ever */ - AS_INACCESSIBLE, /* Do not attempt direct R/W access to the mapping */ + AS_INACCESSIBLE, /* Do not attempt direct R/W access to the mapping, + including to move the mapping */ }; /** @@ -310,20 +310,20 @@ static inline void mapping_clear_stable_writes(struct address_space *mapping) clear_bit(AS_STABLE_WRITES, &mapping->flags); } -static inline void mapping_set_unmovable(struct address_space *mapping) +static inline void mapping_set_inaccessible(struct address_space *mapping) { /* - * It's expected unmovable mappings are also unevictable. Compaction + * It's expected inaccessible mappings are also unevictable. Compaction * migrate scanner (isolate_migratepages_block()) relies on this to * reduce page locking. */ set_bit(AS_UNEVICTABLE, &mapping->flags); - set_bit(AS_UNMOVABLE, &mapping->flags); + set_bit(AS_INACCESSIBLE, &mapping->flags); } -static inline bool mapping_unmovable(struct address_space *mapping) +static inline bool mapping_inaccessible(struct address_space *mapping) { - return test_bit(AS_UNMOVABLE, &mapping->flags); + return test_bit(AS_INACCESSIBLE, &mapping->flags); } static inline gfp_t mapping_gfp_mask(struct address_space * mapping) -- cgit v1.2.3 From bc1a5cd002116552db4c3541e91f8a5b1b0cf65d Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Wed, 10 Apr 2024 15:07:28 -0700 Subject: KVM: Add KVM_PRE_FAULT_MEMORY vcpu ioctl to pre-populate guest memory Add a new ioctl KVM_PRE_FAULT_MEMORY in the KVM common code. It iterates on the memory range and calls the arch-specific function. The implementation is optional and enabled by a Kconfig symbol. Suggested-by: Sean Christopherson Signed-off-by: Isaku Yamahata Reviewed-by: Rick Edgecombe Message-ID: <819322b8f25971f2b9933bfa4506e618508ad782.1712785629.git.isaku.yamahata@intel.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 5 +++++ include/uapi/linux/kvm.h | 10 ++++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7b57878c8c18..c3c922bf077f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2477,4 +2477,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); #endif +#ifdef CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY +long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, + struct kvm_pre_fault_memory *range); +#endif + #endif diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d03842abae57..e5af8c692dc0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -917,6 +917,7 @@ struct kvm_enable_cap { #define KVM_CAP_MEMORY_ATTRIBUTES 233 #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 +#define KVM_CAP_PRE_FAULT_MEMORY 236 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1548,4 +1549,13 @@ struct kvm_create_guest_memfd { __u64 reserved[6]; }; +#define KVM_PRE_FAULT_MEMORY _IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory) + +struct kvm_pre_fault_memory { + __u64 gpa; + __u64 size; + __u64 flags; + __u64 padding[5]; +}; + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From 9b2bc6b9a264b863a2273c02db5ee9e214e0a526 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Fri, 12 Jul 2024 12:31:32 +0100 Subject: iommu: Move IOMMU_DIRTY_NO_CLEAR define Fixes the compile issue when CONFIG_IOMMU_API is not set. Fixes: 4fe88fd8b4ae ("iommu/io-pgtable-arm: Add read_and_clear_dirty() support") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202407121602.HL9ih1it-lkp@intel.com/ Signed-off-by: Shameer Kolothum Reviewed-by: Joao Martins Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240712113132.45100-1-shameerali.kolothum.thodi@huawei.com Signed-off-by: Will Deacon --- include/linux/iommu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 7bc8dff7cf6d..104ce84647d4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -317,6 +317,9 @@ enum iommu_dev_features { #define IOMMU_PASID_INVALID (-1U) typedef unsigned int ioasid_t; +/* Read but do not clear any dirty bits */ +#define IOMMU_DIRTY_NO_CLEAR (1 << 0) + #ifdef CONFIG_IOMMU_API /** @@ -353,9 +356,6 @@ struct iommu_dirty_bitmap { struct iommu_iotlb_gather *gather; }; -/* Read but do not clear any dirty bits */ -#define IOMMU_DIRTY_NO_CLEAR (1 << 0) - /** * struct iommu_dirty_ops - domain specific dirty tracking operations * @set_dirty_tracking: Enable or Disable dirty tracking on the iommu domain -- cgit v1.2.3 From 2be32bbe6989e071aea1767cb54c7528a0dac07f Mon Sep 17 00:00:00 2001 From: Eric Chan Date: Thu, 11 Jul 2024 19:39:17 +0000 Subject: kunit: Fix the comment of KUNIT_ASSERT_STRNEQ as assertion The current comment for KUNIT_ASSERT_STRNEQ incorrectly describes it as an expectation. Since KUNIT_ASSERT_STRNEQ is an assertion, updates the comment to correctly refer to it as such. Signed-off-by: Eric Chan Reviewed-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/test.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index ec61cad6b71d..e37df1df5a92 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -1455,12 +1455,12 @@ do { \ ##__VA_ARGS__) /** - * KUNIT_ASSERT_STRNEQ() - Expects that strings @left and @right are not equal. + * KUNIT_ASSERT_STRNEQ() - An assertion that strings @left and @right are not equal. * @test: The test context object. * @left: an arbitrary expression that evaluates to a null terminated string. * @right: an arbitrary expression that evaluates to a null terminated string. * - * Sets an expectation that the values that @left and @right evaluate to are + * Sets an assertion that the values that @left and @right evaluate to are * not equal. This is semantically equivalent to * KUNIT_ASSERT_TRUE(@test, strcmp((@left), (@right))). See KUNIT_ASSERT_TRUE() * for more information. -- cgit v1.2.3 From 7d4087b013895524438f2522367c984f776e09e3 Mon Sep 17 00:00:00 2001 From: Eric Chan Date: Thu, 11 Jul 2024 19:39:31 +0000 Subject: kunit: Rename KUNIT_ASSERT_FAILURE to KUNIT_FAIL_AND_ABORT for readability Both KUNIT_FAIL and KUNIT_ASSERT_FAILURE defined to KUNIT_FAIL_ASSERTION with different tpye of kunit_assert_type. The current naming of KUNIT_ASSERT_FAILURE and KUNIT_FAIL_ASSERTION is confusing due to their similarities. To improve readability and symmetry, renames KUNIT_ASSERT_FAILURE to KUNIT_FAIL_AND_ABORT. Makes the naming consistent, with KUNIT_FAIL and KUNIT_FAIL_AND_ABORT being symmetrical. Additionally, an explanation for KUNIT_FAIL_AND_ABORT has been added to clarify its usage. Signed-off-by: Eric Chan Reviewed-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/assert.h | 2 +- include/kunit/test.h | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/kunit/assert.h b/include/kunit/assert.h index 7e7490a74b13..bb879389f11d 100644 --- a/include/kunit/assert.h +++ b/include/kunit/assert.h @@ -60,7 +60,7 @@ void kunit_assert_prologue(const struct kunit_loc *loc, * struct kunit_fail_assert - Represents a plain fail expectation/assertion. * @assert: The parent of this type. * - * Represents a simple KUNIT_FAIL/KUNIT_ASSERT_FAILURE that always fails. + * Represents a simple KUNIT_FAIL/KUNIT_FAIL_AND_ABORT that always fails. */ struct kunit_fail_assert { struct kunit_assert assert; diff --git a/include/kunit/test.h b/include/kunit/test.h index e37df1df5a92..fb58fa530c5e 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -1228,7 +1228,18 @@ do { \ fmt, \ ##__VA_ARGS__) -#define KUNIT_ASSERT_FAILURE(test, fmt, ...) \ +/** + * KUNIT_FAIL_AND_ABORT() - Always causes a test to fail and abort when evaluated. + * @test: The test context object. + * @fmt: an informational message to be printed when the assertion is made. + * @...: string format arguments. + * + * The opposite of KUNIT_SUCCEED(), it is an assertion that always fails. In + * other words, it always results in a failed assertion, and consequently + * always causes the test case to fail and abort when evaluated. + * See KUNIT_ASSERT_TRUE() for more information. + */ +#define KUNIT_FAIL_AND_ABORT(test, fmt, ...) \ KUNIT_FAIL_ASSERTION(test, KUNIT_ASSERTION, fmt, ##__VA_ARGS__) /** -- cgit v1.2.3 From ebf51e460e488511d9ee60b07d00dac68883facf Mon Sep 17 00:00:00 2001 From: Eric Chan Date: Thu, 11 Jul 2024 19:39:45 +0000 Subject: kunit: Introduce KUNIT_ASSERT_MEMEQ and KUNIT_ASSERT_MEMNEQ macros Introduces KUNIT_ASSERT_MEMEQ and KUNIT_ASSERT_MEMNEQ macros to provide assert-type equivalents for memory comparison. While KUNIT_EXPECT_MEMEQ and KUNIT_EXPECT_MEMNEQ are available for expectations, the addition of these new macros ensures that assertions can also be used for memory comparisons, enhancing the consistency and completeness of the kunit framework. Signed-off-by: Eric Chan Reviewed-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/test.h | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index fb58fa530c5e..e2a1f0928e8b 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -1486,6 +1486,60 @@ do { \ fmt, \ ##__VA_ARGS__) +/** + * KUNIT_ASSERT_MEMEQ() - Asserts that the first @size bytes of @left and @right are equal. + * @test: The test context object. + * @left: An arbitrary expression that evaluates to the specified size. + * @right: An arbitrary expression that evaluates to the specified size. + * @size: Number of bytes compared. + * + * Sets an assertion that the values that @left and @right evaluate to are + * equal. This is semantically equivalent to + * KUNIT_ASSERT_TRUE(@test, !memcmp((@left), (@right), (@size))). See + * KUNIT_ASSERT_TRUE() for more information. + * + * Although this assertion works for any memory block, it is not recommended + * for comparing more structured data, such as structs. This assertion is + * recommended for comparing, for example, data arrays. + */ +#define KUNIT_ASSERT_MEMEQ(test, left, right, size) \ + KUNIT_ASSERT_MEMEQ_MSG(test, left, right, size, NULL) + +#define KUNIT_ASSERT_MEMEQ_MSG(test, left, right, size, fmt, ...) \ + KUNIT_MEM_ASSERTION(test, \ + KUNIT_ASSERTION, \ + left, ==, right, \ + size, \ + fmt, \ + ##__VA_ARGS__) + +/** + * KUNIT_ASSERT_MEMNEQ() - Asserts that the first @size bytes of @left and @right are not equal. + * @test: The test context object. + * @left: An arbitrary expression that evaluates to the specified size. + * @right: An arbitrary expression that evaluates to the specified size. + * @size: Number of bytes compared. + * + * Sets an assertion that the values that @left and @right evaluate to are + * not equal. This is semantically equivalent to + * KUNIT_ASSERT_TRUE(@test, memcmp((@left), (@right), (@size))). See + * KUNIT_ASSERT_TRUE() for more information. + * + * Although this assertion works for any memory block, it is not recommended + * for comparing more structured data, such as structs. This assertion is + * recommended for comparing, for example, data arrays. + */ +#define KUNIT_ASSERT_MEMNEQ(test, left, right, size) \ + KUNIT_ASSERT_MEMNEQ_MSG(test, left, right, size, NULL) + +#define KUNIT_ASSERT_MEMNEQ_MSG(test, left, right, size, fmt, ...) \ + KUNIT_MEM_ASSERTION(test, \ + KUNIT_ASSERTION, \ + left, !=, right, \ + size, \ + fmt, \ + ##__VA_ARGS__) + /** * KUNIT_ASSERT_NULL() - Asserts that pointers @ptr is null. * @test: The test context object. -- cgit v1.2.3 From 617069741dfbb141bc4574531a5dbbbad8ddf197 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 10 Jul 2024 20:53:12 +0200 Subject: dm: introduce the target flag mempool_needs_integrity This commit introduces the dm target flag mempool_needs_integrity. When the flag is set, device mapper will call bioset_integrity_create on it's bio sets. The target can then call bio_integrity_alloc on the bios allocated from the table's mempool. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 384649a61bfa..1363f87fbba6 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -405,6 +405,12 @@ struct dm_target { * support it. */ bool flush_bypasses_map:1; + + /* + * Set if the target calls bio_integrity_alloc on bios received + * in the map method. + */ + bool mempool_needs_integrity:1; }; void *dm_per_bio_data(struct bio *bio, size_t data_size); -- cgit v1.2.3 From 6a26f9c68901797261bc145975a02f85be0c1d8f Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Thu, 11 Jul 2024 10:14:57 +0000 Subject: cgroup/misc: Introduce misc.events.local Currently the event counting provided by misc.events is hierarchical, it's not practical if user is only concerned with events of a specified cgroup. Therefore, introduce misc.events.local collect events specific to the given cgroup. This is analogous to memory.events.local and pids.events.local. Signed-off-by: Xiu Jianfeng Signed-off-by: Tejun Heo --- include/linux/misc_cgroup.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index 618392d41975..49eef10c8e59 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -40,6 +40,7 @@ struct misc_res { atomic64_t watermark; atomic64_t usage; atomic64_t events; + atomic64_t events_local; }; /** @@ -53,6 +54,8 @@ struct misc_cg { /* misc.events */ struct cgroup_file events_file; + /* misc.events.local */ + struct cgroup_file events_local_file; struct misc_res res[MISC_CG_RES_TYPES]; }; -- cgit v1.2.3 From f7866c35873377313ff94398f17d425b28b71de1 Mon Sep 17 00:00:00 2001 From: Tengda Wu Date: Thu, 11 Jul 2024 22:58:18 +0800 Subject: bpf: Fix null pointer dereference in resolve_prog_type() for BPF_PROG_TYPE_EXT When loading a EXT program without specifying `attr->attach_prog_fd`, the `prog->aux->dst_prog` will be null. At this time, calling resolve_prog_type() anywhere will result in a null pointer dereference. Example stack trace: [ 8.107863] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004 [ 8.108262] Mem abort info: [ 8.108384] ESR = 0x0000000096000004 [ 8.108547] EC = 0x25: DABT (current EL), IL = 32 bits [ 8.108722] SET = 0, FnV = 0 [ 8.108827] EA = 0, S1PTW = 0 [ 8.108939] FSC = 0x04: level 0 translation fault [ 8.109102] Data abort info: [ 8.109203] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 [ 8.109399] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 8.109614] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 8.109836] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000101354000 [ 8.110011] [0000000000000004] pgd=0000000000000000, p4d=0000000000000000 [ 8.112624] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP [ 8.112783] Modules linked in: [ 8.113120] CPU: 0 PID: 99 Comm: may_access_dire Not tainted 6.10.0-rc3-next-20240613-dirty #1 [ 8.113230] Hardware name: linux,dummy-virt (DT) [ 8.113390] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 8.113429] pc : may_access_direct_pkt_data+0x24/0xa0 [ 8.113746] lr : add_subprog_and_kfunc+0x634/0x8e8 [ 8.113798] sp : ffff80008283b9f0 [ 8.113813] x29: ffff80008283b9f0 x28: ffff800082795048 x27: 0000000000000001 [ 8.113881] x26: ffff0000c0bb2600 x25: 0000000000000000 x24: 0000000000000000 [ 8.113897] x23: ffff0000c1134000 x22: 000000000001864f x21: ffff0000c1138000 [ 8.113912] x20: 0000000000000001 x19: ffff0000c12b8000 x18: ffffffffffffffff [ 8.113929] x17: 0000000000000000 x16: 0000000000000000 x15: 0720072007200720 [ 8.113944] x14: 0720072007200720 x13: 0720072007200720 x12: 0720072007200720 [ 8.113958] x11: 0720072007200720 x10: 0000000000f9fca4 x9 : ffff80008021f4e4 [ 8.113991] x8 : 0101010101010101 x7 : 746f72705f6d656d x6 : 000000001e0e0f5f [ 8.114006] x5 : 000000000001864f x4 : ffff0000c12b8000 x3 : 000000000000001c [ 8.114020] x2 : 0000000000000002 x1 : 0000000000000000 x0 : 0000000000000000 [ 8.114126] Call trace: [ 8.114159] may_access_direct_pkt_data+0x24/0xa0 [ 8.114202] bpf_check+0x3bc/0x28c0 [ 8.114214] bpf_prog_load+0x658/0xa58 [ 8.114227] __sys_bpf+0xc50/0x2250 [ 8.114240] __arm64_sys_bpf+0x28/0x40 [ 8.114254] invoke_syscall.constprop.0+0x54/0xf0 [ 8.114273] do_el0_svc+0x4c/0xd8 [ 8.114289] el0_svc+0x3c/0x140 [ 8.114305] el0t_64_sync_handler+0x134/0x150 [ 8.114331] el0t_64_sync+0x168/0x170 [ 8.114477] Code: 7100707f 54000081 f9401c00 f9403800 (b9400403) [ 8.118672] ---[ end trace 0000000000000000 ]--- One way to fix it is by forcing `attach_prog_fd` non-empty when bpf_prog_load(). But this will lead to `libbpf_probe_bpf_prog_type` API broken which use verifier log to probe prog type and will log nothing if we reject invalid EXT prog before bpf_check(). Another way is by adding null check in resolve_prog_type(). The issue was introduced by commit 4a9c7bbe2ed4 ("bpf: Resolve to prog->aux->dst_prog->type only for BPF_PROG_TYPE_EXT") which wanted to correct type resolution for BPF_PROG_TYPE_TRACING programs. Before that, the type resolution of BPF_PROG_TYPE_EXT prog actually follows the logic below: prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; It implies that when EXT program is not yet attached to `dst_prog`, the prog type should be EXT itself. This code worked fine in the past. So just keep using it. Fix this by returning `prog->type` for BPF_PROG_TYPE_EXT if `dst_prog` is not present in resolve_prog_type(). Fixes: 4a9c7bbe2ed4 ("bpf: Resolve to prog->aux->dst_prog->type only for BPF_PROG_TYPE_EXT") Signed-off-by: Tengda Wu Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann Cc: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20240711145819.254178-2-wutengda@huaweicloud.com --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e98ba5a5cf79..6503c85b10a3 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -856,7 +856,7 @@ static inline u32 type_flag(u32 type) /* only use after check_attach_btf_id() */ static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog) { - return prog->type == BPF_PROG_TYPE_EXT ? + return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->dst_prog) ? prog->aux->dst_prog->type : prog->type; } -- cgit v1.2.3 From 6cc040542ba7b2c60e5119cd04d841fcf048c872 Mon Sep 17 00:00:00 2001 From: Vivek Kasireddy Date: Sun, 23 Jun 2024 23:36:09 -0700 Subject: mm/gup: introduce unpin_folio/unpin_folios helpers Patch series "mm/gup: Introduce memfd_pin_folios() for pinning memfd folios", v16. Currently, some drivers (e.g, Udmabuf) that want to longterm-pin the pages/folios associated with a memfd, do so by simply taking a reference on them. This is not desirable because the pages/folios may reside in Movable zone or CMA block. Therefore, having drivers use memfd_pin_folios() API ensures that the folios are appropriately pinned via FOLL_PIN for longterm DMA. This patchset also introduces a few helpers and converts the Udmabuf driver to use folios and memfd_pin_folios() API to longterm-pin the folios for DMA. Two new Udmabuf selftests are also included to test the driver and the new API. This patch (of 9): These helpers are the folio versions of unpin_user_page/unpin_user_pages. They are currently only useful for unpinning folios pinned by memfd_pin_folios() or other associated routines. However, they could find new uses in the future, when more and more folio-only helpers are added to GUP. We should probably sanity check the folio as part of unpin similar to how it is done in unpin_user_page/unpin_user_pages but we cannot cleanly do that at the moment without also checking the subpage. Therefore, sanity checking needs to be added to these routines once we have a way to determine if any given folio is anon-exclusive (via a per folio AnonExclusive flag). Link: https://lkml.kernel.org/r/20240624063952.1572359-1-vivek.kasireddy@intel.com Link: https://lkml.kernel.org/r/20240624063952.1572359-2-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy Suggested-by: David Hildenbrand Reviewed-by: David Hildenbrand Acked-by: Dave Airlie Acked-by: Gerd Hoffmann Cc: Matthew Wilcox Cc: Christoph Hellwig Cc: Jason Gunthorpe Cc: Peter Xu Cc: Christoph Hellwig Cc: Daniel Vetter Cc: Dongwon Kim Cc: Hugh Dickins Cc: Junxiao Chang Cc: Oscar Salvador Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: Mike Kravetz Cc: Shuah Khan Signed-off-by: Andrew Morton --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a2e3ebead410..7b84379e3a1b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1584,11 +1584,13 @@ static inline void put_page(struct page *page) #define GUP_PIN_COUNTING_BIAS (1U << 10) void unpin_user_page(struct page *page); +void unpin_folio(struct folio *folio); void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, bool make_dirty); void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, bool make_dirty); void unpin_user_pages(struct page **pages, unsigned long npages); +void unpin_folios(struct folio **folios, unsigned long nfolios); static inline bool is_cow_mapping(vm_flags_t flags) { -- cgit v1.2.3 From 89c1905d9c140372b7f50ef48f42378cf85d9bc5 Mon Sep 17 00:00:00 2001 From: Vivek Kasireddy Date: Sun, 23 Jun 2024 23:36:11 -0700 Subject: mm/gup: introduce memfd_pin_folios() for pinning memfd folios For drivers that would like to longterm-pin the folios associated with a memfd, the memfd_pin_folios() API provides an option to not only pin the folios via FOLL_PIN but also to check and migrate them if they reside in movable zone or CMA block. This API currently works with memfds but it should work with any files that belong to either shmemfs or hugetlbfs. Files belonging to other filesystems are rejected for now. The folios need to be located first before pinning them via FOLL_PIN. If they are found in the page cache, they can be immediately pinned. Otherwise, they need to be allocated using the filesystem specific APIs and then pinned. [akpm@linux-foundation.org: improve the CONFIG_MMU=n situation, per SeongJae] [vivek.kasireddy@intel.com: return -EINVAL if the end offset is greater than the size of memfd] Link: https://lkml.kernel.org/r/IA0PR11MB71850525CBC7D541CAB45DF1F8DB2@IA0PR11MB7185.namprd11.prod.outlook.com Link: https://lkml.kernel.org/r/20240624063952.1572359-4-vivek.kasireddy@intel.com Signed-off-by: Vivek Kasireddy Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe (v2) Reviewed-by: David Hildenbrand (v3) Reviewed-by: Christoph Hellwig (v6) Acked-by: Dave Airlie Acked-by: Gerd Hoffmann Cc: Matthew Wilcox (Oracle) Cc: Daniel Vetter Cc: Hugh Dickins Cc: Peter Xu Cc: Dongwon Kim Cc: Junxiao Chang Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: Mike Kravetz Cc: Oscar Salvador Cc: Shuah Khan Signed-off-by: Andrew Morton --- include/linux/memfd.h | 5 +++++ include/linux/mm.h | 3 +++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/memfd.h b/include/linux/memfd.h index e7abf6fa4c52..3f2cf339ceaf 100644 --- a/include/linux/memfd.h +++ b/include/linux/memfd.h @@ -6,11 +6,16 @@ #ifdef CONFIG_MEMFD_CREATE extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg); +struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx); #else static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a) { return -EINVAL; } +static inline struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx) +{ + return ERR_PTR(-EINVAL); +} #endif #endif /* __LINUX_MEMFD_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 7b84379e3a1b..5f1075d19600 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2500,6 +2500,9 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); +long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end, + struct folio **folios, unsigned int max_folios, + pgoff_t *offset); int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); -- cgit v1.2.3 From ed5d583a88a9207b866c14ba834984c6f3c51d23 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 27 Jun 2024 10:08:54 -0700 Subject: fs/procfs: implement efficient VMA querying API for /proc//maps /proc//maps file is extremely useful in practice for various tasks involving figuring out process memory layout, what files are backing any given memory range, etc. One important class of applications that absolutely rely on this are profilers/stack symbolizers (perf tool being one of them). Patterns of use differ, but they generally would fall into two categories. In on-demand pattern, a profiler/symbolizer would normally capture stack trace containing absolute memory addresses of some functions, and would then use /proc//maps file to find corresponding backing ELF files (normally, only executable VMAs are of interest), file offsets within them, and then continue from there to get yet more information (ELF symbols, DWARF information) to get human-readable symbolic information. This pattern is used by Meta's fleet-wide profiler, as one example. In preprocessing pattern, application doesn't know the set of addresses of interest, so it has to fetch all relevant VMAs (again, probably only executable ones), store or cache them, then proceed with profiling and stack trace capture. Once done, it would do symbolization based on stored VMA information. This can happen at much later point in time. This patterns is used by perf tool, as an example. In either case, there are both performance and correctness requirement involved. This address to VMA information translation has to be done as efficiently as possible, but also not miss any VMA (especially in the case of loading/unloading shared libraries). In practice, correctness can't be guaranteed (due to process dying before VMA data can be captured, or shared library being unloaded, etc), but any effort to maximize the chance of finding the VMA is appreciated. Unfortunately, for all the /proc//maps file universality and usefulness, it doesn't fit the above use cases 100%. First, it's main purpose is to emit all VMAs sequentially, but in practice captured addresses would fall only into a smaller subset of all process' VMAs, mainly containing executable text. Yet, library would need to parse most or all of the contents to find needed VMAs, as there is no way to skip VMAs that are of no use. Efficient library can do the linear pass and it is still relatively efficient, but it's definitely an overhead that can be avoided, if there was a way to do more targeted querying of the relevant VMA information. Second, it's a text based interface, which makes its programmatic use from applications and libraries more cumbersome and inefficient due to the need to handle text parsing to get necessary pieces of information. The overhead is actually payed both by kernel, formatting originally binary VMA data into text, and then by user space application, parsing it back into binary data for further use. For the on-demand pattern of usage, described above, another problem when writing generic stack trace symbolization library is an unfortunate performance-vs-correctness tradeoff that needs to be made. Library has to make a decision to either cache parsed contents of /proc//maps (after initial processing) to service future requests (if application requests to symbolize another set of addresses (for the same process), captured at some later time, which is typical for periodic/continuous profiling cases) to avoid higher costs of re-parsing this file. Or it has to choose to cache the contents in memory to speed up future requests. In the former case, more memory is used for the cache and there is a risk of getting stale data if application loads or unloads shared libraries, or otherwise changed its set of VMAs somehow, e.g., through additional mmap() calls. In the latter case, it's the performance hit that comes from re-opening the file and re-parsing its contents all over again. This patch aims to solve this problem by providing a new API built on top of /proc//maps. It's meant to address both non-selectiveness and text nature of /proc//maps, by giving user more control of what sort of VMA(s) needs to be queried, and being binary-based interface eliminates the overhead of text formatting (on kernel side) and parsing (on user space side). It's also designed to be extensible and forward/backward compatible by including required struct size field, which user has to provide. We use established copy_struct_from_user() approach to handle extensibility. User has a choice to pick either getting VMA that covers provided address or -ENOENT if none is found (exact, least surprising, case). Or, with an extra query flag (PROCMAP_QUERY_COVERING_OR_NEXT_VMA), they can get either VMA that covers the address (if there is one), or the closest next VMA (i.e., VMA with the smallest vm_start > addr). The latter allows more efficient use, but, given it could be a surprising behavior, requires an explicit opt-in. There is another query flag that is useful for some use cases. PROCMAP_QUERY_FILE_BACKED_VMA instructs this API to only return file-backed VMAs. Combining this with PROCMAP_QUERY_COVERING_OR_NEXT_VMA makes it possible to efficiently iterate only file-backed VMAs of the process, which is what profilers/symbolizers are normally interested in. All the above querying flags can be combined with (also optional) set of desired VMA permissions flags. This allows to, for example, iterate only an executable subset of VMAs, which is what preprocessing pattern, used by perf tool, would benefit from, as the assumption is that captured stack traces would have addresses of executable code. This saves time by skipping non-executable VMAs altogether efficienty. All these querying flags (modifiers) are orthogonal and can be combined in a semantically meaningful and natural way. Basing this ioctl()-based API on top of /proc//maps's FD makes sense given it's querying the same set of VMA data. It's also benefitial because permission checks for /proc//maps is performed at open time once, and the actual data read of text contents of /proc//maps is done without further permission checks. We piggyback on this pattern with ioctl()-based API as well, as that's a desired property. Both for performance reasons, but also for security and flexibility reasons. Allowing application to open an FD for /proc/self/maps without any extra capabilities, and then passing it to some sort of profiling agent through Unix-domain socket, would allow such profiling agent to not require some of the capabilities that are otherwise expected when opening /proc//maps file for *another* process. This is a desirable property for some more restricted setups. This new ioctl-based implementation doesn't interfere with seq_file-based implementation of /proc//maps textual interface, and so could be used together or independently without paying any price for that. Note also, that fetching VMA name (e.g., backing file path, or special hard-coded or user-provided names) is optional just like build ID. If user sets vma_name_size to zero, kernel code won't attempt to retrieve it, saving resources. Earlier versions of this patch set were adding per-VMA locking, which is why we have a code structure that is ready for abstracting mmap_lock vs vm_lock differences (query_vma_setup(), query_vma_teardown(), and query_vma_find_by_addr()), but given anon_vma_name() is not yet compatible with per-VMA locking, initial implementation sticks to using only mmap_lock for now. It will be easy to add back per-VMA locking once all the pieces are ready later on. Which is why we keep existing code structure with setup/teardown/query helper functions. [andrii@kernel.org: improve PROCMAP_QUERY's compat mode handling] Link: https://lkml.kernel.org/r/20240701174805.1897344-2-andrii@kernel.org Link: https://lkml.kernel.org/r/20240627170900.1672542-3-andrii@kernel.org Signed-off-by: Andrii Nakryiko Acked-by: Liam R. Howlett Cc: Alexey Dobriyan Cc: Al Viro Cc: Christian Brauner Cc: Greg Kroah-Hartman Cc: Mike Rapoport (IBM) Cc: Suren Baghdasaryan Cc: Andi Kleen Cc: Arnd Bergmann Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/uapi/linux/fs.h | 130 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 129 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 45e4e64fd664..5d440f9b5d92 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -333,8 +333,10 @@ typedef int __bitwise __kernel_rwf_t; #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ RWF_APPEND | RWF_NOAPPEND) +#define PROCFS_IOCTL_MAGIC 'f' + /* Pagemap ioctl */ -#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) +#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg) /* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */ #define PAGE_IS_WPALLOWED (1 << 0) @@ -393,4 +395,130 @@ struct pm_scan_arg { __u64 return_mask; }; +/* /proc//maps ioctl */ +#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query) + +enum procmap_query_flags { + /* + * VMA permission flags. + * + * Can be used as part of procmap_query.query_flags field to look up + * only VMAs satisfying specified subset of permissions. E.g., specifying + * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs, + * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only + * return read/write VMAs, though both executable/non-executable and + * private/shared will be ignored. + * + * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags + * field to specify actual VMA permissions. + */ + PROCMAP_QUERY_VMA_READABLE = 0x01, + PROCMAP_QUERY_VMA_WRITABLE = 0x02, + PROCMAP_QUERY_VMA_EXECUTABLE = 0x04, + PROCMAP_QUERY_VMA_SHARED = 0x08, + /* + * Query modifier flags. + * + * By default VMA that covers provided address is returned, or -ENOENT + * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest + * VMA with vma_start > addr will be returned if no covering VMA is + * found. + * + * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that + * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA + * to iterate all VMAs with file backing. + */ + PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10, + PROCMAP_QUERY_FILE_BACKED_VMA = 0x20, +}; + +/* + * Input/output argument structured passed into ioctl() call. It can be used + * to query a set of VMAs (Virtual Memory Areas) of a process. + * + * Each field can be one of three kinds, marked in a short comment to the + * right of the field: + * - "in", input argument, user has to provide this value, kernel doesn't modify it; + * - "out", output argument, kernel sets this field with VMA data; + * - "in/out", input and output argument; user provides initial value (used + * to specify maximum allowable buffer size), and kernel sets it to actual + * amount of data written (or zero, if there is no data). + * + * If matching VMA is found (according to criterias specified by + * query_addr/query_flags, all the out fields are filled out, and ioctl() + * returns 0. If there is no matching VMA, -ENOENT will be returned. + * In case of any other error, negative error code other than -ENOENT is + * returned. + * + * Most of the data is similar to the one returned as text in /proc//maps + * file, but procmap_query provides more querying flexibility. There are no + * consistency guarantees between subsequent ioctl() calls, but data returned + * for matched VMA is self-consistent. + */ +struct procmap_query { + /* Query struct size, for backwards/forward compatibility */ + __u64 size; + /* + * Query flags, a combination of enum procmap_query_flags values. + * Defines query filtering and behavior, see enum procmap_query_flags. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_flags; /* in */ + /* + * Query address. By default, VMA that covers this address will + * be looked up. PROCMAP_QUERY_* flags above modify this default + * behavior further. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_addr; /* in */ + /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */ + __u64 vma_start; /* out */ + __u64 vma_end; /* out */ + /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */ + __u64 vma_flags; /* out */ + /* VMA backing page size granularity. */ + __u64 vma_page_size; /* out */ + /* + * VMA file offset. If VMA has file backing, this specifies offset + * within the file that VMA's start address corresponds to. + * Is set to zero if VMA has no backing file. + */ + __u64 vma_offset; /* out */ + /* Backing file's inode number, or zero, if VMA has no backing file. */ + __u64 inode; /* out */ + /* Backing file's device major/minor number, or zero, if VMA has no backing file. */ + __u32 dev_major; /* out */ + __u32 dev_minor; /* out */ + /* + * If set to non-zero value, signals the request to return VMA name + * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix + * appended, if file was unlinked from FS) for matched VMA. VMA name + * can also be some special name (e.g., "[heap]", "[stack]") or could + * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME). + * + * Kernel will set this field to zero, if VMA has no associated name. + * Otherwise kernel will return actual amount of bytes filled in + * user-supplied buffer (see vma_name_addr field below), including the + * terminating zero. + * + * If VMA name is longer that user-supplied maximum buffer size, + * -E2BIG error is returned. + * + * If this field is set to non-zero value, vma_name_addr should point + * to valid user space memory buffer of at least vma_name_size bytes. + * If set to zero, vma_name_addr should be set to zero as well + */ + __u32 vma_name_size; /* in/out */ + /* + * User-supplied address of a buffer of at least vma_name_size bytes + * for kernel to fill with matched VMA's name (see vma_name_size field + * description above for details). + * + * Should be set to zero if VMA name should not be returned. + */ + __u64 vma_name_addr; /* in */ +}; + #endif /* _UAPI_LINUX_FS_H */ -- cgit v1.2.3 From bfc69fd05ef9b6416f4811aafafb7f2b34daa000 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 27 Jun 2024 10:08:55 -0700 Subject: fs/procfs: add build ID fetching to PROCMAP_QUERY API The need to get ELF build ID reliably is an important aspect when dealing with profiling and stack trace symbolization, and /proc//maps textual representation doesn't help with this. To get backing file's ELF build ID, application has to first resolve VMA, then use it's start/end address range to follow a special /proc//map_files/- symlink to open the ELF file (this is necessary because backing file might have been removed from the disk or was already replaced with another binary in the same file path. Such approach, beyond just adding complexity of having to do a bunch of extra work, has extra security implications. Because application opens underlying ELF file and needs read access to its entire contents (as far as kernel is concerned), kernel puts additional capable() checks on following /proc//map_files/- symlink. And that makes sense in general. But in the case of build ID, profiler/symbolizer doesn't need the contents of ELF file, per se. It's only build ID that is of interest, and ELF build ID itself doesn't provide any sensitive information. So this patch adds a way to request backing file's ELF build ID along the rest of VMA information in the same API. User has control over whether this piece of information is requested or not by either setting build_id_size field to zero or non-zero maximum buffer size they provided through build_id_addr field (which encodes user pointer as __u64 field). This is a completely optional piece of information, and so has no performance implications for user cases that don't care about build ID, while improving performance and simplifying the setup for those application that do need it. Kernel already implements build ID fetching, which is used from BPF subsystem. We are reusing this code here, but plan a follow up changes to make it work better under more relaxed assumption (compared to what existing code assumes) of being called from user process context, in which page faults are allowed. BPF-specific implementation currently bails out if necessary part of ELF file is not paged in, all due to extra BPF-specific restrictions (like the need to fetch build ID in restrictive contexts such as NMI handler). [andrii@kernel.org: fix integer to pointer cast warning in do_procmap_query()] Link: https://lkml.kernel.org/r/20240701174805.1897344-1-andrii@kernel.org Link: https://lkml.kernel.org/r/20240627170900.1672542-4-andrii@kernel.org Signed-off-by: Andrii Nakryiko Acked-by: Liam R. Howlett Cc: Alexey Dobriyan Cc: Al Viro Cc: Christian Brauner Cc: Greg Kroah-Hartman Cc: Mike Rapoport (IBM) Cc: Suren Baghdasaryan Cc: Andi Kleen Cc: Arnd Bergmann Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/uapi/linux/fs.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 5d440f9b5d92..2a4a5f50c98e 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -511,6 +511,26 @@ struct procmap_query { * If set to zero, vma_name_addr should be set to zero as well */ __u32 vma_name_size; /* in/out */ + /* + * If set to non-zero value, signals the request to extract and return + * VMA's backing file's build ID, if the backing file is an ELF file + * and it contains embedded build ID. + * + * Kernel will set this field to zero, if VMA has no backing file, + * backing file is not an ELF file, or ELF file has no build ID + * embedded. + * + * Build ID is a binary value (not a string). Kernel will set + * build_id_size field to exact number of bytes used for build ID. + * If build ID is requested and present, but needs more bytes than + * user-supplied maximum buffer size (see build_id_addr field below), + * -E2BIG error will be returned. + * + * If this field is set to non-zero value, build_id_addr should point + * to valid user space memory buffer of at least build_id_size bytes. + * If set to zero, build_id_addr should be set to zero as well + */ + __u32 build_id_size; /* in/out */ /* * User-supplied address of a buffer of at least vma_name_size bytes * for kernel to fill with matched VMA's name (see vma_name_size field @@ -519,6 +539,14 @@ struct procmap_query { * Should be set to zero if VMA name should not be returned. */ __u64 vma_name_addr; /* in */ + /* + * User-supplied address of a buffer of at least build_id_size bytes + * for kernel to fill with matched VMA's ELF build ID, if available + * (see build_id_size field description above for details). + * + * Should be set to zero if build ID should not be returned. + */ + __u64 build_id_addr; /* in */ }; #endif /* _UAPI_LINUX_FS_H */ -- cgit v1.2.3 From f216c845f3c772e54d27fe209fd300b10e7bf54a Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Fri, 28 Jun 2024 21:07:49 +0800 Subject: mm: add per-order mTHP split counters Patch series "mm: introduce per-order mTHP split counters", v3. At present, the split counters in THP statistics no longer include PTE-mapped mTHP. Therefore, we want to introduce per-order mTHP split counters to monitor the frequency of mTHP splits. This will assist developers in better analyzing and optimizing system performance. /sys/kernel/mm/transparent_hugepage/hugepages-/stats split split_failed split_deferred This patch (of 2): Currently, the split counters in THP statistics no longer include PTE-mapped mTHP. Therefore, we propose introducing per-order mTHP split counters to monitor the frequency of mTHP splits. This will help developers better analyze and optimize system performance. /sys/kernel/mm/transparent_hugepage/hugepages-/stats split split_failed split_deferred [ioworker0@gmail.com: make things more readable, per Barry and Baolin] Link: https://lkml.kernel.org/r/20240704012905.42971-2-ioworker0@gmail.com [ioworker0@gmail.com: use == for `order' test, per David] Link: https://lkml.kernel.org/r/20240705113119.82210-1-ioworker0@gmail.com Link: https://lkml.kernel.org/r/20240704012905.42971-1-ioworker0@gmail.com Link: https://lkml.kernel.org/r/20240704012905.42971-2-ioworker0@gmail.com Link: https://lkml.kernel.org/r/20240628130750.73097-1-ioworker0@gmail.com Link: https://lkml.kernel.org/r/20240628130750.73097-2-ioworker0@gmail.com Signed-off-by: Mingzhe Yang Signed-off-by: Lance Yang Reviewed-by: Ryan Roberts Acked-by: Barry Song Reviewed-by: Baolin Wang Acked-by: David Hildenbrand Cc: Bang Li Cc: Yang Shi Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 212cca384d7e..cee3c5da8f0e 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -284,6 +284,9 @@ enum mthp_stat_item { MTHP_STAT_FILE_ALLOC, MTHP_STAT_FILE_FALLBACK, MTHP_STAT_FILE_FALLBACK_CHARGE, + MTHP_STAT_SPLIT, + MTHP_STAT_SPLIT_FAILED, + MTHP_STAT_SPLIT_DEFERRED, __MTHP_STAT_COUNT }; -- cgit v1.2.3 From 18d095b2556e5e1292003c8e9f5d845ed42ef89b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 2 Jul 2024 15:51:19 +0200 Subject: mm: define __pte_leaf_size() to also take a PMD entry On powerpc 8xx, when a page is 8M size, the information is in the PMD entry. So allow architectures to provide __pte_leaf_size() instead of pte_leaf_size() and provide the PMD entry to that function. When __pte_leaf_size() is not defined, define it as a pte_leaf_size() so that architectures not interested in the PMD arguments are not impacted. Only define a default pte_leaf_size() when __pte_leaf_size() is not defined to make sure nobody adds new calls to pte_leaf_size() in the core. Link: https://lkml.kernel.org/r/c7c008f0a314bf8029ad7288fdc908db1ec7e449.1719928057.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Reviewed-by: Oscar Salvador Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/pgtable.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 2f32eaccf0b9..2a6a3cccfc36 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1907,9 +1907,12 @@ typedef unsigned int pgtbl_mod_mask; #ifndef pmd_leaf_size #define pmd_leaf_size(x) PMD_SIZE #endif +#ifndef __pte_leaf_size #ifndef pte_leaf_size #define pte_leaf_size(x) PAGE_SIZE #endif +#define __pte_leaf_size(x,y) pte_leaf_size(y) +#endif /* * We always define pmd_pfn for all archs as it's used in lots of generic -- cgit v1.2.3 From e6c0c03245b14d6c205814aee67128257d0bea84 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 2 Jul 2024 15:51:20 +0200 Subject: mm: provide mm_struct and address to huge_ptep_get() On powerpc 8xx huge_ptep_get() will need to know whether the given ptep is a PTE entry or a PMD entry. This cannot be known with the PMD entry itself because there is no easy way to know it from the content of the entry. So huge_ptep_get() will need to know either the size of the page or get the pmd. In order to be consistent with huge_ptep_get_and_clear(), give mm and address to huge_ptep_get(). Link: https://lkml.kernel.org/r/cc00c70dd384298796a4e1b25d6c4eb306d3af85.1719928057.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Reviewed-by: Oscar Salvador Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Peter Xu Signed-off-by: Andrew Morton --- include/asm-generic/hugetlb.h | 2 +- include/linux/swapops.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index 6dcf4d576970..594d5905f615 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -144,7 +144,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_HUGE_PTEP_GET -static inline pte_t huge_ptep_get(pte_t *ptep) +static inline pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { return ptep_get(ptep); } diff --git a/include/linux/swapops.h b/include/linux/swapops.h index a5c560a2f8c2..cb468e418ea1 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -334,7 +334,7 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry) extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); -extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte); +extern void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *pte); #else /* CONFIG_MIGRATION */ static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) { @@ -359,7 +359,7 @@ static inline int is_migration_entry(swp_entry_t swp) static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } static inline void migration_entry_wait_huge(struct vm_area_struct *vma, - pte_t *pte) { } + unsigned long addr, pte_t *pte) { } static inline int is_writable_migration_entry(swp_entry_t entry) { return 0; -- cgit v1.2.3 From 8268614b408be6b76c1cee6f67de7deb0d6593b3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 2 Jul 2024 15:51:35 +0200 Subject: mm: remove CONFIG_ARCH_HAS_HUGEPD powerpc was the only user of CONFIG_ARCH_HAS_HUGEPD and doesn't use it anymore, so remove all related code. Link: https://lkml.kernel.org/r/4b10c54c794780b955f3ad6c657d0199dd792146.1719928057.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Acked-by: Oscar Salvador Cc: Jason Gunthorpe Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Peter Xu Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index a951c0d06061..ecd0ceeea206 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -20,12 +20,6 @@ struct user_struct; struct mmu_gather; struct node; -#ifndef CONFIG_ARCH_HAS_HUGEPD -typedef struct { unsigned long pd; } hugepd_t; -#define is_hugepd(hugepd) (0) -#define __hugepd(x) ((hugepd_t) { (x) }) -#endif - void free_huge_folio(struct folio *folio); #ifdef CONFIG_HUGETLB_PAGE -- cgit v1.2.3 From 3b0ba54d5f8ff60553c01d3ec3c607ab7bb3b452 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 3 Jul 2024 10:42:25 -0700 Subject: mm: add comments for allocation helpers explaining why they are macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A number of allocation helper functions were converted into macros to account them at the call sites. Add a comment for each converted allocation helper explaining why it has to be a macro and why we typecast the return value wherever required. The patch also moves acpi_os_acquire_object() closer to other allocation helpers to group them together under the same comment. The patch has no functional changes. Link: https://lkml.kernel.org/r/20240703174225.3891393-1-surenb@google.com Fixes: 2c321f3f70bc ("mm: change inlined allocation helpers to account at the call site") Signed-off-by: Suren Baghdasaryan Suggested-by: Andrew Morton Cc: Christian König Cc: Christoph Hellwig Cc: Jan Kara Cc: Kent Overstreet Cc: Thorsten Blum Signed-off-by: Andrew Morton --- include/acpi/platform/aclinuxex.h | 9 ++++++--- include/linux/bpf.h | 4 ++++ include/linux/dma-fence-chain.h | 4 ++++ include/linux/hid_bpf.h | 5 +++++ include/linux/jbd2.h | 10 ++++++++++ include/linux/skbuff.h | 8 ++++++++ include/linux/skmsg.h | 5 +++++ 7 files changed, 42 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/acpi/platform/aclinuxex.h b/include/acpi/platform/aclinuxex.h index 62cac266a1c8..eeff40295b4b 100644 --- a/include/acpi/platform/aclinuxex.h +++ b/include/acpi/platform/aclinuxex.h @@ -46,6 +46,9 @@ acpi_status acpi_os_terminate(void); * Interrupts are off during resume, just like they are for boot. * However, boot has (system_state != SYSTEM_RUNNING) * to quiet __might_sleep() in kmalloc() and resume does not. + * + * These specialized allocators have to be macros for their allocations to be + * accounted separately (to have separate alloc_tag). */ #define acpi_os_allocate(_size) \ kmalloc(_size, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL) @@ -53,14 +56,14 @@ acpi_status acpi_os_terminate(void); #define acpi_os_allocate_zeroed(_size) \ kzalloc(_size, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL) +#define acpi_os_acquire_object(_cache) \ + kmem_cache_zalloc(_cache, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL) + static inline void acpi_os_free(void *memory) { kfree(memory); } -#define acpi_os_acquire_object(_cache) \ - kmem_cache_zalloc(_cache, irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL) - static inline acpi_thread_id acpi_os_get_thread_id(void) { return (acpi_thread_id) (unsigned long)current; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b8637555c9c2..50b82ff4551a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2261,6 +2261,10 @@ void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size, void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, gfp_t flags); #else +/* + * These specialized allocators have to be macros for their allocations to be + * accounted separately (to have separate alloc_tag). + */ #define bpf_map_kmalloc_node(_map, _size, _flags, _node) \ kmalloc_node(_size, _flags, _node) #define bpf_map_kzalloc(_map, _size, _flags) \ diff --git a/include/linux/dma-fence-chain.h b/include/linux/dma-fence-chain.h index ad9e2506c2f4..68c3c1e41014 100644 --- a/include/linux/dma-fence-chain.h +++ b/include/linux/dma-fence-chain.h @@ -85,6 +85,10 @@ dma_fence_chain_contained(struct dma_fence *fence) * dma_fence_chain_alloc * * Returns a new struct dma_fence_chain object or NULL on failure. + * + * This specialized allocator has to be a macro for its allocations to be + * accounted separately (to have a separate alloc_tag). The typecast is + * intentional to enforce typesafety. */ #define dma_fence_chain_alloc() \ ((struct dma_fence_chain *)kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL)) diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index eec2592dec12..99a3edb6cf07 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -152,6 +152,11 @@ static inline int hid_bpf_connect_device(struct hid_device *hdev) { return 0; } static inline void hid_bpf_disconnect_device(struct hid_device *hdev) {} static inline void hid_bpf_destroy_device(struct hid_device *hid) {} static inline void hid_bpf_device_init(struct hid_device *hid) {} +/* + * This specialized allocator has to be a macro for its allocations to be + * accounted separately (to have a separate alloc_tag). The typecast is + * intentional to enforce typesafety. + */ #define call_hid_bpf_rdesc_fixup(_hdev, _rdesc, _size) \ ((u8 *)kmemdup(_rdesc, *(_size), GFP_KERNEL)) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index ab04c1c27fae..a280f42c8c76 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1588,6 +1588,11 @@ void jbd2_journal_put_journal_head(struct journal_head *jh); */ extern struct kmem_cache *jbd2_handle_cache; +/* + * This specialized allocator has to be a macro for its allocations to be + * accounted separately (to have a separate alloc_tag). The typecast is + * intentional to enforce typesafety. + */ #define jbd2_alloc_handle(_gfp_flags) \ ((handle_t *)kmem_cache_zalloc(jbd2_handle_cache, _gfp_flags)) @@ -1602,6 +1607,11 @@ static inline void jbd2_free_handle(handle_t *handle) */ extern struct kmem_cache *jbd2_inode_cache; +/* + * This specialized allocator has to be a macro for its allocations to be + * accounted separately (to have a separate alloc_tag). The typecast is + * intentional to enforce typesafety. + */ #define jbd2_alloc_inode(_gfp_flags) \ ((struct jbd2_inode *)kmem_cache_alloc(jbd2_inode_cache, _gfp_flags)) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1c2902eaebd3..fd51f2de51da 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3400,6 +3400,10 @@ static inline struct page *__dev_alloc_pages_noprof(gfp_t gfp_mask, } #define __dev_alloc_pages(...) alloc_hooks(__dev_alloc_pages_noprof(__VA_ARGS__)) +/* + * This specialized allocator has to be a macro for its allocations to be + * accounted separately (to have a separate alloc_tag). + */ #define dev_alloc_pages(_order) __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, _order) /** @@ -3416,6 +3420,10 @@ static inline struct page *__dev_alloc_page_noprof(gfp_t gfp_mask) } #define __dev_alloc_page(...) alloc_hooks(__dev_alloc_page_noprof(__VA_ARGS__)) +/* + * This specialized allocator has to be a macro for its allocations to be + * accounted separately (to have a separate alloc_tag). + */ #define dev_alloc_page() dev_alloc_pages(0) /** diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index c9efda9df285..d9b03e0746e7 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -414,6 +414,11 @@ void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock); int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock, struct sk_msg *msg); +/* + * This specialized allocator has to be a macro for its allocations to be + * accounted separately (to have a separate alloc_tag). The typecast is + * intentional to enforce typesafety. + */ #define sk_psock_init_link() \ ((struct sk_psock_link *)kzalloc(sizeof(struct sk_psock_link), \ GFP_ATOMIC | __GFP_NOWARN)) -- cgit v1.2.3 From a8585ac68621983587f1701b7567978fcbcd9573 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 3 Jul 2024 13:25:10 +0200 Subject: mm/page_counter: move calculating protection values to page_counter It's a lot of math, and there is nothing memcontrol specific about it. This makes it easier to use inside of the drm cgroup controller. [akpm@linux-foundation.org: fix kerneldoc, per Jeff Johnson] Link: https://lkml.kernel.org/r/20240703112510.36424-1-maarten.lankhorst@linux.intel.com Signed-off-by: Maarten Lankhorst Acked-by: Roman Gushchin Acked-by: Shakeel Butt Cc: Michal Hocko Cc: Johannes Weiner Cc: Muchun Song Cc: Jeff Johnson Signed-off-by: Andrew Morton --- include/linux/page_counter.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 8cd858d912c4..904c52f97284 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -81,4 +81,8 @@ static inline void page_counter_reset_watermark(struct page_counter *counter) counter->watermark = page_counter_read(counter); } +void page_counter_calculate_protection(struct page_counter *root, + struct page_counter *counter, + bool recursive_protection); + #endif /* _LINUX_PAGE_COUNTER_H */ -- cgit v1.2.3 From 823430c8e9d98c5865af518c782d0493b76aa511 Mon Sep 17 00:00:00 2001 From: "Ho-Ren (Jack) Chuang" Date: Thu, 4 Jul 2024 07:26:44 +0000 Subject: memory tier: consolidate the initialization of memory tiers The current memory tier initialization process is distributed across two different functions, memory_tier_init() and memory_tier_late_init(). This design is hard to maintain. Thus, this patch is proposed to reduce the possible code paths by consolidating different initialization patches into one. The earlier discussion with Jonathan and Ying is listed here: https://lore.kernel.org/lkml/20240405150244.00004b49@Huawei.com/ If we want to put these two initializations together, they must be placed together in the later function. Because only at that time, the HMAT information will be ready, adist between nodes can be calculated, and memory tiering can be established based on the adist. So we position the initialization at memory_tier_init() to the memory_tier_late_init() call. Moreover, it's natural to keep memory_tier initialization in drivers at device_initcall() level. If we simply move the set_node_memory_tier() from memory_tier_init() to late_initcall(), it will result in HMAT not registering the mt_adistance_algorithm callback function, because set_node_memory_tier() is not performed during the memory tiering initialization phase, leading to a lack of correct default_dram information. Therefore, we introduced a nodemask to pass the information of the default DRAM nodes. The reason for not choosing to reuse default_dram_type->nodes is that it is not clean enough. So in the end, we use a __initdata variable, which is a variable that is released once initialization is complete, including both CPU and memory nodes for HMAT to iterate through. Link: https://lkml.kernel.org/r/20240704072646.437579-1-horen.chuang@linux.dev Signed-off-by: Ho-Ren (Jack) Chuang Suggested-by: Jonathan Cameron Reviewed-by: "Huang, Ying" Reviewed-by: Jonathan Cameron Cc: Alistair Popple Cc: Aneesh Kumar K.V Cc: Dan Williams Cc: Dave Jiang Cc: Gregory Price Cc: Len Brown Cc: Michal Hocko Cc: Rafael J. Wysocki Cc: Ravi Jonnalagadda Cc: SeongJae Park Cc: Tejun Heo Signed-off-by: Andrew Morton --- include/linux/memory-tiers.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 0d70788558f4..0dc0cf2863e2 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -38,6 +38,7 @@ struct access_coordinate; #ifdef CONFIG_NUMA extern bool numa_demotion_enabled; extern struct memory_dev_type *default_dram_type; +extern nodemask_t default_dram_nodes; struct memory_dev_type *alloc_memory_type(int adistance); void put_memory_type(struct memory_dev_type *memtype); void init_node_memory_type(int node, struct memory_dev_type *default_type); @@ -76,6 +77,7 @@ static inline bool node_is_toptier(int node) #define numa_demotion_enabled false #define default_dram_type NULL +#define default_dram_nodes NODE_MASK_NONE /* * CONFIG_NUMA implementation returns non NULL error. */ -- cgit v1.2.3 From 00f58104202c472e487f0866fbd38832523fd4f9 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 4 Jul 2024 10:10:50 +0100 Subject: mm: fix khugepaged activation policy Since the introduction of mTHP, the docuementation has stated that khugepaged would be enabled when any mTHP size is enabled, and disabled when all mTHP sizes are disabled. There are 2 problems with this; 1. this is not what was implemented by the code and 2. this is not the desirable behavior. Desirable behavior is for khugepaged to be enabled when any PMD-sized THP is enabled, anon or file. (Note that file THP is still controlled by the top-level control so we must always consider that, as well as the PMD-size mTHP control for anon). khugepaged only supports collapsing to PMD-sized THP so there is no value in enabling it when PMD-sized THP is disabled. So let's change the code and documentation to reflect this policy. Further, per-size enabled control modification events were not previously forwarded to khugepaged to give it an opportunity to start or stop. Consequently the following was resulting in khugepaged eroneously not being activated: echo never > /sys/kernel/mm/transparent_hugepage/enabled echo always > /sys/kernel/mm/transparent_hugepage/hugepages-2048kB/enabled [ryan.roberts@arm.com: v3] Link: https://lkml.kernel.org/r/20240705102849.2479686-1-ryan.roberts@arm.com Link: https://lkml.kernel.org/r/20240705102849.2479686-1-ryan.roberts@arm.com Link: https://lkml.kernel.org/r/20240704091051.2411934-1-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Fixes: 3485b88390b0 ("mm: thp: introduce multi-size THP sysfs interface") Closes: https://lore.kernel.org/linux-mm/7a0bbe69-1e3d-4263-b206-da007791a5c4@redhat.com/ Acked-by: David Hildenbrand Cc: Baolin Wang Cc: Barry Song Cc: Jonathan Corbet Cc: Lance Yang Cc: Yang Shi Cc: Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index cee3c5da8f0e..acb6ac24a07e 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -128,18 +128,6 @@ static inline bool hugepage_global_always(void) (1< Date: Fri, 5 Jul 2024 11:23:09 +0800 Subject: mm: thp: support "THPeligible" semantics for mTHP with anonymous shmem After the commit 7fb1b252afb5 ("mm: shmem: add mTHP support for anonymous shmem"), we can configure different policies through the multi-size THP sysfs interface for anonymous shmem. But currently "THPeligible" indicates only whether the mapping is eligible for allocating THP-pages as well as the THP is PMD mappable or not for anonymous shmem, we need to support semantics for mTHP with anonymous shmem similar to those for mTHP with anonymous memory. Link: https://lkml.kernel.org/r/20240705032309.24933-1-libang.li@antgroup.com Signed-off-by: Bang Li Reviewed-by: Baolin Wang Cc: David Hildenbrand Cc: Hugh Dickins Cc: Kefeng Wang Cc: Lance Yang Cc: Ryan Roberts Cc: Zi Yan Signed-off-by: Andrew Morton --- include/linux/shmem_fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 3fb18f7eb73e..1d06b1e5408a 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -113,12 +113,21 @@ int shmem_unuse(unsigned int type); #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, struct mm_struct *mm, unsigned long vm_flags); +unsigned long shmem_allowable_huge_orders(struct inode *inode, + struct vm_area_struct *vma, pgoff_t index, + bool global_huge); #else static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force, struct mm_struct *mm, unsigned long vm_flags) { return false; } +static inline unsigned long shmem_allowable_huge_orders(struct inode *inode, + struct vm_area_struct *vma, pgoff_t index, + bool global_huge) +{ + return 0; +} #endif #ifdef CONFIG_SHMEM -- cgit v1.2.3 From 8a78882dac1c8c464e047a29415edb50421651ce Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 8 Jul 2024 11:05:44 +0800 Subject: mm/memory-failure: remove obsolete MF_MSG_DIFFERENT_COMPOUND The page cannot become compound pages again just after a folio is split as an extra refcnt is held. So the MF_MSG_DIFFERENT_COMPOUND case is obsolete and can be removed to get rid of this false assumption and code burden. But add one WARN_ON() here to keep the situation clear. Link: https://lkml.kernel.org/r/20240708030544.196919-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin Cc: Borislav Petkov (AMD) Cc: Naoya Horiguchi Cc: Tony Luck Signed-off-by: Andrew Morton --- include/ras/ras_event.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 9bc707fe8819..e5f7ee0864e7 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -356,7 +356,6 @@ TRACE_EVENT(aer_event, #define MF_PAGE_TYPE \ EM ( MF_MSG_KERNEL, "reserved kernel page" ) \ EM ( MF_MSG_KERNEL_HIGH_ORDER, "high-order kernel page" ) \ - EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \ EM ( MF_MSG_HUGE, "huge page" ) \ EM ( MF_MSG_FREE_HUGE, "free huge page" ) \ EM ( MF_MSG_GET_HWPOISON, "get hwpoison page" ) \ -- cgit v1.2.3 From 63d9866ab01ffd0d0835d5564107283a4afc0a38 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Wed, 10 Jul 2024 10:55:01 +0100 Subject: mm: shmem: rename mTHP shmem counters The legacy PMD-sized THP counters at /proc/vmstat include thp_file_alloc, thp_file_fallback and thp_file_fallback_charge, which rather confusingly refer to shmem THP and do not include any other types of file pages. This is inconsistent since in most other places in the kernel, THP counters are explicitly separated for anon, shmem and file flavours. However, we are stuck with it since it constitutes a user ABI. Recently, commit 66f44583f9b6 ("mm: shmem: add mTHP counters for anonymous shmem") added equivalent mTHP stats for shmem, keeping the same "file_" prefix in the names. But in future, we may want to add extra stats to cover actual file pages, at which point, it would all become very confusing. So let's take the opportunity to rename these new counters "shmem_" before the change makes it upstream and the ABI becomes immutable. While we are at it, let's improve the documentation for the legacy counters to make it clear that they count shmem pages only. Link: https://lkml.kernel.org/r/20240710095503.3193901-1-ryan.roberts@arm.com Signed-off-by: Ryan Roberts Reviewed-by: Baolin Wang Reviewed-by: Lance Yang Reviewed-by: Zi Yan Reviewed-by: Barry Song Acked-by: David Hildenbrand Cc: Daniel Gomez Cc: Hugh Dickins Cc: Jonathan Corbet Cc: Matthew Wilcox (Oracle) Cc: Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index acb6ac24a07e..cff002be83eb 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -269,9 +269,9 @@ enum mthp_stat_item { MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE, MTHP_STAT_SWPOUT, MTHP_STAT_SWPOUT_FALLBACK, - MTHP_STAT_FILE_ALLOC, - MTHP_STAT_FILE_FALLBACK, - MTHP_STAT_FILE_FALLBACK_CHARGE, + MTHP_STAT_SHMEM_ALLOC, + MTHP_STAT_SHMEM_FALLBACK, + MTHP_STAT_SHMEM_FALLBACK_CHARGE, MTHP_STAT_SPLIT, MTHP_STAT_SPLIT_FAILED, MTHP_STAT_SPLIT_DEFERRED, -- cgit v1.2.3 From 7a7127aa33c9f5b7b54ffa80619f644c5e000846 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 7 Jul 2024 01:05:05 +0900 Subject: init: remove unused __MEMINIT* macros These macros are not used anywhere. Link: https://lkml.kernel.org/r/20240706160511.2331061-1-masahiroy@kernel.org Signed-off-by: Masahiro Yamada Reviewed-by: Wei Yang Signed-off-by: Andrew Morton --- include/linux/init.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/init.h b/include/linux/init.h index 58cef4c2e59a..b2e9dfff8691 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -99,10 +99,6 @@ #define __INITRODATA .section ".init.rodata","a",%progbits #define __FINITDATA .previous -#define __MEMINIT .section ".meminit.text", "ax" -#define __MEMINITDATA .section ".meminit.data", "aw" -#define __MEMINITRODATA .section ".meminit.rodata", "a" - /* silence warnings when references are OK */ #define __REF .section ".ref.text", "ax" #define __REFDATA .section ".ref.data", "aw" -- cgit v1.2.3 From 73db3abdca58c8a014ec4c88cf5ef925cbf63669 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 7 Jul 2024 01:05:06 +0900 Subject: init/modpost: conditionally check section mismatch to __meminit* This reverts commit eb8f689046b8 ("Use separate sections for __dev/ _cpu/__mem code/data"). Check section mismatch to __meminit* only when CONFIG_MEMORY_HOTPLUG=n. With this change, the linker script and modpost become simpler, and we can get rid of the __ref annotations from the memory hotplug code. [sfr@canb.auug.org.au: remove MEM_KEEP from arch/powerpc/kernel/vmlinux.lds.S] Link: https://lkml.kernel.org/r/20240710093213.2aefb25f@canb.auug.org.au Link: https://lkml.kernel.org/r/20240706160511.2331061-2-masahiroy@kernel.org Signed-off-by: Masahiro Yamada Signed-off-by: Stephen Rothwell Reviewed-by: Wei Yang Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/asm-generic/vmlinux.lds.h | 18 ++---------------- include/linux/init.h | 14 +++++++++----- 2 files changed, 11 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5703526d6ebf..0db89c0aa2cc 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -141,14 +141,6 @@ * often happens at runtime) */ -#if defined(CONFIG_MEMORY_HOTPLUG) -#define MEM_KEEP(sec) *(.mem##sec) -#define MEM_DISCARD(sec) -#else -#define MEM_KEEP(sec) -#define MEM_DISCARD(sec) *(.mem##sec) -#endif - #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_NO_PATCHABLE #define KEEP_PATCHABLE KEEP(*(__patchable_function_entries)) #define PATCHABLE_DISCARDS @@ -357,7 +349,6 @@ *(.data..decrypted) \ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ - MEM_KEEP(init.data*) \ *(.data.unlikely) \ __start_once = .; \ *(.data.once) \ @@ -542,7 +533,6 @@ /* __*init sections */ \ __init_rodata : AT(ADDR(__init_rodata) - LOAD_OFFSET) { \ *(.ref.rodata) \ - MEM_KEEP(init.rodata) \ } \ \ /* Built-in module parameters. */ \ @@ -593,8 +583,7 @@ *(.text.unknown .text.unknown.*) \ NOINSTR_TEXT \ *(.ref.text) \ - *(.text.asan.* .text.tsan.*) \ - MEM_KEEP(init.text*) \ + *(.text.asan.* .text.tsan.*) /* sched.text is aling to function alignment to secure we have same @@ -701,7 +690,6 @@ #define INIT_DATA \ KEEP(*(SORT(___kentry+*))) \ *(.init.data .init.data.*) \ - MEM_DISCARD(init.data*) \ KERNEL_CTORS() \ MCOUNT_REC() \ *(.init.rodata .init.rodata.*) \ @@ -709,7 +697,6 @@ TRACE_SYSCALLS() \ KPROBE_BLACKLIST() \ ERROR_INJECT_WHITELIST() \ - MEM_DISCARD(init.rodata) \ CLK_OF_TABLES() \ RESERVEDMEM_OF_TABLES() \ TIMER_OF_TABLES() \ @@ -727,8 +714,7 @@ #define INIT_TEXT \ *(.init.text .init.text.*) \ - *(.text.startup) \ - MEM_DISCARD(init.text*) + *(.text.startup) #define EXIT_DATA \ *(.exit.data .exit.data.*) \ diff --git a/include/linux/init.h b/include/linux/init.h index b2e9dfff8691..ee1309473bc6 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -84,11 +84,15 @@ #define __exit __section(".exit.text") __exitused __cold notrace -/* Used for MEMORY_HOTPLUG */ -#define __meminit __section(".meminit.text") __cold notrace \ - __latent_entropy -#define __meminitdata __section(".meminit.data") -#define __meminitconst __section(".meminit.rodata") +#ifdef CONFIG_MEMORY_HOTPLUG +#define __meminit +#define __meminitdata +#define __meminitconst +#else +#define __meminit __init +#define __meminitdata __initdata +#define __meminitconst __initconst +#endif /* For assembly routines */ #define __HEAD .section ".head.text","ax" -- cgit v1.2.3 From d69ba6bbaf1f606ac354e925571a54d025e32aae Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 11 Jul 2024 15:07:03 -0700 Subject: net: ethtool: let drivers remove lost RSS contexts RSS contexts may get lost from a device, in various extreme circumstances. Specifically if the firmware leaks resources and resets, or crashes and either recovers in partially working state or the crash causes a different FW version to run - creating the context again may fail. Drivers should do their absolute best to prevent this from happening. When it does, however, telling user that a context exists, when it can't possibly be used any more is counter productive. Add a helper for drivers to discard contexts. Print an error, in the future netlink notification will also be sent. More robust approaches were proposed, like keeping the contexts but marking them as "dead" (but possibly resurrected by next reset). That may be better but it's unclear at this stage whether the effort is worth the benefits. Reviewed-by: Pavan Chebbi Link: https://patch.msgid.link/20240711220713.283778-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e213b5508da6..89da0254ccd4 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -210,6 +210,8 @@ static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size, return struct_size_t(struct ethtool_rxfh_context, data, flex_len); } +void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) -- cgit v1.2.3 From 28c8757a792bbbc76407777bd0303862daa75057 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 11 Jul 2024 15:07:04 -0700 Subject: net: ethtool: let drivers declare max size of RSS indir table and key Some drivers (bnxt but I think also mlx5 from ML discussions) change the size of the indirection table depending on the number of Rx rings. Decouple the max table size from the size of the currently used table, so that we can reserve space in the context for table growth. Static members in ethtool_ops are good enough for now, we can add callbacks to read the max size more dynamically if someone needs that. Reviewed-by: Pavan Chebbi Link: https://patch.msgid.link/20240711220713.283778-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 89da0254ccd4..a1ee76936f53 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -181,6 +181,7 @@ struct ethtool_rxfh_context { /* private: driver private data, indirection table, and hash key are * stored sequentially in @data area. Use below helpers to access. */ + u32 key_off; u8 data[] __aligned(sizeof(void *)); }; @@ -196,18 +197,7 @@ static inline u32 *ethtool_rxfh_context_indir(struct ethtool_rxfh_context *ctx) static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) { - return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size); -} - -static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size, - u16 priv_size) -{ - size_t indir_bytes = array_size(indir_size, sizeof(u32)); - size_t flex_len; - - flex_len = size_add(size_add(indir_bytes, key_size), - ALIGN(priv_size, sizeof(u32))); - return struct_size_t(struct ethtool_rxfh_context, data, flex_len); + return &ctx->data[ctx->key_off]; } void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id); @@ -723,6 +713,10 @@ struct ethtool_rxfh_param { * contexts. * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor * RSS. + * @rxfh_indir_space: max size of RSS indirection tables, if indirection table + * size as returned by @get_rxfh_indir_size may change during lifetime + * of the device. Leave as 0 if the table size is constant. + * @rxfh_key_space: same as @rxfh_indir_space, but for the key. * @rxfh_priv_size: size of the driver private data area the core should * allocate for an RSS context (in &struct ethtool_rxfh_context). * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this @@ -940,6 +934,8 @@ struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; + u32 rxfh_indir_space; + u16 rxfh_key_space; u16 rxfh_priv_size; u32 rxfh_max_context_id; u32 supported_coalesce_params; -- cgit v1.2.3 From 5d89b5bdbce3937c86f05ffe19455c3068fd94f7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 12 Jul 2024 11:52:40 +0200 Subject: i2c: document new callbacks in i2c_algorithm When updating the callbacks, adding their kernel-doc was forgotten. Add it now. Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/r/20240712165527.75e4ddc9@canb.auug.org.au Fixes: a93c2e5fe766 ("i2c: reword i2c_algorithm according to newest specification") Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 9d45b7b912dd..e9cc14b1f9a1 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -513,10 +513,10 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info, /** * struct i2c_algorithm - represent I2C transfer method - * @master_xfer: Issue a set of i2c transactions to the given I2C adapter + * @xfer: Issue a set of i2c transactions to the given I2C adapter * defined by the msgs array, with num messages available to transfer via * the adapter specified by adap. - * @master_xfer_atomic: same as @master_xfer. Yet, only using atomic context + * @xfer_atomic: same as @xfer. Yet, only using atomic context * so e.g. PMICs can be accessed very late before shutdown. Optional. * @smbus_xfer: Issue smbus transactions to the given I2C adapter. If this * is not present, then the bus layer will try and convert the SMBus calls @@ -525,27 +525,33 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info, * so e.g. PMICs can be accessed very late before shutdown. Optional. * @functionality: Return the flags that this algorithm/adapter pair supports * from the ``I2C_FUNC_*`` flags. - * @reg_slave: Register given client to I2C slave mode of this adapter - * @unreg_slave: Unregister given client from I2C slave mode of this adapter + * @reg_target: Register given client to local target mode of this adapter + * @unreg_target: Unregister given client from local target mode of this adapter + * + * @master_xfer: deprecated, use @xfer + * @master_xfer_atomic: deprecated, use @xfer_atomic + * @reg_slave: deprecated, use @reg_target + * @unreg_slave: deprecated, use @unreg_target + * * * The following structs are for those who like to implement new bus drivers: * i2c_algorithm is the interface to a class of hardware solutions which can * be addressed using the same bus algorithms - i.e. bit-banging or the PCF8584 * to name two of the most common. * - * The return codes from the ``master_xfer{_atomic}`` fields should indicate the + * The return codes from the ``xfer{_atomic}`` fields should indicate the * type of error code that occurred during the transfer, as documented in the * Kernel Documentation file Documentation/i2c/fault-codes.rst. Otherwise, the * number of messages executed should be returned. */ struct i2c_algorithm { /* - * If an adapter algorithm can't do I2C-level access, set master_xfer + * If an adapter algorithm can't do I2C-level access, set xfer * to NULL. If an adapter algorithm can do SMBus access, set * smbus_xfer. If set to NULL, the SMBus protocol is simulated * using common I2C messages. * - * master_xfer should return the number of messages successfully + * xfer should return the number of messages successfully * processed, or a negative value on error */ union { -- cgit v1.2.3 From 63c6e08eac8e7e2ad2e883c968e58e23e1dc8c70 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 11 Jul 2024 17:33:07 -0700 Subject: net/mlx5: IFC updates for SF max IO EQs Expose a new cap sf_eq_usage. The vhca_resource_manager can write this cap, indicating the SF driver should use max_num_eqs_24b to determine how many EQs to use. Will be used in the next patch, to indicate to the SF driver from the PF that the user has set the max io eqs via devlink. So the SF driver can later query the proper max eq value from the new cap. devlink port function set pci/0000:08:00.0/32768 max_io_eqs 32 Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Link: https://patch.msgid.link/20240712003310.355106-2-saeed@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index fdad0071d599..360d42f041b0 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1994,7 +1994,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 migration_tracking_state[0x1]; u8 reserved_at_ca[0x6]; u8 migration_in_chunks[0x1]; - u8 reserved_at_d1[0xf]; + u8 reserved_at_d1[0x1]; + u8 sf_eq_usage[0x1]; + u8 reserved_at_d3[0xd]; u8 cross_vhca_object_to_object_supported[0x20]; -- cgit v1.2.3 From 084ebf7ca83e6cb743784f2eecc654193ce064fb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 21 Jun 2024 13:50:43 -0700 Subject: execve: Keep bprm->argmin behind CONFIG_MMU When argmin was added in commit 655c16a8ce9c ("exec: separate MM_ANONPAGES and RLIMIT_STACK accounting"), it was intended only for validating stack limits on CONFIG_MMU[1]. All checking for reaching the limit (argmin) is wrapped in CONFIG_MMU ifdef checks, though setting argmin was not. That argmin is only supposed to be used under CONFIG_MMU was rediscovered recently[2], and I don't want to trip over this again. Move argmin's declaration into the existing CONFIG_MMU area, and add helpers functions so the MMU tests can be consolidated. Link: https://lore.kernel.org/all/20181126122307.GA1660@redhat.com [1] Link: https://lore.kernel.org/all/202406211253.7037F69@keescook/ [2] Link: https://lore.kernel.org/r/20240621205046.4001362-1-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/binfmts.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 70f97f685bff..e6c00e860951 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -19,13 +19,13 @@ struct linux_binprm { #ifdef CONFIG_MMU struct vm_area_struct *vma; unsigned long vma_pages; + unsigned long argmin; /* rlimit marker for copy_strings() */ #else # define MAX_ARG_PAGES 32 struct page *page[MAX_ARG_PAGES]; #endif struct mm_struct *mm; unsigned long p; /* current top of mem */ - unsigned long argmin; /* rlimit marker for copy_strings() */ unsigned int /* Should an execfd be passed to userspace? */ have_execfd:1, -- cgit v1.2.3 From 872bb37f6829d4f7f3ed5afe2786add3d4384b4b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 2 Jul 2024 14:16:16 -0700 Subject: randomize_kstack: Improve stack alignment codegen The codgen for adding architecture-specific stack alignment to the effective alloca() usage is somewhat inefficient and allows a bit to get carried beyond the desired entropy range. This isn't really a problem, but it's unexpected and the codegen is kind of bad. Quoting Mark[1], the disassembly for arm64's invoke_syscall() looks like: // offset = raw_cpu_read(kstack_offset) mov x4, sp adrp x0, kstack_offset mrs x5, tpidr_el1 add x0, x0, #:lo12:kstack_offset ldr w0, [x0, x5] // offset = KSTACK_OFFSET_MAX(offset) and x0, x0, #0x3ff // alloca(offset) add x0, x0, #0xf and x0, x0, #0x7f0 sub sp, x4, x0 ... which in C would be: offset = raw_cpu_read(kstack_offset) offset &= 0x3ff; // [0x0, 0x3ff] offset += 0xf; // [0xf, 0x40e] offset &= 0x7f0; // [0x0, ... so when *all* bits [3:0] are 0, they'll have no impact, and when *any* of bits [3:0] are 1 they'll trigger a carry into bit 4, which could ripple all the way up and spill into bit 10. Switch the masking in KSTACK_OFFSET_MAX() to explicitly clear the bottom bits to avoid the rounding by using 0b1111110000 instead of 0b1111111111: // offset = raw_cpu_read(kstack_offset) mov x4, sp adrp x0, 0 mrs x5, tpidr_el1 add x0, x0, #:lo12:kstack_offset ldr w0, [x0, x5] // offset = KSTACK_OFFSET_MAX(offset) and x0, x0, #0x3f0 // alloca(offset) sub sp, x4, x0 Suggested-by: Mark Rutland Link: https://lore.kernel.org/lkml/ZnVfOnIuFl2kNWkT@J2N7QTR9R3/ [1] Link: https://lore.kernel.org/r/20240702211612.work.576-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/randomize_kstack.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index 6d92b68efbf6..1d982dbdd0d0 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -32,13 +32,19 @@ DECLARE_PER_CPU(u32, kstack_offset); #endif /* - * Use, at most, 10 bits of entropy. We explicitly cap this to keep the - * "VLA" from being unbounded (see above). 10 bits leaves enough room for - * per-arch offset masks to reduce entropy (by removing higher bits, since - * high entropy may overly constrain usable stack space), and for - * compiler/arch-specific stack alignment to remove the lower bits. + * Use, at most, 6 bits of entropy (on 64-bit; 8 on 32-bit). This cap is + * to keep the "VLA" from being unbounded (see above). Additionally clear + * the bottom 4 bits (on 64-bit systems, 2 for 32-bit), since stack + * alignment will always be at least word size. This makes the compiler + * code gen better when it is applying the actual per-arch alignment to + * the final offset. The resulting randomness is reasonable without overly + * constraining usable stack space. */ -#define KSTACK_OFFSET_MAX(x) ((x) & 0x3FF) +#ifdef CONFIG_64BIT +#define KSTACK_OFFSET_MAX(x) ((x) & 0b1111110000) +#else +#define KSTACK_OFFSET_MAX(x) ((x) & 0b1111111100) +#endif /** * add_random_kstack_offset - Increase stack utilization by previously -- cgit v1.2.3 From 1df03a4b44146c4f720d793915747272c7773a3e Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Thu, 11 Jul 2024 06:37:57 -0700 Subject: RDMA/mana_ib: Set correct device into ib Add mana_get_primary_netdev_rcu helper to get a primary netdevice for a given port. When mana is used with netvsc, the VF netdev is controlled by an upper netvsc device. In a baremetal case, the VF netdev is the primary device. Use the mana_get_primary_netdev_rcu() helper in the mana_ib to get the correct device for querying network states. Fixes: 8b184e4f1c32 ("RDMA/mana_ib: Enable RoCE on port 1") Signed-off-by: Konstantin Taranov Link: https://lore.kernel.org/r/1720705077-322-1-git-send-email-kotaranov@linux.microsoft.com Reviewed-by: Long Li Reviewed-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- include/net/mana/mana.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 59823901b74f..f9b4b0dcb69f 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -797,4 +797,6 @@ void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, u32 doorbell_pg_id); void mana_uncfg_vport(struct mana_port_context *apc); + +struct net_device *mana_get_primary_netdev_rcu(struct mana_context *ac, u32 port_index); #endif /* _MANA_H */ -- cgit v1.2.3 From 252442f2ae317d109ef0b4b39ce0608c09563042 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 10 Jul 2024 10:14:28 +0200 Subject: ipv6: fix source address selection with route leak By default, an address assigned to the output interface is selected when the source address is not specified. This is problematic when a route, configured in a vrf, uses an interface from another vrf (aka route leak). The original vrf does not own the selected source address. Let's add a check against the output interface and call the appropriate function to select the source address. CC: stable@vger.kernel.org Fixes: 0d240e7811c4 ("net: vrf: Implement get_saddr for IPv6") Signed-off-by: Nicolas Dichtel Link: https://patch.msgid.link/20240710081521.3809742-3-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- include/net/ip6_route.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a18ed24fed94..6dbdf60b342f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -127,18 +127,26 @@ void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, const struct in6_addr *daddr, - unsigned int prefs, + unsigned int prefs, int l3mdev_index, struct in6_addr *saddr) { + struct net_device *l3mdev; + struct net_device *dev; + bool same_vrf; int err = 0; - if (f6i && f6i->fib6_prefsrc.plen) { + rcu_read_lock(); + + l3mdev = dev_get_by_index_rcu(net, l3mdev_index); + if (!f6i || !f6i->fib6_prefsrc.plen || l3mdev) + dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + same_vrf = !l3mdev || l3mdev_master_dev_rcu(dev) == l3mdev; + if (f6i && f6i->fib6_prefsrc.plen && same_vrf) *saddr = f6i->fib6_prefsrc.addr; - } else { - struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + else + err = ipv6_dev_get_saddr(net, same_vrf ? dev : l3mdev, daddr, prefs, saddr); - err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); - } + rcu_read_unlock(); return err; } -- cgit v1.2.3 From 1a616c2fe96b357894b74b41787d4ea6987f6199 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 21 Dec 2023 20:34:17 -0500 Subject: lockdep: lockdep_set_notrack_class() Add a new helper to disable lockdep tracking entirely for a given class. This is needed for bcachefs, which takes too many btree node locks for lockdep to track. Instead, we have a single lockdep_map for "btree_trans has any btree nodes locked", which makes more since given that we have centralized lock management and a cycle detector. Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Will Deacon Cc: Waiman Long Cc: Boqun Feng Signed-off-by: Kent Overstreet --- include/linux/lockdep.h | 4 ++++ include/linux/lockdep_types.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 08b0d1d9d78b..b76f1bcd2f7f 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -181,6 +181,9 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name, #define lockdep_set_novalidate_class(lock) \ lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock) +#define lockdep_set_notrack_class(lock) \ + lockdep_set_class_and_name(lock, &__lockdep_no_track__, #lock) + /* * Compare locking classes */ @@ -338,6 +341,7 @@ static inline void lockdep_set_selftest_task(struct task_struct *task) #define lockdep_set_subclass(lock, sub) do { } while (0) #define lockdep_set_novalidate_class(lock) do { } while (0) +#define lockdep_set_notrack_class(lock) do { } while (0) /* * We don't define lockdep_match_class() and lockdep_match_key() for !LOCKDEP diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index 70d30d40ea4a..9f361d3ab9d9 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -80,6 +80,7 @@ struct lock_class_key { }; extern struct lock_class_key __lockdep_no_validate__; +extern struct lock_class_key __lockdep_no_track__; struct lock_trace; -- cgit v1.2.3 From 0ece498c27d8cd2fdad6f49a6abc34b8badd8fbc Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 10 May 2024 10:36:45 -0400 Subject: Bluetooth: MGMT: Make MGMT_OP_LOAD_CONN_PARAM update existing connection This makes MGMT_OP_LOAD_CONN_PARAM update existing connection by dectecting the request is just for one connection, parameters already exists and there is a connection. Since this is a new behavior the revision is also updated to enable userspace to detect it. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 534c3386e714..20168732f20e 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -138,6 +138,7 @@ int hci_suspend_sync(struct hci_dev *hdev); int hci_resume_sync(struct hci_dev *hdev); struct hci_conn; +struct hci_conn_params; int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason); @@ -156,3 +157,5 @@ int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn); int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn); +int hci_le_conn_update_sync(struct hci_dev *hdev, struct hci_conn *conn, + struct hci_conn_params *params); -- cgit v1.2.3 From 8f7dfe171c576aaec4911cc59feaed26d79c7c7f Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Sat, 18 May 2024 10:30:38 +0200 Subject: Bluetooth: hci_core: Prefer struct_size over open coded arithmetic This is an effort to get rid of all multiplications from allocation functions in order to prevent integer overflows [1][2]. As the "dl" variable is a pointer to "struct hci_dev_list_req" and this structure ends in a flexible array: struct hci_dev_list_req { [...] struct hci_dev_req dev_req[]; /* hci_dev_req structures */ }; the preferred way in the kernel is to use the struct_size() helper to do the arithmetic instead of the calculation "size + count * size" in the kzalloc() and copy_to_user() functions. At the same time, prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). In this case, it is important to note that the logic needs a little refactoring to ensure that the "dev_num" member is initialized before the first access to the flex array. Specifically, add the assignment before the list_for_each_entry() loop. Also remove the "size" variable as it is no longer needed. This way, the code is more readable and safer. This code was detected with the help of Coccinelle, and audited and modified manually. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [1] Link: https://github.com/KSPP/linux/issues/160 [2] Reviewed-by: Kees Cook Signed-off-by: Erick Archer Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_sock.h b/include/net/bluetooth/hci_sock.h index 9949870f7d78..13e8cd4414a1 100644 --- a/include/net/bluetooth/hci_sock.h +++ b/include/net/bluetooth/hci_sock.h @@ -144,7 +144,7 @@ struct hci_dev_req { struct hci_dev_list_req { __u16 dev_num; - struct hci_dev_req dev_req[]; /* hci_dev_req structures */ + struct hci_dev_req dev_req[] __counted_by(dev_num); }; struct hci_conn_list_req { -- cgit v1.2.3 From 7d2c7ddba6238e6a14cd89ef869878dd22f2a661 Mon Sep 17 00:00:00 2001 From: Erick Archer Date: Fri, 17 May 2024 19:21:49 +0200 Subject: tty: rfcomm: prefer struct_size over open coded arithmetic This is an effort to get rid of all multiplications from allocation functions in order to prevent integer overflows [1][2]. As the "dl" variable is a pointer to "struct rfcomm_dev_list_req" and this structure ends in a flexible array: struct rfcomm_dev_list_req { [...] struct rfcomm_dev_info dev_info[]; }; the preferred way in the kernel is to use the struct_size() helper to do the arithmetic instead of the calculation "size + count * size" in the kzalloc() and copy_to_user() functions. At the same time, prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS (for array indexing) and CONFIG_FORTIFY_SOURCE (for strcpy/memcpy-family functions). In this case, it is important to note that the logic needs a little refactoring to ensure that the "dev_num" member is initialized before the first access to the flex array. Specifically, add the assignment before the list_for_each_entry() loop. Also remove the "size" variable as it is no longer needed. This way, the code is more readable and safer. This code was detected with the help of Coccinelle, and audited and modified manually. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [1] Link: https://github.com/KSPP/linux/issues/160 [2] Reviewed-by: Kees Cook Signed-off-by: Erick Archer Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/rfcomm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 99d26879b02a..c05882476900 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -355,7 +355,7 @@ struct rfcomm_dev_info { struct rfcomm_dev_list_req { u16 dev_num; - struct rfcomm_dev_info dev_info[]; + struct rfcomm_dev_info dev_info[] __counted_by(dev_num); }; int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); -- cgit v1.2.3 From f25b7fd36cc3a850e006aed686f5bbecd200de1b Mon Sep 17 00:00:00 2001 From: Ying Hsu Date: Wed, 29 May 2024 08:00:00 +0000 Subject: Bluetooth: Add vendor-specific packet classification for ISO data When HCI raw sockets are opened, the Bluetooth kernel module doesn't track CIS/BIS connections. User-space applications have to identify ISO data by maintaining connection information and look up the mapping for each ACL data packet received. Besides, btsnoop log captured in kernel couldn't tell ISO data from ACL data in this case. To avoid additional lookups, this patch introduces vendor-specific packet classification for Intel BT controllers to distinguish ISO data packets from ACL data packets. Signed-off-by: Ying Hsu Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c43716edf205..f7de2681d457 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -649,6 +649,7 @@ struct hci_dev { int (*get_codec_config_data)(struct hci_dev *hdev, __u8 type, struct bt_codec *codec, __u8 *vnd_len, __u8 **vnd_data); + u8 (*classify_pkt_type)(struct hci_dev *hdev, struct sk_buff *skb); }; #define HCI_PHY_HANDLE(handle) (handle & 0xff) -- cgit v1.2.3 From da63f331353c9e1e6dc29e49e28f8f4fe5d642fd Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 18 Jun 2024 21:59:32 +0300 Subject: Bluetooth: hci_core, hci_sync: cleanup struct discovery_state After commit 78db544b5d27 ("Bluetooth: hci_core: Remove le_restart_scan work"), 'scan_start' and 'scan_duration' of 'struct discovery_state' are still initialized but actually unused. So remove the aforementioned fields and adjust 'hci_discovery_filter_clear()' and 'le_scan_disable()' accordingly. Compile tested only. Fixes: 78db544b5d27 ("Bluetooth: hci_core: Remove le_restart_scan work") Signed-off-by: Dmitry Antipov Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f7de2681d457..eaeaf3dc07aa 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -91,8 +91,6 @@ struct discovery_state { s8 rssi; u16 uuid_count; u8 (*uuids)[16]; - unsigned long scan_start; - unsigned long scan_duration; unsigned long name_resolve_timeout; }; @@ -891,8 +889,6 @@ static inline void hci_discovery_filter_clear(struct hci_dev *hdev) hdev->discovery.uuid_count = 0; kfree(hdev->discovery.uuids); hdev->discovery.uuids = NULL; - hdev->discovery.scan_start = 0; - hdev->discovery.scan_duration = 0; } bool hci_discovery_active(struct hci_dev *hdev); -- cgit v1.2.3 From 2c1583290b08c5aa9005178a573be8f329de2976 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Hor=C3=A1k=20=282N=29?= Date: Fri, 12 Jul 2024 17:07:06 +0200 Subject: net: phy: bcm54811: New link mode for BroadR-Reach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a new link mode necessary for 10 MBit single-pair connection in BroadR-Reach mode on bcm5481x PHY by Broadcom. This new link mode, 10baseT1BRR, is known as 1BR10 in the Broadcom terminology. Another link mode to be used is 1BR100 and it is already present as 100baseT1, because Broadcom's 1BR100 became 100baseT1 (IEEE 802.3bw). Signed-off-by: Kamil Horák (2N) Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20240712150709.3134474-2-kamilh@axis.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 230110b97029..4a0a6e703483 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2054,6 +2054,7 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_10baseT1S_Full_BIT = 99, ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100, ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101, + ETHTOOL_LINK_MODE_10baseT1BRR_Full_BIT = 102, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS -- cgit v1.2.3 From ff253875ff3bab75214c4e94352956dc02a0d965 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kamil=20Hor=C3=A1k=20=282N=29?= Date: Fri, 12 Jul 2024 17:07:07 +0200 Subject: net: phy: bcm54811: Add LRE registers definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the definitions of LRE registers for Broadcom BCM5481x PHY Signed-off-by: Kamil Horák (2N) Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20240712150709.3134474-3-kamilh@axis.com Signed-off-by: Jakub Kicinski --- include/linux/brcmphy.h | 88 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) (limited to 'include') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 1394ba302367..028b3e00378e 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -271,12 +271,100 @@ #define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ #define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ +/* BroadR-Reach LRE Registers. */ +#define MII_BCM54XX_LRECR 0x00 /* LRE Control Register */ +#define MII_BCM54XX_LRESR 0x01 /* LRE Status Register */ +#define MII_BCM54XX_LREPHYSID1 0x02 /* LRE PHYS ID 1 */ +#define MII_BCM54XX_LREPHYSID2 0x03 /* LRE PHYS ID 2 */ +#define MII_BCM54XX_LREANAA 0x04 /* LDS Auto-Negotiation Advertised Ability */ +#define MII_BCM54XX_LREANAC 0x05 /* LDS Auto-Negotiation Advertised Control */ +#define MII_BCM54XX_LREANPT 0x06 /* LDS Ability Next Page Transmit */ +#define MII_BCM54XX_LRELPA 0x07 /* LDS Link Partner Ability */ +#define MII_BCM54XX_LRELPNPM 0x08 /* LDS Link Partner Next Page Message */ +#define MII_BCM54XX_LRELPNPC 0x09 /* LDS Link Partner Next Page Control */ +#define MII_BCM54XX_LRELDSE 0x0a /* LDS Expansion Register */ +#define MII_BCM54XX_LREES 0x0f /* LRE Extended Status */ + +/* LRE control register. */ +#define LRECR_RESET 0x8000 /* Reset to default state */ +#define LRECR_LOOPBACK 0x4000 /* Internal Loopback */ +#define LRECR_LDSRES 0x2000 /* Restart LDS Process */ +#define LRECR_LDSEN 0x1000 /* LDS Enable */ +#define LRECR_PDOWN 0x0800 /* Enable low power state */ +#define LRECR_ISOLATE 0x0400 /* Isolate data paths from MII */ +#define LRECR_SPEED100 0x0200 /* Select 100 Mbps */ +#define LRECR_SPEED10 0x0000 /* Select 10 Mbps */ +#define LRECR_4PAIRS 0x0020 /* Select 4 Pairs */ +#define LRECR_2PAIRS 0x0010 /* Select 2 Pairs */ +#define LRECR_1PAIR 0x0000 /* Select 1 Pair */ +#define LRECR_MASTER 0x0008 /* Force Master when LDS disabled */ +#define LRECR_SLAVE 0x0000 /* Force Slave when LDS disabled */ + +/* LRE status register. */ +#define LRESR_100_1PAIR 0x2000 /* Can do 100Mbps 1 Pair */ +#define LRESR_100_4PAIR 0x1000 /* Can do 100Mbps 4 Pairs */ +#define LRESR_100_2PAIR 0x0800 /* Can do 100Mbps 2 Pairs */ +#define LRESR_10_2PAIR 0x0400 /* Can do 10Mbps 2 Pairs */ +#define LRESR_10_1PAIR 0x0200 /* Can do 10Mbps 1 Pair */ +#define LRESR_ESTATEN 0x0100 /* Extended Status in R15 */ +#define LRESR_RESV 0x0080 /* Unused... */ +#define LRESR_MFPS 0x0040 /* Can suppress Management Frames Preamble */ +#define LRESR_LDSCOMPLETE 0x0020 /* LDS Auto-negotiation complete */ +#define LRESR_8023 0x0010 /* Has IEEE 802.3 Support */ +#define LRESR_LDSABILITY 0x0008 /* LDS auto-negotiation capable */ +#define LRESR_LSTATUS 0x0004 /* Link status */ +#define LRESR_JCD 0x0002 /* Jabber detected */ +#define LRESR_ERCAP 0x0001 /* Ext-reg capability */ + +/* LDS Auto-Negotiation Advertised Ability. */ +#define LREANAA_PAUSE_ASYM 0x8000 /* Can pause asymmetrically */ +#define LREANAA_PAUSE 0x4000 /* Can pause */ +#define LREANAA_100_1PAIR 0x0020 /* Can do 100Mbps 1 Pair */ +#define LREANAA_100_4PAIR 0x0010 /* Can do 100Mbps 4 Pair */ +#define LREANAA_100_2PAIR 0x0008 /* Can do 100Mbps 2 Pair */ +#define LREANAA_10_2PAIR 0x0004 /* Can do 10Mbps 2 Pair */ +#define LREANAA_10_1PAIR 0x0002 /* Can do 10Mbps 1 Pair */ + +#define LRE_ADVERTISE_FULL (LREANAA_100_1PAIR | LREANAA_100_4PAIR | \ + LREANAA_100_2PAIR | LREANAA_10_2PAIR | \ + LREANAA_10_1PAIR) + +#define LRE_ADVERTISE_ALL LRE_ADVERTISE_FULL + +/* LDS Link Partner Ability. */ +#define LRELPA_PAUSE_ASYM 0x8000 /* Supports asymmetric pause */ +#define LRELPA_PAUSE 0x4000 /* Supports pause capability */ +#define LRELPA_100_1PAIR 0x0020 /* 100Mbps 1 Pair capable */ +#define LRELPA_100_4PAIR 0x0010 /* 100Mbps 4 Pair capable */ +#define LRELPA_100_2PAIR 0x0008 /* 100Mbps 2 Pair capable */ +#define LRELPA_10_2PAIR 0x0004 /* 10Mbps 2 Pair capable */ +#define LRELPA_10_1PAIR 0x0002 /* 10Mbps 1 Pair capable */ + +/* LDS Expansion register. */ +#define LDSE_DOWNGRADE 0x8000 /* Can do LDS Speed Downgrade */ +#define LDSE_MASTER 0x4000 /* Master / Slave */ +#define LDSE_PAIRS_MASK 0x3000 /* Pair Count Mask */ +#define LDSE_PAIRS_SHIFT 12 +#define LDSE_4PAIRS (2 << LDSE_PAIRS_SHIFT) /* 4 Pairs Connection */ +#define LDSE_2PAIRS (1 << LDSE_PAIRS_SHIFT) /* 2 Pairs Connection */ +#define LDSE_1PAIR (0 << LDSE_PAIRS_SHIFT) /* 1 Pair Connection */ +#define LDSE_CABLEN_MASK 0x0FFF /* Cable Length Mask */ + /* BCM54810 Registers */ #define BCM54810_EXP_BROADREACH_LRE_MISC_CTL (MII_BCM54XX_EXP_SEL_ER + 0x90) #define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN (1 << 0) #define BCM54810_SHD_CLK_CTL 0x3 #define BCM54810_SHD_CLK_CTL_GTXCLK_EN (1 << 9) +/* BCM54811 Registers */ +#define BCM54811_EXP_BROADREACH_LRE_OVERLAY_CTL (MII_BCM54XX_EXP_SEL_ER + 0x9A) +/* Access Control Override Enable */ +#define BCM54811_EXP_BROADREACH_LRE_OVERLAY_CTL_EN BIT(15) +/* Access Control Override Value */ +#define BCM54811_EXP_BROADREACH_LRE_OVERLAY_CTL_OVERRIDE_VAL BIT(14) +/* Access Control Value */ +#define BCM54811_EXP_BROADREACH_LRE_OVERLAY_CTL_VAL BIT(13) + /* BCM54612E Registers */ #define BCM54612E_EXP_SPARE0 (MII_BCM54XX_EXP_SEL_ETC + 0x34) #define BCM54612E_LED4_CLK125OUT_EN (1 << 1) -- cgit v1.2.3 From a52c6330ff2fe1163333fa6609bdc6e8763ec286 Mon Sep 17 00:00:00 2001 From: "Alex Shi (Tencent)" Date: Fri, 12 Jul 2024 12:14:35 +0800 Subject: mm/memcg: alignment memcg_data define condition commit 21c690a349ba ("mm: introduce slabobj_ext to support slab object extensions") changed the folio/page->memcg_data define condition from MEMCG to SLAB_OBJ_EXT. This action make memcg_data exposed while !MEMCG. As Vlastimil Babka suggested, let's add _unused_slab_obj_exts for SLAB_MATCH for slab.obj_exts while !MEMCG. That could resolve the match issue, clean up the feature logical. Signed-off-by: Alex Shi (Tencent) Cc: Randy Dunlap Cc: Yoann Congal Cc: Masahiro Yamada Cc: Petr Mladek Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Vlastimil Babka --- include/linux/mm_types.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index af3a0256fa93..a199c48bc462 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -169,8 +169,10 @@ struct page { /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */ atomic_t _refcount; -#ifdef CONFIG_SLAB_OBJ_EXT +#ifdef CONFIG_MEMCG unsigned long memcg_data; +#elif defined(CONFIG_SLAB_OBJ_EXT) + unsigned long _unused_slab_obj_exts; #endif /* @@ -298,6 +300,7 @@ typedef struct { * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head(). * @_deferred_list: Folios to be split under memory pressure. + * @_unused_slab_obj_exts: Placeholder to match obj_exts in struct slab. * * A folio is a physically, virtually and logically contiguous set * of bytes. It is a power-of-two in size, and it is aligned to that @@ -332,8 +335,10 @@ struct folio { }; atomic_t _mapcount; atomic_t _refcount; -#ifdef CONFIG_SLAB_OBJ_EXT +#ifdef CONFIG_MEMCG unsigned long memcg_data; +#elif defined(CONFIG_SLAB_OBJ_EXT) + unsigned long _unused_slab_obj_exts; #endif #if defined(WANT_PAGE_VIRTUAL) void *virtual; -- cgit v1.2.3 From 6b04928e83f298d99359f3ba67187a328c223357 Mon Sep 17 00:00:00 2001 From: Julien Panis Date: Mon, 3 Jun 2024 12:50:48 +0200 Subject: dt-bindings: thermal: mediatek: Fix thermal zone definition for MT8186 Fix a thermal zone name for consistency with the other SoCs: MFG contains GPU, the latter is more specific and must be used here. The naming must be fixed "atomically" so compilation does not break. As a result, the change is made in the dt-bindings and in the LVTS driver within a single commit, despite the checkpatch warning. The definition can be safely modified here because it is used only in the LVTS driver, which is modified accordingly, and has not yet been included in a released kernel. Fixes: a2ca202350f9 ("dt-bindings: thermal: mediatek: Add LVTS thermal controller definition for MT8186") Reviewed-by: AngeloGioacchino Del Regno Acked-by: Conor Dooley Signed-off-by: Julien Panis Link: https://lore.kernel.org/r/20240603-mtk-thermal-mt818x-dtsi-v7-1-8c8e3c7a3643@baylibre.com Signed-off-by: Daniel Lezcano --- include/dt-bindings/thermal/mediatek,lvts-thermal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/thermal/mediatek,lvts-thermal.h b/include/dt-bindings/thermal/mediatek,lvts-thermal.h index bf95309d2525..85d25b4d726d 100644 --- a/include/dt-bindings/thermal/mediatek,lvts-thermal.h +++ b/include/dt-bindings/thermal/mediatek,lvts-thermal.h @@ -24,7 +24,7 @@ #define MT8186_BIG_CPU1 5 #define MT8186_NNA 6 #define MT8186_ADSP 7 -#define MT8186_MFG 8 +#define MT8186_GPU 8 #define MT8188_MCU_LITTLE_CPU0 0 #define MT8188_MCU_LITTLE_CPU1 1 -- cgit v1.2.3 From be3e224ec502d49cf9017304286c14418b230fe6 Mon Sep 17 00:00:00 2001 From: Julien Panis Date: Mon, 3 Jun 2024 12:50:49 +0200 Subject: dt-bindings: thermal: mediatek: Fix thermal zone definitions for MT8188 Fix thermal zone names for consistency with the other SoCs: - GPU0 must be used as the first GPU item. - SOCx deal with audio DSP, video, and infra subsystems. The naming must be fixed "atomically" so compilation does not break. As a result, the change is made in the dt-bindings and in the LVTS driver within a single commit, despite the checkpatch warning. The definitions can be safely modified here because they are used only in the LVTS driver, which is modified accordingly, and have not yet been included in a released kernel. Fixes: 78c88534e5e1 ("dt-bindings: thermal: mediatek: Add LVTS thermal controller definition for MT8188") Reviewed-by: AngeloGioacchino Del Regno Acked-by: Conor Dooley Signed-off-by: Julien Panis Link: https://lore.kernel.org/r/20240603-mtk-thermal-mt818x-dtsi-v7-2-8c8e3c7a3643@baylibre.com Signed-off-by: Daniel Lezcano --- include/dt-bindings/thermal/mediatek,lvts-thermal.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/thermal/mediatek,lvts-thermal.h b/include/dt-bindings/thermal/mediatek,lvts-thermal.h index 85d25b4d726d..ddc7302a510a 100644 --- a/include/dt-bindings/thermal/mediatek,lvts-thermal.h +++ b/include/dt-bindings/thermal/mediatek,lvts-thermal.h @@ -34,11 +34,11 @@ #define MT8188_MCU_BIG_CPU1 5 #define MT8188_AP_APU 0 -#define MT8188_AP_GPU1 1 -#define MT8188_AP_GPU2 2 -#define MT8188_AP_SOC1 3 -#define MT8188_AP_SOC2 4 -#define MT8188_AP_SOC3 5 +#define MT8188_AP_GPU0 1 +#define MT8188_AP_GPU1 2 +#define MT8188_AP_ADSP 3 +#define MT8188_AP_VDO 4 +#define MT8188_AP_INFRA 5 #define MT8188_AP_CAM1 6 #define MT8188_AP_CAM2 7 -- cgit v1.2.3 From 136a8066676e593cd29627219467fc222c8f3b04 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 11 Jul 2024 21:11:03 -0300 Subject: iommufd: Put constants for all the uAPI enums Relying on position in the enum makes it subtly harder when doing merge resolutions or backporting as it is easy to grab a patch and not notice it is a uAPI change with a differently ordered enum. This may become a bigger problem in next cycles when iommu_hwpt_invalidate_data_type and other per-driver enums have patches flowing through different trees. So lets start including constants for all the uAPI enums to make this safer. No functional change. Link: https://lore.kernel.org/r/0-v1-2c06ec044924+133-iommufd_uapi_const_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Tested-by: Yi Liu Signed-off-by: Jason Gunthorpe --- include/uapi/linux/iommufd.h | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index e31385b75d0b..4dde745cfb7e 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -37,20 +37,20 @@ enum { IOMMUFD_CMD_BASE = 0x80, IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE, - IOMMUFD_CMD_IOAS_ALLOC, - IOMMUFD_CMD_IOAS_ALLOW_IOVAS, - IOMMUFD_CMD_IOAS_COPY, - IOMMUFD_CMD_IOAS_IOVA_RANGES, - IOMMUFD_CMD_IOAS_MAP, - IOMMUFD_CMD_IOAS_UNMAP, - IOMMUFD_CMD_OPTION, - IOMMUFD_CMD_VFIO_IOAS, - IOMMUFD_CMD_HWPT_ALLOC, - IOMMUFD_CMD_GET_HW_INFO, - IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, - IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, - IOMMUFD_CMD_HWPT_INVALIDATE, - IOMMUFD_CMD_FAULT_QUEUE_ALLOC, + IOMMUFD_CMD_IOAS_ALLOC = 0x81, + IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82, + IOMMUFD_CMD_IOAS_COPY = 0x83, + IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84, + IOMMUFD_CMD_IOAS_MAP = 0x85, + IOMMUFD_CMD_IOAS_UNMAP = 0x86, + IOMMUFD_CMD_OPTION = 0x87, + IOMMUFD_CMD_VFIO_IOAS = 0x88, + IOMMUFD_CMD_HWPT_ALLOC = 0x89, + IOMMUFD_CMD_GET_HW_INFO = 0x8a, + IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b, + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c, + IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, + IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, }; /** @@ -400,8 +400,8 @@ struct iommu_hwpt_vtd_s1 { * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table */ enum iommu_hwpt_data_type { - IOMMU_HWPT_DATA_NONE, - IOMMU_HWPT_DATA_VTD_S1, + IOMMU_HWPT_DATA_NONE = 0, + IOMMU_HWPT_DATA_VTD_S1 = 1, }; /** @@ -491,8 +491,8 @@ struct iommu_hw_info_vtd { * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type */ enum iommu_hw_info_type { - IOMMU_HW_INFO_TYPE_NONE, - IOMMU_HW_INFO_TYPE_INTEL_VTD, + IOMMU_HW_INFO_TYPE_NONE = 0, + IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, }; /** @@ -629,7 +629,7 @@ struct iommu_hwpt_get_dirty_bitmap { * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1 */ enum iommu_hwpt_invalidate_data_type { - IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, + IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0, }; /** @@ -768,7 +768,7 @@ struct iommu_hwpt_pgfault { */ enum iommufd_page_response_code { IOMMUFD_PAGE_RESP_SUCCESS = 0, - IOMMUFD_PAGE_RESP_INVALID, + IOMMUFD_PAGE_RESP_INVALID = 1, }; /** -- cgit v1.2.3 From f0eb154c39471bf881422e8ac23e4c037289ece9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 5 Jul 2024 10:31:54 +0100 Subject: irqchip/gic-v4: Substitute vmovp_lock for a per-VM lock vmovp_lock is abused in a number of cases to serialise updates to vlpi_count[] and deal with map/unmap of a VM to ITSs. Instead, provide a per-VM lock and revisit the use of vlpi_count[] so that it is always wrapped in this per-VM vmapp_lock. This reduces the potential contention on a concurrent VMOVP command, and paves the way for subsequent VPE locking that holding vmovp_lock actively prevents due to the lock ordering. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Nianyao Tang Link: https://lore.kernel.org/r/20240705093155.871070-3-maz@kernel.org --- include/linux/irqchip/arm-gic-v4.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 2c63375bbd43..ecabed6d3307 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -25,6 +25,14 @@ struct its_vm { irq_hw_number_t db_lpi_base; unsigned long *db_bitmap; int nr_db_lpis; + /* + * Ensures mutual exclusion between updates to vlpi_count[] + * and map/unmap when using the ITSList mechanism. + * + * The lock order for any sequence involving the ITSList is + * vmapp_lock -> vpe_lock ->vmovp_lock. + */ + raw_spinlock_t vmapp_lock; u32 vlpi_count[GICv4_ITS_LIST_MAX]; }; -- cgit v1.2.3 From c37927a203fa283950f6045602b9f71328ad786c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 11 Jul 2024 12:20:04 +0200 Subject: genirq: Set IRQF_COND_ONESHOT in request_irq() The callers of request_irq() don't care about IRQF_ONESHOT because they don't provide threaded handlers, but if they happen to share the IRQ with the ACPI SCI, which has a threaded handler and sets IRQF_ONESHOT, request_irq() will fail for them due to a flags mismatch. Address this by making request_irq() add IRQF_COND_ONESHOT to the flags passed to request_threaded_irq() for all of its callers. Fixes: 7a36b901a6eb ("ACPI: OSL: Use a threaded interrupt handler for SCI") Reported-by: Stefan Seyfried Signed-off-by: Rafael J. Wysocki Signed-off-by: Thomas Gleixner Tested-by: Stefan Seyfried Cc: stable@vger.kerel.org Link: https://lore.kernel.org/r/5800834.DvuYhMxLoT@rjwysocki.net Closes: https://lore.kernel.org/lkml/205bd84a-fe8e-4963-968e-0763285f35ba@message-id.googlemail.com --- include/linux/interrupt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5c9bdd3ffccc..dac7466de5f3 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -168,7 +168,7 @@ static inline int __must_check request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *name, void *dev) { - return request_threaded_irq(irq, handler, NULL, flags, name, dev); + return request_threaded_irq(irq, handler, NULL, flags | IRQF_COND_ONESHOT, name, dev); } extern int __must_check -- cgit v1.2.3 From b7b377332b96a38bc98928d7ec2674c77a95fcb3 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Fri, 12 Jul 2024 08:41:48 +0200 Subject: irqdomain: Fix the kernel-doc and plug it into Documentation There were several undocumented fields in structs irq_domain_ops and irq_domain_info. Document them. irq_domain_ops::revmap_size contained "[]" in the description, which is not allowed in sphinx. Remove that. Finally, plug the whole header (irqdomain.h) into genericirq.rst, so that the docs is autogenerated and hyperlinks to these structure are created. Signed-off-by: Jiri Slaby (SUSE) Signed-off-by: Thomas Gleixner Tested-by: Randy Dunlap Reviewed-by: Randy Dunlap Link: https://lore.kernel.org/r/20240712064148.157040-1-jirislaby@kernel.org --- include/linux/irqdomain.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 02cd486ac354..de6105f68fec 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -74,11 +74,24 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, * struct irq_domain_ops - Methods for irq_domain objects * @match: Match an interrupt controller device node to a host, returns * 1 on a match + * @select: Match an interrupt controller fw specification. It is more generic + * than @match as it receives a complete struct irq_fwspec. Therefore, + * @select is preferred if provided. Returns 1 on a match. * @map: Create or update a mapping between a virtual irq number and a hw * irq number. This is called only once for a given mapping. * @unmap: Dispose of such a mapping * @xlate: Given a device tree node and interrupt specifier, decode * the hardware irq number and linux irq type value. + * @alloc: Allocate @nr_irqs interrupts starting from @virq. + * @free: Free @nr_irqs interrupts starting from @virq. + * @activate: Activate one interrupt in HW (@irqd). If @reserve is set, only + * reserve the vector. If unset, assign the vector (called from + * request_irq()). + * @deactivate: Disarm one interrupt (@irqd). + * @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and + * linux irq type value (@out_type). This is a generalised @xlate + * (over struct irq_fwspec) and is preferred if provided. + * @debug_show: For domains to show specific data for an interrupt in debugfs. * * Functions below are provided by the driver and called whenever a new mapping * is created or an old mapping is disposed. The driver can then proceed to @@ -131,6 +144,9 @@ struct irq_domain_chip_generic; * Optional elements: * @fwnode: Pointer to firmware node associated with the irq_domain. Pretty easy * to swap it for the of_node via the irq_domain_get_of_node accessor + * @bus_token: @fwnode's device_node might be used for several irq domains. But + * in connection with @bus_token, the pair shall be unique in a + * system. * @gc: Pointer to a list of generic chips. There is a helper function for * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. @@ -144,7 +160,9 @@ struct irq_domain_chip_generic; * @exit: Function called when the domain is destroyed * * Revmap data, used internally by the irq domain code: - * @revmap_size: Size of the linear map table @revmap[] + * @hwirq_max: Top limit for the HW irq number. Especially to avoid + * conflicts/failures with reserved HW irqs. Can be ~0. + * @revmap_size: Size of the linear map table @revmap * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map * @revmap: Linear table of irq_data pointers */ -- cgit v1.2.3 From 6c87e1a4792804efce8ab3dfdb6e9ada314ec6dd Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Sat, 13 Jul 2024 23:16:13 +0200 Subject: net: dsa: vsc73xx: introduce tag 8021q for vsc73xx This commit introduces a new tagger based on 802.1q tagging. It's designed for the vsc73xx driver. The VSC73xx family doesn't have any tag support for the RGMII port, but it could be based on VLANs. Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Signed-off-by: Pawel Dembicki Reviewed-by: Linus Walleij Link: https://patch.msgid.link/20240713211620.1125910-8-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index f9ae3ca66b6f..5a5a03a7b4c3 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -53,6 +53,7 @@ struct tc_action; #define DSA_TAG_PROTO_RTL8_4T_VALUE 25 #define DSA_TAG_PROTO_RZN1_A5PSW_VALUE 26 #define DSA_TAG_PROTO_LAN937X_VALUE 27 +#define DSA_TAG_PROTO_VSC73XX_8021Q_VALUE 28 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, @@ -83,6 +84,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_RTL8_4T = DSA_TAG_PROTO_RTL8_4T_VALUE, DSA_TAG_PROTO_RZN1_A5PSW = DSA_TAG_PROTO_RZN1_A5PSW_VALUE, DSA_TAG_PROTO_LAN937X = DSA_TAG_PROTO_LAN937X_VALUE, + DSA_TAG_PROTO_VSC73XX_8021Q = DSA_TAG_PROTO_VSC73XX_8021Q_VALUE, }; struct dsa_switch; -- cgit v1.2.3 From ce20fdd670ac375a4e3dff91c2888ad9ff9eef56 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Sat, 13 Jul 2024 23:16:15 +0200 Subject: net: dsa: Define max num of bridges in tag8021q implementation Max number of bridges in tag8021q implementation is strictly limited by VBID size: 3 bits. But zero is reserved and only 7 values can be used. This patch adds define which describe maximum possible value. Suggested-by: Vladimir Oltean Signed-off-by: Pawel Dembicki Reviewed-by: Florian Fainelli Reviewed-by: Linus Walleij Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20240713211620.1125910-10-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/dsa/8021q.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index f3664ee12170..1dda2a13b832 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -8,6 +8,11 @@ #include #include +/* VBID is limited to three bits only and zero is reserved. + * Only 7 bridges can be enumerated. + */ +#define DSA_TAG_8021Q_MAX_NUM_BRIDGES 7 + int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto); void dsa_tag_8021q_unregister(struct dsa_switch *ds); -- cgit v1.2.3 From 85aabd1fe9d6af4dc5d11a2d8be567ec45d1dc5e Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Sat, 13 Jul 2024 23:16:16 +0200 Subject: net: dsa: prepare 'dsa_tag_8021q_bridge_join' for standalone use The 'dsa_tag_8021q_bridge_join' could be used as a generic implementation of the 'ds->ops->port_bridge_join()' function. However, it is necessary to synchronize their arguments. This patch also moves the 'tx_fwd_offload' flag configuration line into 'dsa_tag_8021q_bridge_join' body. Currently, every (sja1105) driver sets it, and the future vsc73xx implementation will also need it for simplification. Suggested-by: Vladimir Oltean Signed-off-by: Pawel Dembicki Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20240713211620.1125910-11-paweldembicki@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/dsa/8021q.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 1dda2a13b832..d13aabdeb4b2 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -18,7 +18,8 @@ int dsa_tag_8021q_register(struct dsa_switch *ds, __be16 proto); void dsa_tag_8021q_unregister(struct dsa_switch *ds); int dsa_tag_8021q_bridge_join(struct dsa_switch *ds, int port, - struct dsa_bridge bridge); + struct dsa_bridge bridge, bool *tx_fwd_offload, + struct netlink_ext_ack *extack); void dsa_tag_8021q_bridge_leave(struct dsa_switch *ds, int port, struct dsa_bridge bridge); -- cgit v1.2.3 From 3ba74b2f288bbc17c0c2a58ab219e1df19f80153 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 21 Jun 2024 16:01:55 +0300 Subject: Bluetooth: hci_core: cleanup struct hci_dev Remove unused and set but otherwise unused 'discovery_old_state' and 'sco_last_tx' members of 'struct hci_dev'. The first one is a leftover after commit 182ee45da083 ("Bluetooth: hci_sync: Rework hci_suspend_notifier"); the second one is originated from ancient 2.4.19 and I was unable to find any actual use since that. Signed-off-by: Dmitry Antipov Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index eaeaf3dc07aa..31020891fc68 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -476,7 +476,6 @@ struct hci_dev { unsigned int iso_pkts; unsigned long acl_last_tx; - unsigned long sco_last_tx; unsigned long le_last_tx; __u8 le_tx_def_phys; @@ -528,7 +527,6 @@ struct hci_dev { struct discovery_state discovery; - int discovery_old_state; bool discovery_paused; int advertising_old_state; bool advertising_paused; -- cgit v1.2.3 From 92048ab2e2e6cc90ad1cc9f55deb5cec4d731793 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Jul 2024 16:00:08 -0400 Subject: Bluetooth: hci_core: Remove usage of hci_req_sync hci_request functions are considered deprecated so this replaces the usage of hci_req_sync with hci_inquiry_sync. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 20168732f20e..620e6014beb2 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -131,6 +131,8 @@ int hci_update_discoverable(struct hci_dev *hdev); int hci_update_connectable_sync(struct hci_dev *hdev); +int hci_inquiry_sync(struct hci_dev *hdev, u8 length, u8 num_rsp); + int hci_start_discovery_sync(struct hci_dev *hdev); int hci_stop_discovery_sync(struct hci_dev *hdev); -- cgit v1.2.3 From 176cbeceb5c5a740216a6be3e751e76aaddf94b9 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Jul 2024 16:13:56 -0400 Subject: Bluetooth: hci_core: Don't use hci_prepare_cmd This replaces the instance of hci_prepare_cmd with hci_cmd_sync_alloc since the former is part of hci_request.c which is considered deprecated. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 620e6014beb2..a8d88247ac89 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -20,6 +20,10 @@ struct hci_cmd_sync_work_entry { }; struct adv_info; + +struct sk_buff *hci_cmd_sync_alloc(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, struct sock *sk); + /* Function with sync suffix shall not be called with hdev->lock held as they * wait the command to complete and in the meantime an event could be received * which could attempt to acquire hdev->lock causing a deadlock. -- cgit v1.2.3 From f2d89775358606c7ab6b6b6c4a02fe1e8cd270b1 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Jul 2024 16:52:57 -0400 Subject: Bluetooth: hci_sync: Remove remaining dependencies of hci_request This removes the dependencies of hci_req_init and hci_request_cancel_all from hci_sync.c. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_sync.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index a8d88247ac89..75e052909b5f 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -8,6 +8,23 @@ #define UINT_PTR(_handle) ((void *)((uintptr_t)_handle)) #define PTR_UINT(_ptr) ((uintptr_t)((void *)_ptr)) +#define HCI_REQ_DONE 0 +#define HCI_REQ_PEND 1 +#define HCI_REQ_CANCELED 2 + +#define hci_req_sync_lock(hdev) mutex_lock(&hdev->req_lock) +#define hci_req_sync_unlock(hdev) mutex_unlock(&hdev->req_lock) + +struct hci_request { + struct hci_dev *hdev; + struct sk_buff_head cmd_q; + + /* If something goes wrong when building the HCI request, the error + * value is stored in this field. + */ + int err; +}; + typedef int (*hci_cmd_sync_work_func_t)(struct hci_dev *hdev, void *data); typedef void (*hci_cmd_sync_work_destroy_t)(struct hci_dev *hdev, void *data, int err); -- cgit v1.2.3 From 936daee9cf08c5e58c9a0fe687f52adb2d80e87d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 1 Jul 2024 17:10:41 -0400 Subject: Bluetooth: Remove hci_request.{c,h} This removes hci_request.{c,h} since it shall no longer be used. Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/bluetooth.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index b3228bd6cd6b..5d655e109b2c 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -441,6 +441,10 @@ typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode); typedef void (*hci_req_complete_skb_t)(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb); +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, + hci_req_complete_t *req_complete, + hci_req_complete_skb_t *req_complete_skb); + #define HCI_REQ_START BIT(0) #define HCI_REQ_SKB BIT(1) -- cgit v1.2.3 From e50bfd6bb231a6e2b7221ad78dce294330238c76 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 9 Jul 2024 15:53:33 +0200 Subject: net_tstamp: Add TIMESTAMPING SOFTWARE and HARDWARE mask Timestamping software or hardware flags are often used as a group, therefore adding these masks will ease future use. I did not use SOF_TIMESTAMPING_SYS_HARDWARE flag as it is deprecated and not used at all. Reviewed-by: Willem de Bruijn Reviewed-by: Florian Fainelli Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240709-feature_ptp_netnext-v17-1-b5317f50df2a@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/net_tstamp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h index eb01c37e71e0..3799c79b6c83 100644 --- a/include/linux/net_tstamp.h +++ b/include/linux/net_tstamp.h @@ -5,6 +5,14 @@ #include +#define SOF_TIMESTAMPING_SOFTWARE_MASK (SOF_TIMESTAMPING_RX_SOFTWARE | \ + SOF_TIMESTAMPING_TX_SOFTWARE | \ + SOF_TIMESTAMPING_SOFTWARE) + +#define SOF_TIMESTAMPING_HARDWARE_MASK (SOF_TIMESTAMPING_RX_HARDWARE | \ + SOF_TIMESTAMPING_TX_HARDWARE | \ + SOF_TIMESTAMPING_RAW_HARDWARE) + enum hwtstamp_source { HWTSTAMP_SOURCE_NETDEV, HWTSTAMP_SOURCE_PHYLIB, -- cgit v1.2.3 From 2dd35600590148d843367c04975acad3c1a527c3 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 9 Jul 2024 15:53:36 +0200 Subject: net: Change the API of PHY default timestamp to MAC Change the API to select MAC default time stamping instead of the PHY. Indeed the PHY is closer to the wire therefore theoretically it has less delay than the MAC timestamping but the reality is different. Due to lower time stamping clock frequency, latency in the MDIO bus and no PHC hardware synchronization between different PHY, the PHY PTP is often less precise than the MAC. The exception is for PHY designed specially for PTP case but these devices are not very widespread. For not breaking the compatibility default_timestamp flag has been introduced in phy_device that is set by the phy driver to know we are using the old API behavior. Reviewed-by: Rahul Rameshbabu Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240709-feature_ptp_netnext-v17-4-b5317f50df2a@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index bd68f9d8e74f..e7a38137211c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -616,6 +616,8 @@ struct macsec_ops; * handling shall be postponed until PHY has resumed * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended, * requiring a rerun of the interrupt handler after resume + * @default_timestamp: Flag indicating whether we are using the phy + * timestamp as the default one * @interface: enum phy_interface_t value * @possible_interfaces: bitmap if interface modes that the attached PHY * will switch between depending on media speed. @@ -681,6 +683,8 @@ struct phy_device { unsigned irq_suspended:1; unsigned irq_rerun:1; + unsigned default_timestamp:1; + int rate_matching; enum phy_state state; @@ -1625,6 +1629,21 @@ static inline void phy_txtstamp(struct phy_device *phydev, struct sk_buff *skb, phydev->mii_ts->txtstamp(phydev->mii_ts, skb, type); } +/** + * phy_is_default_hwtstamp - Is the PHY hwtstamp the default timestamp + * @phydev: Pointer to phy_device + * + * This is used to get default timestamping device taking into account + * the new API choice, which is selecting the timestamping from MAC by + * default if the phydev does not have default_timestamp flag enabled. + * + * Return: True if phy is the default hw timestamp, false otherwise. + */ +static inline bool phy_is_default_hwtstamp(struct phy_device *phydev) +{ + return phy_has_hwtstamp(phydev) && phydev->default_timestamp; +} + /** * phy_is_internal - Convenience function for testing if a PHY is internal * @phydev: the phy_device struct -- cgit v1.2.3 From bc5a07ed15a3d30df3e8af2ef6bb0472f1d337f9 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 9 Jul 2024 15:53:37 +0200 Subject: net: net_tstamp: Add unspec field to hwtstamp_source enumeration Prepare for future support of saving hwtstamp source in PTP xarray by introducing HWTSTAMP_SOURCE_UNSPEC to hwtstamp_source enum, setting it to 0 to match old behavior of no source defined. Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240709-feature_ptp_netnext-v17-5-b5317f50df2a@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/net_tstamp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h index 3799c79b6c83..662074b08c94 100644 --- a/include/linux/net_tstamp.h +++ b/include/linux/net_tstamp.h @@ -14,6 +14,7 @@ SOF_TIMESTAMPING_RAW_HARDWARE) enum hwtstamp_source { + HWTSTAMP_SOURCE_UNSPEC, HWTSTAMP_SOURCE_NETDEV, HWTSTAMP_SOURCE_PHYLIB, }; -- cgit v1.2.3 From 2111375b85ad173d58e7b8604246a3de60950ac8 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 9 Jul 2024 15:53:38 +0200 Subject: net: Add struct kernel_ethtool_ts_info In prevision to add new UAPI for hwtstamp we will be limited to the struct ethtool_ts_info that is currently passed in fixed binary format through the ETHTOOL_GET_TS_INFO ethtool ioctl. It would be good if new kernel code already started operating on an extensible kernel variant of that structure, similar in concept to struct kernel_hwtstamp_config vs struct hwtstamp_config. Since struct ethtool_ts_info is in include/uapi/linux/ethtool.h, here we introduce the kernel-only structure in include/linux/ethtool.h. The manual copy is then made in the function called by ETHTOOL_GET_TS_INFO. Acked-by: Shannon Nelson Acked-by: Alexandra Winter Signed-off-by: Kory Maincent Link: https://patch.msgid.link/20240709-feature_ptp_netnext-v17-6-b5317f50df2a@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/can/dev.h | 2 +- include/linux/ethtool.h | 25 ++++++++++++++++++++++--- include/linux/mii_timestamper.h | 2 +- include/linux/phy.h | 2 +- include/net/dsa.h | 2 +- include/soc/mscc/ocelot.h | 2 +- 6 files changed, 27 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 1b92aed49363..23492213ea35 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -186,7 +186,7 @@ void close_candev(struct net_device *dev); int can_change_mtu(struct net_device *dev, int new_mtu); int can_eth_ioctl_hwts(struct net_device *netdev, struct ifreq *ifr, int cmd); int can_ethtool_op_get_ts_info_hwts(struct net_device *dev, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); int register_candev(struct net_device *dev); void unregister_candev(struct net_device *dev); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e213b5508da6..6b38bbda5790 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -18,6 +18,7 @@ #include #include #include +#include struct compat_ethtool_rx_flow_spec { u32 flow_type; @@ -713,6 +714,22 @@ struct ethtool_rxfh_param { u8 input_xfrm; }; +/** + * struct kernel_ethtool_ts_info - kernel copy of struct ethtool_ts_info + * @cmd: command number = %ETHTOOL_GET_TS_INFO + * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags + * @phc_index: device index of the associated PHC, or -1 if there is none + * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values + * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values + */ +struct kernel_ethtool_ts_info { + u32 cmd; + u32 so_timestamping; + int phc_index; + enum hwtstamp_tx_types tx_types; + enum hwtstamp_rx_filters rx_filters; +}; + /** * struct ethtool_ops - optional netdev operations * @cap_link_lanes_supported: indicates if the driver supports lanes @@ -1020,7 +1037,7 @@ struct ethtool_ops { int (*get_dump_data)(struct net_device *, struct ethtool_dump *, void *); int (*set_dump)(struct net_device *, struct ethtool_dump *); - int (*get_ts_info)(struct net_device *, struct ethtool_ts_info *); + int (*get_ts_info)(struct net_device *, struct kernel_ethtool_ts_info *); void (*get_ts_stats)(struct net_device *dev, struct ethtool_ts_stats *ts_stats); int (*get_module_info)(struct net_device *, @@ -1181,7 +1198,8 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index); /* Some generic methods drivers may use in their ethtool_ops */ u32 ethtool_op_get_link(struct net_device *dev); -int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *eti); +int ethtool_op_get_ts_info(struct net_device *dev, + struct kernel_ethtool_ts_info *eti); /** * ethtool_mm_frag_size_add_to_min - Translate (standard) additional fragment @@ -1230,7 +1248,8 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, * @info: buffer to hold the result * Returns zero on success, non-zero otherwise. */ -int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info); +int ethtool_get_ts_info_by_layer(struct net_device *dev, + struct kernel_ethtool_ts_info *info); /** * ethtool_sprintf - Write formatted string to ethtool string data diff --git a/include/linux/mii_timestamper.h b/include/linux/mii_timestamper.h index 26b04f73f214..995db62570f9 100644 --- a/include/linux/mii_timestamper.h +++ b/include/linux/mii_timestamper.h @@ -59,7 +59,7 @@ struct mii_timestamper { struct phy_device *phydev); int (*ts_info)(struct mii_timestamper *mii_ts, - struct ethtool_ts_info *ts_info); + struct kernel_ethtool_ts_info *ts_info); struct device *device; }; diff --git a/include/linux/phy.h b/include/linux/phy.h index e7a38137211c..04ae5c811cfb 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1618,7 +1618,7 @@ static inline bool phy_rxtstamp(struct phy_device *phydev, struct sk_buff *skb, } static inline int phy_ts_info(struct phy_device *phydev, - struct ethtool_ts_info *tsinfo) + struct kernel_ethtool_ts_info *tsinfo) { return phydev->mii_ts->ts_info(phydev->mii_ts, tsinfo); } diff --git a/include/net/dsa.h b/include/net/dsa.h index f9ae3ca66b6f..96efdd9f90c9 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -934,7 +934,7 @@ struct dsa_switch_ops { * ethtool timestamp info */ int (*get_ts_info)(struct dsa_switch *ds, int port, - struct ethtool_ts_info *ts); + struct kernel_ethtool_ts_info *ts); /* * ethtool MAC merge layer diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 1e1b40f4e664..6a37b29f4b4c 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -1016,7 +1016,7 @@ void ocelot_port_get_eth_mac_stats(struct ocelot *ocelot, int port, void ocelot_port_get_eth_phy_stats(struct ocelot *ocelot, int port, struct ethtool_eth_phy_stats *phy_stats); int ocelot_get_ts_info(struct ocelot *ocelot, int port, - struct ethtool_ts_info *info); + struct kernel_ethtool_ts_info *info); void ocelot_set_ageing_time(struct ocelot *ocelot, unsigned int msecs); int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, bool enabled, struct netlink_ext_ack *extack); -- cgit v1.2.3 From f96eb1172ed8d74cfed7da92d2045ec64028cc38 Mon Sep 17 00:00:00 2001 From: Christian Eggers Date: Mon, 15 Jul 2024 14:30:50 +0200 Subject: dsa: lan9303: consistent naming for PHY address parameter Name it 'addr' instead of 'port' or 'phy'. Signed-off-by: Christian Eggers Link: https://patch.msgid.link/20240715123050.21202-1-ceggers@arri.de Signed-off-by: Jakub Kicinski --- include/linux/dsa/lan9303.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dsa/lan9303.h b/include/linux/dsa/lan9303.h index b4f22112ba75..3ce7cbcc37a3 100644 --- a/include/linux/dsa/lan9303.h +++ b/include/linux/dsa/lan9303.h @@ -5,8 +5,8 @@ struct lan9303; struct lan9303_phy_ops { /* PHY 1 and 2 access*/ - int (*phy_read)(struct lan9303 *chip, int port, int regnum); - int (*phy_write)(struct lan9303 *chip, int port, + int (*phy_read)(struct lan9303 *chip, int addr, int regnum); + int (*phy_write)(struct lan9303 *chip, int addr, int regnum, u16 val); }; -- cgit v1.2.3 From 70de41ef78573ce958ac04ecc2b5671851723c59 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 14 Jul 2024 18:05:56 +0200 Subject: llc: Constify struct llc_conn_state_trans 'struct llc_conn_state_trans' are not modified in this driver. Constifying this structure moves some data to a read-only section, so increase overall security. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 13923 10896 32 24851 6113 net/llc/llc_c_st.o After: ===== text data bss dec hex filename 21859 3328 0 25187 6263 net/llc/llc_c_st.o Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Link: https://patch.msgid.link/87cda89e4c9414e71d1a54bb1eb491b0e7f70375.1720973029.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- include/net/llc_c_st.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/llc_c_st.h b/include/net/llc_c_st.h index 53823d61d8b6..a4bea0f33188 100644 --- a/include/net/llc_c_st.h +++ b/include/net/llc_c_st.h @@ -44,8 +44,8 @@ struct llc_conn_state_trans { }; struct llc_conn_state { - u8 current_state; - struct llc_conn_state_trans **transitions; + u8 current_state; + const struct llc_conn_state_trans **transitions; }; extern struct llc_conn_state llc_conn_state_table[]; -- cgit v1.2.3 From 0970bf676f86c2c4d9bf7e672f5504d390c9fce6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 14 Jul 2024 18:15:20 +0200 Subject: llc: Constify struct llc_sap_state_trans 'struct llc_sap_state_trans' are not modified in this driver. Constifying this structure moves some data to a read-only section, so increase overall security. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 339 456 24 819 333 net/llc/llc_s_st.o After: ===== text data bss dec hex filename 683 144 0 827 33b net/llc/llc_s_st.o Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Link: https://patch.msgid.link/9d17587639195ee94b74ff06a11ef97d1833ee52.1720973710.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- include/net/llc_s_st.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/llc_s_st.h b/include/net/llc_s_st.h index ed5b2fa40d32..fca49d483d20 100644 --- a/include/net/llc_s_st.h +++ b/include/net/llc_s_st.h @@ -29,8 +29,8 @@ struct llc_sap_state_trans { }; struct llc_sap_state { - u8 curr_state; - struct llc_sap_state_trans **transitions; + u8 curr_state; + const struct llc_sap_state_trans **transitions; }; /* only access to SAP state table */ -- cgit v1.2.3 From c442db3f49f27e5a60a641b2ac9a3c6320796ed6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 10 Jun 2024 20:25:17 +0900 Subject: kbuild: remove PROVIDE() for kallsyms symbols This reimplements commit 951bcae6c5a0 ("kallsyms: Avoid weak references for kallsyms symbols") because I am not a big fan of PROVIDE(). As an alternative solution, this commit prepends one more kallsyms step. KSYMS .tmp_vmlinux.kallsyms0.S # added AS .tmp_vmlinux.kallsyms0.o # added LD .tmp_vmlinux.btf BTF .btf.vmlinux.bin.o LD .tmp_vmlinux.kallsyms1 NM .tmp_vmlinux.kallsyms1.syms KSYMS .tmp_vmlinux.kallsyms1.S AS .tmp_vmlinux.kallsyms1.o LD .tmp_vmlinux.kallsyms2 NM .tmp_vmlinux.kallsyms2.syms KSYMS .tmp_vmlinux.kallsyms2.S AS .tmp_vmlinux.kallsyms2.o LD vmlinux Step 0 takes /dev/null as input, and generates .tmp_vmlinux.kallsyms0.o, which has a valid kallsyms format with the empty symbol list, and can be linked to vmlinux. Since it is really small, the added compile-time cost is negligible. Signed-off-by: Masahiro Yamada Acked-by: Ard Biesheuvel Reviewed-by: Nicolas Schier --- include/asm-generic/vmlinux.lds.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5703526d6ebf..62b4cb0462e6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -451,30 +451,11 @@ #endif #endif -/* - * Some symbol definitions will not exist yet during the first pass of the - * link, but are guaranteed to exist in the final link. Provide preliminary - * definitions that will be superseded in the final link to avoid having to - * rely on weak external linkage, which requires a GOT when used in position - * independent code. - */ -#define PRELIMINARY_SYMBOL_DEFINITIONS \ - PROVIDE(kallsyms_addresses = .); \ - PROVIDE(kallsyms_offsets = .); \ - PROVIDE(kallsyms_names = .); \ - PROVIDE(kallsyms_num_syms = .); \ - PROVIDE(kallsyms_relative_base = .); \ - PROVIDE(kallsyms_token_table = .); \ - PROVIDE(kallsyms_token_index = .); \ - PROVIDE(kallsyms_markers = .); \ - PROVIDE(kallsyms_seqs_of_names = .); - /* * Read only Data */ #define RO_DATA(align) \ . = ALIGN((align)); \ - PRELIMINARY_SYMBOL_DEFINITIONS \ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ __start_rodata = .; \ *(.rodata) *(.rodata.*) \ -- cgit v1.2.3 From 6e5c85c003e47fd49b72ca052b9181644e04a520 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Sat, 13 Jul 2024 02:18:58 +0000 Subject: net/sched: flower: refactor control flag definitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Redefine the flower control flags as an enum, so they are included in BTF info. Make the kernel-side enum a more explicit superset of TCA_FLOWER_KEY_FLAGS_*, new flags still need to be added to both enums, but at least the bit position only has to be defined once. FLOW_DIS_ENCAPSULATION is never set for mask, so it can't be exposed to userspace in an unsupported flags mask error message, so it will be placed one bit position above the last uAPI flag. Suggested-by: Alexander Lobakin Suggested-by: Jakub Kicinski Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-2-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/net/flow_dissector.h | 14 +++++++++++--- include/uapi/linux/pkt_cls.h | 3 +++ 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 3e47e123934d..c3fce070b912 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -7,6 +7,7 @@ #include #include #include +#include struct bpf_prog; struct net; @@ -24,9 +25,16 @@ struct flow_dissector_key_control { u32 flags; }; -#define FLOW_DIS_IS_FRAGMENT BIT(0) -#define FLOW_DIS_FIRST_FRAG BIT(1) -#define FLOW_DIS_ENCAPSULATION BIT(2) +/* The control flags are kept in sync with TCA_FLOWER_KEY_FLAGS_*, as those + * flags are exposed to userspace in some error paths, ie. unsupported flags. + */ +enum flow_dissector_ctrl_flags { + FLOW_DIS_IS_FRAGMENT = TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, + FLOW_DIS_FIRST_FRAG = TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, + + /* These flags are internal to the kernel */ + FLOW_DIS_ENCAPSULATION = (TCA_FLOWER_KEY_FLAGS_MAX << 1), +}; enum flow_dissect_ret { FLOW_DISSECT_RET_OUT_GOOD, diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index b6d38f5fd7c0..12db276f0c11 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -677,8 +677,11 @@ enum { enum { TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), + __TCA_FLOWER_KEY_FLAGS_MAX, }; +#define TCA_FLOWER_KEY_FLAGS_MAX (__TCA_FLOWER_KEY_FLAGS_MAX - 1) + enum { TCA_FLOWER_KEY_CFM_OPT_UNSPEC, TCA_FLOWER_KEY_CFM_MD_LEVEL, -- cgit v1.2.3 From bfda5a63137bc83c344c4d995f404c8e701ff0fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Sat, 13 Jul 2024 02:19:00 +0000 Subject: net/sched: flower: define new tunnel flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define new TCA_FLOWER_KEY_FLAGS_* flags for use in struct flow_dissector_key_control, covering the same flags as currently exposed through TCA_FLOWER_KEY_ENC_FLAGS. Put the new flags under FLOW_DIS_F_*. The idea is that we can later, move the existing flags under FLOW_DIS_F_* as well. The ynl flag names have been taken from the RFC iproute2 patch. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20240713021911.1631517-4-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/net/flow_dissector.h | 7 ++++++- include/uapi/linux/pkt_cls.h | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index c3fce070b912..460ea65b9e59 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -17,7 +17,8 @@ struct sk_buff; * struct flow_dissector_key_control: * @thoff: Transport header offset * @addr_type: Type of key. One of FLOW_DISSECTOR_KEY_* - * @flags: Key flags. Any of FLOW_DIS_(IS_FRAGMENT|FIRST_FRAGENCAPSULATION) + * @flags: Key flags. + * Any of FLOW_DIS_(IS_FRAGMENT|FIRST_FRAG|ENCAPSULATION|F_*) */ struct flow_dissector_key_control { u16 thoff; @@ -31,6 +32,10 @@ struct flow_dissector_key_control { enum flow_dissector_ctrl_flags { FLOW_DIS_IS_FRAGMENT = TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_FIRST_FRAG = TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST, + FLOW_DIS_F_TUNNEL_CSUM = TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM, + FLOW_DIS_F_TUNNEL_DONT_FRAGMENT = TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT, + FLOW_DIS_F_TUNNEL_OAM = TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM, + FLOW_DIS_F_TUNNEL_CRIT_OPT = TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT, /* These flags are internal to the kernel */ FLOW_DIS_ENCAPSULATION = (TCA_FLOWER_KEY_FLAGS_MAX << 1), diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 12db276f0c11..3dc4388e944c 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -677,6 +677,10 @@ enum { enum { TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0), TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1), + TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM = (1 << 2), + TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT = (1 << 3), + TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM = (1 << 4), + TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT = (1 << 5), __TCA_FLOWER_KEY_FLAGS_MAX, }; -- cgit v1.2.3 From 11036bd7a0b3b05c5e1f43d107ddb02abf83adb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Sat, 13 Jul 2024 02:19:06 +0000 Subject: net/sched: cls_flower: rework TCA_FLOWER_KEY_ENC_FLAGS usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch changes how TCA_FLOWER_KEY_ENC_FLAGS is used, so that it is used with TCA_FLOWER_KEY_FLAGS_* flags, in the same way as TCA_FLOWER_KEY_FLAGS is currently used. Where TCA_FLOWER_KEY_FLAGS uses {key,mask}->control.flags, then TCA_FLOWER_KEY_ENC_FLAGS now uses {key,mask}->enc_control.flags, therefore {key,mask}->enc_flags is now unused. As the generic fl_set_key_flags/fl_dump_key_flags() is used with encap set to true, then fl_{set,dump}_key_enc_flags() is removed. This breaks unreleased userspace API (net-next since 2024-06-04). Signed-off-by: Asbjørn Sloth Tønnesen Tested-by: Davide Caratti Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-10-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/uapi/linux/pkt_cls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 3dc4388e944c..d36d9cdf0c00 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -554,8 +554,8 @@ enum { TCA_FLOWER_KEY_SPI, /* be32 */ TCA_FLOWER_KEY_SPI_MASK, /* be32 */ - TCA_FLOWER_KEY_ENC_FLAGS, /* u32 */ - TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* u32 */ + TCA_FLOWER_KEY_ENC_FLAGS, /* be32 */ + TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* be32 */ __TCA_FLOWER_MAX, }; -- cgit v1.2.3 From db5271d50ec155abf287a27fa84e2e33a81dbd55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?= Date: Sat, 13 Jul 2024 02:19:08 +0000 Subject: flow_dissector: cleanup FLOW_DISSECTOR_KEY_ENC_FLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that TCA_FLOWER_KEY_ENC_FLAGS is unused, as it's former data is stored behind TCA_FLOWER_KEY_ENC_CONTROL, then remove the last bits of FLOW_DISSECTOR_KEY_ENC_FLAGS. FLOW_DISSECTOR_KEY_ENC_FLAGS is unreleased, and have been in net-next since 2024-06-04. Signed-off-by: Asbjørn Sloth Tønnesen Tested-by: Davide Caratti Reviewed-by: Davide Caratti Link: https://patch.msgid.link/20240713021911.1631517-12-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/net/flow_dissector.h | 9 --------- include/net/ip_tunnels.h | 12 ------------ 2 files changed, 21 deletions(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 460ea65b9e59..ced79dc8e856 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -342,14 +342,6 @@ struct flow_dissector_key_cfm { #define FLOW_DIS_CFM_MDL_MASK GENMASK(7, 5) #define FLOW_DIS_CFM_MDL_MAX 7 -/** - * struct flow_dissector_key_enc_flags: tunnel metadata control flags - * @flags: tunnel control flags - */ -struct flow_dissector_key_enc_flags { - u32 flags; -}; - enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -384,7 +376,6 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */ FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */ FLOW_DISSECTOR_KEY_IPSEC, /* struct flow_dissector_key_ipsec */ - FLOW_DISSECTOR_KEY_ENC_FLAGS, /* struct flow_dissector_key_enc_flags */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 3877315cf8b8..1db2417b8ff5 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -247,18 +247,6 @@ static inline bool ip_tunnel_is_options_present(const unsigned long *flags) return ip_tunnel_flags_intersect(flags, present); } -static inline void ip_tunnel_set_encflags_present(unsigned long *flags) -{ - IP_TUNNEL_DECLARE_FLAGS(present) = { }; - - __set_bit(IP_TUNNEL_CSUM_BIT, present); - __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, present); - __set_bit(IP_TUNNEL_OAM_BIT, present); - __set_bit(IP_TUNNEL_CRIT_OPT_BIT, present); - - ip_tunnel_flags_or(flags, flags, present); -} - static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(supp) = { }; -- cgit v1.2.3 From a325742505f6f08141fd7a530a82a907780bfd2c Mon Sep 17 00:00:00 2001 From: Tio Zhang Date: Wed, 3 Jul 2024 11:33:53 +0800 Subject: tracing/sched: sched_switch: place prev_comm and next_comm in right order Switch the order of prev_comm and next_comm in sched_switch's code to align with its printing order. Cc: Signed-off-by: Tio Zhang Link: https://lore.kernel.org/20240703033353.GA2833@didi-ThinkCentre-M930t-N000 Reviewed-by: Madadi Vineeth Reddy Signed-off-by: Steven Rostedt (Google) --- include/trace/events/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 6df2b4685b08..9ea4c404bd4e 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -239,11 +239,11 @@ TRACE_EVENT(sched_switch, ), TP_fast_assign( - memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); __entry->prev_pid = prev->pid; __entry->prev_prio = prev->prio; __entry->prev_state = __trace_sched_switch_state(preempt, prev_state, prev); - memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); __entry->next_pid = next->pid; __entry->next_prio = next->prio; /* XXX SCHED_DEADLINE */ -- cgit v1.2.3 From c5eaf1b3f824369b6dcb33a39232b871d2ca8e33 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Jul 2024 08:49:12 -0700 Subject: PCI: Add Meta Platforms vendor ID Add Meta as a vendor ID for PCI devices so we can use the macro for future drivers. Signed-off-by: Alexander Duyck Acked-by: Bjorn Helgaas Link: https://patch.msgid.link/172079935272.1778861.13619056509276833225.stgit@ahduyck-xeon-server.home.arpa Signed-off-by: Jakub Kicinski --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 677aea20d3e1..76a8f2d6bd64 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2601,6 +2601,8 @@ #define PCI_VENDOR_ID_HYGON 0x1d94 +#define PCI_VENDOR_ID_META 0x1d9b + #define PCI_VENDOR_ID_FUNGIBLE 0x1dad #define PCI_VENDOR_ID_HXT 0x1dbf -- cgit v1.2.3 From 33c97e7c0338d921c2ae0c2e54bf17121007fb94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Thu, 11 Jul 2024 16:54:57 +0000 Subject: landlock: Clarify documentation for struct landlock_ruleset_attr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The explanation for @handled_access_fs and @handled_access_net has significant overlap and is better explained together. * Explain the commonalities in structure-level documentation. * Clarify some wording and break up longer sentences. * Put emphasis on the word "handled" to make it clearer that "handled" is a term with special meaning in the context of Landlock. I'd like to transfer this wording into the man pages as well. Signed-off-by: Günther Noack Cc: Alejandro Colomar Cc: Konstantin Meskhidze Cc: linux-security-module@vger.kernel.org Link: https://lore.kernel.org/r/20240711165456.2148590-2-gnoack@google.com [mic: Format commit message] Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 68625e728f43..e76186da3260 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -12,29 +12,36 @@ #include /** - * struct landlock_ruleset_attr - Ruleset definition + * struct landlock_ruleset_attr - Ruleset definition. * - * Argument of sys_landlock_create_ruleset(). This structure can grow in - * future versions. + * Argument of sys_landlock_create_ruleset(). + * + * This structure defines a set of *handled access rights*, a set of actions on + * different object types, which should be denied by default when the ruleset is + * enacted. Vice versa, access rights that are not specifically listed here are + * not going to be denied by this ruleset when it is enacted. + * + * For historical reasons, the %LANDLOCK_ACCESS_FS_REFER right is always denied + * by default, even when its bit is not set in @handled_access_fs. In order to + * add new rules with this access right, the bit must still be set explicitly + * (cf. `Filesystem flags`_). + * + * The explicit listing of *handled access rights* is required for backwards + * compatibility reasons. In most use cases, processes that use Landlock will + * *handle* a wide range or all access rights that they know about at build time + * (and that they have tested with a kernel that supported them all). + * + * This structure can grow in future Landlock versions. */ struct landlock_ruleset_attr { /** - * @handled_access_fs: Bitmask of actions (cf. `Filesystem flags`_) - * that is handled by this ruleset and should then be forbidden if no - * rule explicitly allow them: it is a deny-by-default list that should - * contain as much Landlock access rights as possible. Indeed, all - * Landlock filesystem access rights that are not part of - * handled_access_fs are allowed. This is needed for backward - * compatibility reasons. One exception is the - * %LANDLOCK_ACCESS_FS_REFER access right, which is always implicitly - * handled, but must still be explicitly handled to add new rules with - * this access right. + * @handled_access_fs: Bitmask of handled filesystem actions + * (cf. `Filesystem flags`_). */ __u64 handled_access_fs; /** - * @handled_access_net: Bitmask of actions (cf. `Network flags`_) - * that is handled by this ruleset and should then be forbidden if no - * rule explicitly allow them. + * @handled_access_net: Bitmask of handled network actions (cf. `Network + * flags`_). */ __u64 handled_access_net; }; -- cgit v1.2.3 From 415fb383ec2bbe4d4bb1a1b5593cd67eb058f1de Mon Sep 17 00:00:00 2001 From: Francis Pravin Date: Mon, 15 Jul 2024 06:31:57 +0530 Subject: nvme-core: choose PIF from QPIF if QPIFS supports and PIF is QTYPE As per TP4141a: "If the Qualified Protection Information Format Support(QPIFS) bit is set to 1 and the Protection Information Format(PIF) field is set to 11b (i.e., Qualified Type), then the pif is as defined in the Qualified Protection Information Format (QPIF) field." So, choose PIF from QPIF if QPIFS supports and PIF is QTYPE. Signed-off-by: Francis Pravin Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 57e27e48c913..5c2290f0d5af 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -483,6 +483,9 @@ enum { NVME_ID_NS_NVM_STS_MASK = 0x7f, NVME_ID_NS_NVM_GUARD_SHIFT = 7, NVME_ID_NS_NVM_GUARD_MASK = 0x3, + NVME_ID_NS_NVM_QPIF_SHIFT = 9, + NVME_ID_NS_NVM_QPIF_MASK = 0xf, + NVME_ID_NS_NVM_QPIFS = 1 << 3, }; static inline __u8 nvme_elbaf_sts(__u32 elbaf) @@ -495,6 +498,11 @@ static inline __u8 nvme_elbaf_guard_type(__u32 elbaf) return (elbaf >> NVME_ID_NS_NVM_GUARD_SHIFT) & NVME_ID_NS_NVM_GUARD_MASK; } +static inline __u8 nvme_elbaf_qualified_guard_type(__u32 elbaf) +{ + return (elbaf >> NVME_ID_NS_NVM_QPIF_SHIFT) & NVME_ID_NS_NVM_QPIF_MASK; +} + struct nvme_id_ctrl_nvm { __u8 vsl; __u8 wzsl; @@ -574,6 +582,7 @@ enum { NVME_NVM_NS_16B_GUARD = 0, NVME_NVM_NS_32B_GUARD = 1, NVME_NVM_NS_64B_GUARD = 2, + NVME_NVM_NS_QTYPE_GUARD = 3, }; static inline __u8 nvme_lbaf_index(__u8 flbas) -- cgit v1.2.3 From 88caf544c9305313e1c48ac1a437faa5df8fff06 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 1 Jul 2024 17:31:46 -0500 Subject: KVM: SEV: Provide support for SNP_GUEST_REQUEST NAE event Version 2 of GHCB specification added support for the SNP Guest Request Message NAE event. The event allows for an SEV-SNP guest to make requests to the SEV-SNP firmware through the hypervisor using the SNP_GUEST_REQUEST API defined in the SEV-SNP firmware specification. This is used by guests primarily to request attestation reports from firmware. There are other request types are available as well, but the specifics of what guest requests are being made generally does not affect how they are handled by the hypervisor, which only serves as a proxy for the guest requests and firmware responses. Implement handling for these events. When an SNP Guest Request is issued, the guest will provide its own request/response pages, which could in theory be passed along directly to firmware. However, these pages would need special care: - Both pages are from shared guest memory, so they need to be protected from migration/etc. occurring while firmware reads/writes to them. At a minimum, this requires elevating the ref counts and potentially needing an explicit pinning of the memory. This places additional restrictions on what type of memory backends userspace can use for shared guest memory since there would be some reliance on using refcounted pages. - The response page needs to be switched to Firmware-owned state before the firmware can write to it, which can lead to potential host RMP #PFs if the guest is misbehaved and hands the host a guest page that KVM is writing to for other reasons (e.g. virtio buffers). Both of these issues can be avoided completely by using separately-allocated bounce pages for both the request/response pages and passing those to firmware instead. So that's the approach taken here. Signed-off-by: Brijesh Singh Co-developed-by: Alexey Kardashevskiy Signed-off-by: Alexey Kardashevskiy Co-developed-by: Ashish Kalra Signed-off-by: Ashish Kalra Reviewed-by: Tom Lendacky Reviewed-by: Liam Merwick [mdr: ensure FW command failures are indicated to guest, drop extended request handling to be re-written as separate patch, massage commit] Signed-off-by: Michael Roth Message-ID: <20240701223148.3798365-2-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/sev-guest.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h index 154a87a1eca9..fcdfea767fca 100644 --- a/include/uapi/linux/sev-guest.h +++ b/include/uapi/linux/sev-guest.h @@ -89,6 +89,9 @@ struct snp_ext_report_req { #define SNP_GUEST_FW_ERR_MASK GENMASK_ULL(31, 0) #define SNP_GUEST_VMM_ERR_SHIFT 32 #define SNP_GUEST_VMM_ERR(x) (((u64)x) << SNP_GUEST_VMM_ERR_SHIFT) +#define SNP_GUEST_FW_ERR(x) ((x) & SNP_GUEST_FW_ERR_MASK) +#define SNP_GUEST_ERR(vmm_err, fw_err) (SNP_GUEST_VMM_ERR(vmm_err) | \ + SNP_GUEST_FW_ERR(fw_err)) #define SNP_GUEST_VMM_ERR_INVALID_LEN 1 #define SNP_GUEST_VMM_ERR_BUSY 2 -- cgit v1.2.3 From de1177e56005dccd9729d4ca331de64f5d463b90 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 8 Jul 2024 09:47:57 +0200 Subject: virtio: make virtio_find_vqs() call virtio_find_vqs_ctx() In order to prepare for conversion of virtio_find_vqs*() arguments, make virtio_find_vqs() to call virtio_find_vqs_ctx() instead of op directly. Signed-off-by: Jiri Pirko Message-Id: <20240708074814.1739223-3-jiri@resnulli.us> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index da9b271b54db..d19eaf6bafbf 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -225,22 +225,23 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, } static inline -int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, +int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], + const char * const names[], const bool *ctx, struct irq_affinity *desc) { - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc); + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, + desc); } static inline -int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, +int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], const bool *ctx, + const char * const names[], struct irq_affinity *desc) { - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, - desc); + return virtio_find_vqs_ctx(vdev, nvqs, vqs, callbacks, + names, NULL, desc); } /** -- cgit v1.2.3 From 959538c11a88298672195a0eb3289c0f2f88b02d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 8 Jul 2024 09:47:58 +0200 Subject: virtio: make virtio_find_single_vq() call virtio_find_vqs() In order to prepare for conversion of virtio_find_vqs*() arguments, make virtio_find_single_vq() to call virtio_find_vqs() instead of op directly. Signed-off-by: Jiri Pirko Message-Id: <20240708074814.1739223-4-jiri@resnulli.us> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index d19eaf6bafbf..82a1d798b2f1 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -210,20 +210,6 @@ static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev) return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM); } -static inline -struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, - vq_callback_t *c, const char *n) -{ - vq_callback_t *callbacks[] = { c }; - const char *names[] = { n }; - struct virtqueue *vq; - int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL, - NULL); - if (err < 0) - return ERR_PTR(err); - return vq; -} - static inline int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], @@ -244,6 +230,20 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, names, NULL, desc); } +static inline +struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, + vq_callback_t *c, const char *n) +{ + vq_callback_t *callbacks[] = { c }; + const char *names[] = { n }; + struct virtqueue *vq; + int err = virtio_find_vqs(vdev, 1, &vq, callbacks, names, NULL); + + if (err < 0) + return ERR_PTR(err); + return vq; +} + /** * virtio_synchronize_cbs - synchronize with virtqueue callbacks * @dev: the virtio device -- cgit v1.2.3 From c502eb85c34e29bb185e3d15dd9b8fce8a74f303 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 8 Jul 2024 09:47:59 +0200 Subject: virtio: introduce virtio_queue_info struct and find_vqs_info() config op Introduce a structure virtio_queue_info to carry name, callback and ctx together. In order to allow config implementations to accept config op with array of virtio_queue_info structures, introduce a new find_vqs_info() op. Do the needed conversion in virtio_find_vqs_ctx(). Note that whole virtio_find_vqs_ctx() is going to be eventually removed at the and of this patchset. Suggested-by: Xuan Zhuo Signed-off-by: Jiri Pirko Message-Id: <20240708074814.1739223-5-jiri@resnulli.us> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 55 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 82a1d798b2f1..fb7a1fd00ee5 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -18,6 +18,20 @@ struct virtio_shm_region { typedef void vq_callback_t(struct virtqueue *); +/** + * struct virtqueue_info - Info for a virtqueue passed to find_vqs(). + * @name: virtqueue description. Used mainly for debugging, NULL for + * a virtqueue unused by the driver. + * @callback: A callback to invoke on a used buffer notification. + * NULL for a virtqueue that does not need a callback. + * @ctx: A flag to indicate to maintain an extra context per virtqueue. + */ +struct virtqueue_info { + const char *name; + vq_callback_t *callback; + bool ctx; +}; + /** * struct virtio_config_ops - operations for configuring a virtio device * Note: Do not assume that a transport implements all of the operations @@ -58,6 +72,12 @@ typedef void vq_callback_t(struct virtqueue *); * names: array of virtqueue names (mainly for debugging) * include a NULL entry for vqs unused by driver * Returns 0 on success or error status + * @find_vqs_info: find virtqueues and instantiate them. + * vdev: the virtio_device + * nvqs: the number of virtqueues to find + * vqs: on success, includes new virtqueues + * vqs_info: array of virtqueue info structures + * Returns 0 on success or error status * @del_vqs: free virtqueues found by find_vqs(). * @synchronize_cbs: synchronize with the virtqueue callbacks (optional) * The function guarantees that all memory operations on the @@ -109,6 +129,10 @@ struct virtio_config_ops { struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], const bool *ctx, struct irq_affinity *desc); + int (*find_vqs_info)(struct virtio_device *vdev, unsigned int nvqs, + struct virtqueue *vqs[], + struct virtqueue_info vqs_info[], + struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); void (*synchronize_cbs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); @@ -117,7 +141,7 @@ struct virtio_config_ops { int (*set_vq_affinity)(struct virtqueue *vq, const struct cpumask *cpu_mask); const struct cpumask *(*get_vq_affinity)(struct virtio_device *vdev, - int index); + int index); bool (*get_shm_region)(struct virtio_device *vdev, struct virtio_shm_region *region, u8 id); int (*disable_vq_and_reset)(struct virtqueue *vq); @@ -210,14 +234,39 @@ static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev) return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM); } +static inline +int virtio_find_vqs_info(struct virtio_device *vdev, unsigned int nvqs, + struct virtqueue *vqs[], + struct virtqueue_info vqs_info[], + struct irq_affinity *desc) +{ + return vdev->config->find_vqs_info(vdev, nvqs, vqs, vqs_info, desc); +} + static inline int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], const bool *ctx, struct irq_affinity *desc) { - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, - desc); + struct virtqueue_info *vqs_info; + int err, i; + + if (!vdev->config->find_vqs_info) + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, + names, ctx, desc); + + vqs_info = kmalloc_array(nvqs, sizeof(*vqs_info), GFP_KERNEL); + if (!vqs_info) + return -ENOMEM; + for (i = 0; i < nvqs; i++) { + vqs_info[i].name = names[i]; + vqs_info[i].callback = callbacks[i]; + vqs_info[i].ctx = ctx ? ctx[i] : false; + } + err = virtio_find_vqs_info(vdev, nvqs, vqs, vqs_info, desc); + kfree(vqs_info); + return err; } static inline -- cgit v1.2.3 From 992648f5a61f1c9fab800fbc42dea31727cd8a55 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 8 Jul 2024 09:48:02 +0200 Subject: virtio: call virtio_find_vqs_info() from virtio_find_single_vq() directly Since there are no more implementations of find_vqs() op, call virtio_find_vqs_info() from virtio_find_single_vq() directly. Signed-off-by: Jiri Pirko Message-Id: <20240708074814.1739223-8-jiri@resnulli.us> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index fb7a1fd00ee5..23ce1aec54e8 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -283,10 +283,11 @@ static inline struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, vq_callback_t *c, const char *n) { - vq_callback_t *callbacks[] = { c }; - const char *names[] = { n }; + struct virtqueue_info vqs_info[] = { + { n, c }, + }; struct virtqueue *vq; - int err = virtio_find_vqs(vdev, 1, &vq, callbacks, names, NULL); + int err = virtio_find_vqs_info(vdev, 1, &vq, vqs_info, NULL); if (err < 0) return ERR_PTR(err); -- cgit v1.2.3 From 18cd029fd7f7a85d522973f34c413a2b009255ac Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 8 Jul 2024 09:48:03 +0200 Subject: virtio: remove the original find_vqs() op As it is no longer used, remove it. Signed-off-by: Jiri Pirko Message-Id: <20240708074814.1739223-9-jiri@resnulli.us> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 23ce1aec54e8..1ae346bd2dbd 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -63,15 +63,6 @@ struct virtqueue_info { * After this, status and feature negotiation must be done again * Device must not be reset from its vq/config callbacks, or in * parallel with being added/removed. - * @find_vqs: find virtqueues and instantiate them. - * vdev: the virtio_device - * nvqs: the number of virtqueues to find - * vqs: on success, includes new virtqueues - * callbacks: array of callbacks, for each virtqueue - * include a NULL entry for vqs that do not need a callback - * names: array of virtqueue names (mainly for debugging) - * include a NULL entry for vqs unused by driver - * Returns 0 on success or error status * @find_vqs_info: find virtqueues and instantiate them. * vdev: the virtio_device * nvqs: the number of virtqueues to find @@ -125,10 +116,6 @@ struct virtio_config_ops { u8 (*get_status)(struct virtio_device *vdev); void (*set_status)(struct virtio_device *vdev, u8 status); void (*reset)(struct virtio_device *vdev); - int (*find_vqs)(struct virtio_device *, unsigned nvqs, - struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], const bool *ctx, - struct irq_affinity *desc); int (*find_vqs_info)(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[], @@ -252,10 +239,6 @@ int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, struct virtqueue_info *vqs_info; int err, i; - if (!vdev->config->find_vqs_info) - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, - names, ctx, desc); - vqs_info = kmalloc_array(nvqs, sizeof(*vqs_info), GFP_KERNEL); if (!vqs_info) return -ENOMEM; -- cgit v1.2.3 From b49503eaf9c74c3d0efd1e8331aba37ce3c1fd68 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 8 Jul 2024 09:48:04 +0200 Subject: virtio: rename find_vqs_info() op to find_vqs() Since the original find_vqs() is no longer present, rename find_vqs_info() back to find_vqs(). Signed-off-by: Jiri Pirko Message-Id: <20240708074814.1739223-10-jiri@resnulli.us> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 1ae346bd2dbd..7848fba7f48a 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -63,7 +63,7 @@ struct virtqueue_info { * After this, status and feature negotiation must be done again * Device must not be reset from its vq/config callbacks, or in * parallel with being added/removed. - * @find_vqs_info: find virtqueues and instantiate them. + * @find_vqs: find virtqueues and instantiate them. * vdev: the virtio_device * nvqs: the number of virtqueues to find * vqs: on success, includes new virtqueues @@ -116,10 +116,10 @@ struct virtio_config_ops { u8 (*get_status)(struct virtio_device *vdev); void (*set_status)(struct virtio_device *vdev, u8 status); void (*reset)(struct virtio_device *vdev); - int (*find_vqs_info)(struct virtio_device *vdev, unsigned int nvqs, - struct virtqueue *vqs[], - struct virtqueue_info vqs_info[], - struct irq_affinity *desc); + int (*find_vqs)(struct virtio_device *vdev, unsigned int nvqs, + struct virtqueue *vqs[], + struct virtqueue_info vqs_info[], + struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); void (*synchronize_cbs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); @@ -227,7 +227,7 @@ int virtio_find_vqs_info(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue_info vqs_info[], struct irq_affinity *desc) { - return vdev->config->find_vqs_info(vdev, nvqs, vqs, vqs_info, desc); + return vdev->config->find_vqs(vdev, nvqs, vqs, vqs_info, desc); } static inline -- cgit v1.2.3 From 3e8d51c7765dac2a70fe2ad222a8ad244c9f29a4 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 8 Jul 2024 09:48:13 +0200 Subject: virtio: remove unused virtio_find_vqs() and virtio_find_vqs_ctx() helpers All callers of virtio_find_vqs() and virtio_find_vqs_ctx() were converted to use virtio_find_vqs_info(). Remove no longer used helpers. Signed-off-by: Jiri Pirko Message-Id: <20240708074814.1739223-19-jiri@resnulli.us> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 7848fba7f48a..b6b615c3e21d 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -230,38 +230,6 @@ int virtio_find_vqs_info(struct virtio_device *vdev, unsigned int nvqs, return vdev->config->find_vqs(vdev, nvqs, vqs, vqs_info, desc); } -static inline -int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], const bool *ctx, - struct irq_affinity *desc) -{ - struct virtqueue_info *vqs_info; - int err, i; - - vqs_info = kmalloc_array(nvqs, sizeof(*vqs_info), GFP_KERNEL); - if (!vqs_info) - return -ENOMEM; - for (i = 0; i < nvqs; i++) { - vqs_info[i].name = names[i]; - vqs_info[i].callback = callbacks[i]; - vqs_info[i].ctx = ctx ? ctx[i] : false; - } - err = virtio_find_vqs_info(vdev, nvqs, vqs, vqs_info, desc); - kfree(vqs_info); - return err; -} - -static inline -int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, - struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], - struct irq_affinity *desc) -{ - return virtio_find_vqs_ctx(vdev, nvqs, vqs, callbacks, - names, NULL, desc); -} - static inline struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, vq_callback_t *c, const char *n) -- cgit v1.2.3 From 6c85d6b653caeba2ef982925703cbb4f2b3b3163 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 8 Jul 2024 09:48:14 +0200 Subject: virtio: rename virtio_find_vqs_info() to virtio_find_vqs() Since the original virtio_find_vqs() is no longer present, rename virtio_find_vqs_info() back to virtio_find_vqs(). Signed-off-by: Jiri Pirko Message-Id: <20240708074814.1739223-20-jiri@resnulli.us> Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index b6b615c3e21d..ab4b9a3fef6b 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -222,10 +222,10 @@ static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev) } static inline -int virtio_find_vqs_info(struct virtio_device *vdev, unsigned int nvqs, - struct virtqueue *vqs[], - struct virtqueue_info vqs_info[], - struct irq_affinity *desc) +int virtio_find_vqs(struct virtio_device *vdev, unsigned int nvqs, + struct virtqueue *vqs[], + struct virtqueue_info vqs_info[], + struct irq_affinity *desc) { return vdev->config->find_vqs(vdev, nvqs, vqs, vqs_info, desc); } @@ -238,7 +238,7 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, { n, c }, }; struct virtqueue *vq; - int err = virtio_find_vqs_info(vdev, 1, &vq, vqs_info, NULL); + int err = virtio_find_vqs(vdev, 1, &vq, vqs_info, NULL); if (err < 0) return ERR_PTR(err); -- cgit v1.2.3 From 332d2c1d713e232e163386c35a3ba0c1b90df83f Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Wed, 1 May 2024 03:52:10 -0500 Subject: crypto: ccp: Add the SNP_VLEK_LOAD command When requesting an attestation report a guest is able to specify whether it wants SNP firmware to sign the report using either a Versioned Chip Endorsement Key (VCEK), which is derived from chip-unique secrets, or a Versioned Loaded Endorsement Key (VLEK) which is obtained from an AMD Key Derivation Service (KDS) and derived from seeds allocated to enrolled cloud service providers (CSPs). For VLEK keys, an SNP_VLEK_LOAD SNP firmware command is used to load them into the system after obtaining them from the KDS. Add a corresponding userspace interface so to allow the loading of VLEK keys into the system. See SEV-SNP Firmware ABI 1.54, SNP_VLEK_LOAD for more details. Reviewed-by: Tom Lendacky Signed-off-by: Michael Roth Message-ID: <20240501085210.2213060-21-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- include/uapi/linux/psp-sev.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index b7a2c2ee35b7..2289b7c76c59 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -31,6 +31,7 @@ enum { SNP_PLATFORM_STATUS, SNP_COMMIT, SNP_SET_CONFIG, + SNP_VLEK_LOAD, SEV_MAX, }; @@ -214,6 +215,32 @@ struct sev_user_data_snp_config { __u8 rsvd1[52]; } __packed; +/** + * struct sev_data_snp_vlek_load - SNP_VLEK_LOAD structure + * + * @len: length of the command buffer read by the PSP + * @vlek_wrapped_version: version of wrapped VLEK hashstick (Must be 0h) + * @rsvd: reserved + * @vlek_wrapped_address: address of a wrapped VLEK hashstick + * (struct sev_user_data_snp_wrapped_vlek_hashstick) + */ +struct sev_user_data_snp_vlek_load { + __u32 len; /* In */ + __u8 vlek_wrapped_version; /* In */ + __u8 rsvd[3]; /* In */ + __u64 vlek_wrapped_address; /* In */ +} __packed; + +/** + * struct sev_user_data_snp_vlek_wrapped_vlek_hashstick - Wrapped VLEK data + * + * @data: Opaque data provided by AMD KDS (as described in SEV-SNP Firmware ABI + * 1.54, SNP_VLEK_LOAD) + */ +struct sev_user_data_snp_wrapped_vlek_hashstick { + __u8 data[432]; /* In */ +} __packed; + /** * struct sev_issue_cmd - SEV ioctl parameters * -- cgit v1.2.3 From a7526fe8b94eced7d82aa00b2bcca44e39ae0769 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 11 Jul 2024 18:35:30 +0200 Subject: mm, slab: put should_failslab() back behind CONFIG_SHOULD_FAILSLAB Patch series "revert unconditional slab and page allocator fault injection calls". These two patches largely revert commits that added function call overhead into slab and page allocation hotpaths and that cannot be currently disabled even though related CONFIG_ options do exist. A much more involved solution that can keep the callsites always existing but hidden behind a static key if unused, is possible [1] and can be pursued by anyone who believes it's necessary. Meanwhile the fact the should_failslab() error injection is already not functional on kernels built with current gcc without anyone noticing [2], and lukewarm response to [1] suggests the need is not there. I believe it will be more fair to have the state after this series as a baseline for possible further optimisation, instead of the unconditional overhead. For example a possible compromise for anyone who's fine with an empty function call overhead but not the full CONFIG_FAILSLAB / CONFIG_FAIL_PAGE_ALLOC overhead is to reuse patch 1 from [1] but insert a static key check only inside should_failslab() and should_fail_alloc_page() before performing the more expensive checks. [1] https://lore.kernel.org/all/20240620-fault-injection-statickeys-v2-0-e23947d3d84b@suse.cz/#t [2] https://github.com/bpftrace/bpftrace/issues/3258 This patch (of 2): This mostly reverts commit 4f6923fbb352 ("mm: make should_failslab always available for fault injection"). The commit made should_failslab() a noinline function that's always called from the slab allocation hotpath, even if it's empty because CONFIG_SHOULD_FAILSLAB is not enabled, and there is no option to disable that call. This is visible in profiles and the function call overhead can be noticeable especially with cpu mitigations. Meanwhile the bpftrace program example in the commit silently does not work without CONFIG_SHOULD_FAILSLAB anyway with a recent gcc, because the empty function gets a .constprop clone that is actually being called (uselessly) from the slab hotpath, while the error injection is hooked to the original function that's not being called at all [1]. Thus put the whole should_failslab() function back behind CONFIG_SHOULD_FAILSLAB. It's not a complete revert of 4f6923fbb352 - the int return type that returns -ENOMEM on failure is preserved, as well ALLOW_ERROR_INJECTION annotation. The BTF_ID() record that was meanwhile added is also guarded by CONFIG_SHOULD_FAILSLAB. [1] https://github.com/bpftrace/bpftrace/issues/3258 Link: https://lkml.kernel.org/r/20240711-b4-fault-injection-reverts-v1-0-9e2651945d68@suse.cz Link: https://lkml.kernel.org/r/20240711-b4-fault-injection-reverts-v1-1-9e2651945d68@suse.cz Signed-off-by: Vlastimil Babka Cc: Akinobu Mita Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Christoph Lameter Cc: Daniel Borkmann Cc: David Rientjes Cc: Eduard Zingerman Cc: Hao Luo Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Martin KaFai Lau Cc: Mateusz Guzik Cc: Roman Gushchin Cc: Song Liu Cc: Stanislav Fomichev Cc: Yonghong Song Signed-off-by: Andrew Morton --- include/linux/fault-inject.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 6d5edef09d45..be6d0bc111ad 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -102,11 +102,10 @@ static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) } #endif /* CONFIG_FAIL_PAGE_ALLOC */ -int should_failslab(struct kmem_cache *s, gfp_t gfpflags); #ifdef CONFIG_FAILSLAB -extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags); +int should_failslab(struct kmem_cache *s, gfp_t gfpflags); #else -static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) +static inline int should_failslab(struct kmem_cache *s, gfp_t gfpflags) { return false; } -- cgit v1.2.3 From 53dabce2652fb854eae84609ce9c37429d5d87ba Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 11 Jul 2024 18:35:31 +0200 Subject: mm, page_alloc: put should_fail_alloc_page() back behing CONFIG_FAIL_PAGE_ALLOC This mostly reverts commit af3b854492f3 ("mm/page_alloc.c: allow error injection"). The commit made should_fail_alloc_page() a noinline function that's always called from the page allocation hotpath, even if it's empty because CONFIG_FAIL_PAGE_ALLOC is not enabled, and there is no option to disable it and prevent the associated function call overhead. As with the preceding patch "mm, slab: put should_failslab back behind CONFIG_SHOULD_FAILSLAB" and for the same reasons, put the should_fail_alloc_page() back behind the config option. When enabled, the ALLOW_ERROR_INJECTION and BTF_ID records are preserved so it's not a complete revert. Link: https://lkml.kernel.org/r/20240711-b4-fault-injection-reverts-v1-2-9e2651945d68@suse.cz Signed-off-by: Vlastimil Babka Cc: Akinobu Mita Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Christoph Lameter Cc: Daniel Borkmann Cc: David Rientjes Cc: Eduard Zingerman Cc: Hao Luo Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Martin KaFai Lau Cc: Mateusz Guzik Cc: Roman Gushchin Cc: Song Liu Cc: Stanislav Fomichev Cc: Yonghong Song Signed-off-by: Andrew Morton --- include/linux/fault-inject.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index be6d0bc111ad..354413950d34 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -91,12 +91,10 @@ static inline void fault_config_init(struct fault_config *config, struct kmem_cache; -bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order); - #ifdef CONFIG_FAIL_PAGE_ALLOC -bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order); +bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order); #else -static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) +static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) { return false; } -- cgit v1.2.3 From 4810a82c8a8ae06fe6496a23fcb89a4952603e60 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 11 Jul 2024 15:04:55 -0700 Subject: lib: add missing newline character in the warning message Link: https://lkml.kernel.org/r/20240711220457.1751071-1-surenb@google.com Fixes: 22d407b164ff ("lib: add allocation tagging support for memory allocation profiling") Signed-off-by: Suren Baghdasaryan Cc: Kees Cook Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Sourav Panda Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/alloc_tag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index abd24016a900..8c61ccd161ba 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -122,7 +122,7 @@ static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag "alloc_tag was not cleared (got tag for %s:%u)\n", ref->ct->filename, ref->ct->lineno); - WARN_ONCE(!tag, "current->alloc_tag not set"); + WARN_ONCE(!tag, "current->alloc_tag not set\n"); } static inline void alloc_tag_sub_check(union codetag_ref *ref) -- cgit v1.2.3 From fd8acc0097b91fab3104fa8a66ce2fd9cf8b0c11 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 11 Jul 2024 15:04:56 -0700 Subject: lib: reuse page_ext_data() to obtain codetag_ref codetag_ref_from_page_ext() reimplements the same calculation as page_ext_data(). Reuse existing function instead. Link: https://lkml.kernel.org/r/20240711220457.1751071-2-surenb@google.com Fixes: dcfe378c81f7 ("lib: introduce support for page allocation tagging") Signed-off-by: Suren Baghdasaryan Cc: Kees Cook Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Sourav Panda Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 9cacadbd61f8..acb1e9ce7981 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -15,7 +15,7 @@ extern struct page_ext_operations page_alloc_tagging_ops; static inline union codetag_ref *codetag_ref_from_page_ext(struct page_ext *page_ext) { - return (void *)page_ext + page_alloc_tagging_ops.offset; + return (union codetag_ref *)page_ext_data(page_ext, &page_alloc_tagging_ops); } static inline struct page_ext *page_ext_from_codetag_ref(union codetag_ref *ref) -- cgit v1.2.3 From 6ab42fe21c84d72da752923b4bd7075344f4a362 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 11 Jul 2024 15:04:57 -0700 Subject: alloc_tag: fix page_ext_get/page_ext_put sequence during page splitting pgalloc_tag_sub() might call page_ext_put() using a page different from the one used in page_ext_get() call. This does not pose an issue since page_ext_put() ignores this parameter as long as it's non-NULL but technically this is wrong. Fix it by storing the original page used in page_ext_get() and passing it to page_ext_put(). Link: https://lkml.kernel.org/r/20240711220457.1751071-3-surenb@google.com Fixes: be25d1d4e822 ("mm: create new codetag references during page splitting") Signed-off-by: Suren Baghdasaryan Cc: Kees Cook Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Sourav Panda Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index acb1e9ce7981..18cd0c0c73d9 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -71,6 +71,7 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) static inline void pgalloc_tag_split(struct page *page, unsigned int nr) { int i; + struct page_ext *first_page_ext; struct page_ext *page_ext; union codetag_ref *ref; struct alloc_tag *tag; @@ -78,7 +79,7 @@ static inline void pgalloc_tag_split(struct page *page, unsigned int nr) if (!mem_alloc_profiling_enabled()) return; - page_ext = page_ext_get(page); + first_page_ext = page_ext = page_ext_get(page); if (unlikely(!page_ext)) return; @@ -94,7 +95,7 @@ static inline void pgalloc_tag_split(struct page *page, unsigned int nr) page_ext = page_ext_next(page_ext); } out: - page_ext_put(page_ext); + page_ext_put(first_page_ext); } static inline struct alloc_tag *pgalloc_tag_get(struct page *page) -- cgit v1.2.3 From 667574e873b5f77a220b2a93329689f36fb56d5d Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 12 Jul 2024 11:13:14 +0800 Subject: mm/hugetlb: fix possible recursive locking detected warning When tries to demote 1G hugetlb folios, a lockdep warning is observed: ============================================ WARNING: possible recursive locking detected 6.10.0-rc6-00452-ga4d0275fa660-dirty #79 Not tainted -------------------------------------------- bash/710 is trying to acquire lock: ffffffff8f0a7850 (&h->resize_lock){+.+.}-{3:3}, at: demote_store+0x244/0x460 but task is already holding lock: ffffffff8f0a6f48 (&h->resize_lock){+.+.}-{3:3}, at: demote_store+0xae/0x460 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&h->resize_lock); lock(&h->resize_lock); *** DEADLOCK *** May be due to missing lock nesting notation 4 locks held by bash/710: #0: ffff8f118439c3f0 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x64/0xe0 #1: ffff8f11893b9e88 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0xf8/0x1d0 #2: ffff8f1183dc4428 (kn->active#98){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x100/0x1d0 #3: ffffffff8f0a6f48 (&h->resize_lock){+.+.}-{3:3}, at: demote_store+0xae/0x460 stack backtrace: CPU: 3 PID: 710 Comm: bash Not tainted 6.10.0-rc6-00452-ga4d0275fa660-dirty #79 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x68/0xa0 __lock_acquire+0x10f2/0x1ca0 lock_acquire+0xbe/0x2d0 __mutex_lock+0x6d/0x400 demote_store+0x244/0x460 kernfs_fop_write_iter+0x12c/0x1d0 vfs_write+0x380/0x540 ksys_write+0x64/0xe0 do_syscall_64+0xb9/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fa61db14887 RSP: 002b:00007ffc56c48358 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007fa61db14887 RDX: 0000000000000002 RSI: 000055a030050220 RDI: 0000000000000001 RBP: 000055a030050220 R08: 00007fa61dbd1460 R09: 000000007fffffff R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000002 R13: 00007fa61dc1b780 R14: 00007fa61dc17600 R15: 00007fa61dc16a00 Lockdep considers this an AA deadlock because the different resize_lock mutexes reside in the same lockdep class, but this is a false positive. Place them in distinct classes to avoid these warnings. Link: https://lkml.kernel.org/r/20240712031314.2570452-1-linmiaohe@huawei.com Fixes: 8531fc6f52f5 ("hugetlb: add hugetlb demote page support") Signed-off-by: Miaohe Lin Acked-by: Muchun Song Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ecd0ceeea206..c9bf68c239a0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -663,6 +663,7 @@ HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable) /* Defines one hugetlb page size */ struct hstate { struct mutex resize_lock; + struct lock_class_key resize_key; int next_nid_to_alloc; int next_nid_to_free; unsigned int order; -- cgit v1.2.3 From 67856f44da381973caf4eb692ad2cca1de7b2d37 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 15 Jul 2024 08:25:33 +0300 Subject: ia64: scrub ia64 from poison.h Link: https://lkml.kernel.org/r/c72e5467-06a8-4739-ae6a-7c84c96cad77@p183 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton --- include/linux/poison.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/poison.h b/include/linux/poison.h index 1f0ee2459f2a..a265b499033b 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -52,12 +52,6 @@ /********** arch/$ARCH/mm/init.c **********/ #define POISON_FREE_INITMEM 0xcc -/********** arch/ia64/hp/common/sba_iommu.c **********/ -/* - * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a - * value of "SBAIOMMU POISON\0" for spill-over poisoning. - */ - /********** fs/jbd/journal.c **********/ #define JBD_POISON_FREE 0x5b #define JBD2_POISON_FREE 0x5c -- cgit v1.2.3 From c14112a5574ff5cf3de198ab6eeff53ac1234068 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 17 Jul 2024 10:29:16 -0700 Subject: driver core: auxiliary bus: Fix documentation of auxiliary_device Fix the documentation of the below field of struct auxiliary_device include/linux/auxiliary_bus.h:150: warning: Function parameter or struct member 'sysfs' not described in 'auxiliary_device' include/linux/auxiliary_bus.h:150: warning: Excess struct member 'irqs' description in 'auxiliary_device' include/linux/auxiliary_bus.h:150: warning: Excess struct member 'lock' description in 'auxiliary_device' include/linux/auxiliary_bus.h:150: warning: Excess struct member 'irq_dir_exists' description in 'auxiliary_device' Fixes: a808878308a8 ("driver core: auxiliary bus: show auxiliary device IRQs") Reported-by: Stephen Rothwell Signed-off-by: Shay Drory Signed-off-by: Saeed Mahameed Link: https://patch.msgid.link/20240717172916.595808-1-saeed@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/auxiliary_bus.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index 3ba4487c9cd9..1539bbd263d2 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -58,9 +58,10 @@ * in * @name: Match name found by the auxiliary device driver, * @id: unique identitier if multiple devices of the same name are exported, - * @irqs: irqs xarray contains irq indices which are used by the device, - * @lock: Synchronize irq sysfs creation, - * @irq_dir_exists: whether "irqs" directory exists, + * @sysfs: embedded struct which hold all sysfs related fields, + * @sysfs.irqs: irqs xarray contains irq indices which are used by the device, + * @sysfs.lock: Synchronize irq sysfs creation, + * @sysfs.irq_dir_exists: whether "irqs" directory exists, * * An auxiliary_device represents a part of its parent device's functionality. * It is given a name that, combined with the registering drivers -- cgit v1.2.3 From f4b89d8ce5a835afa51404977ee7e3889c2b9722 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Mon, 15 Jul 2024 16:03:29 +0000 Subject: landlock: Various documentation improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix some typos, incomplete or confusing phrases. * Split paragraphs where appropriate. * List the same error code multiple times, if it has multiple possible causes. * Bring wording closer to the man page wording, which has undergone more thorough review (esp. for LANDLOCK_ACCESS_FS_WRITE_FILE). * Small semantic clarifications * Call the ephemeral port range "ephemeral" * Clarify reasons for EFAULT in landlock_add_rule() * Clarify @rule_type doc for landlock_add_rule() This is a collection of small fixes which I collected when preparing the corresponding man pages [1]. Cc: Alejandro Colomar Cc: Konstantin Meskhidze Link: https://lore.kernel.org/r/20240715155554.2791018-1-gnoack@google.com [1] Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20240715160328.2792835-2-gnoack@google.com [mic: Add label to link, fix formatting spotted by make htmldocs, synchronize userspace-api documentation's date] Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index e76186da3260..2c8dbc74b955 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -104,20 +104,21 @@ struct landlock_path_beneath_attr { */ struct landlock_net_port_attr { /** - * @allowed_access: Bitmask of allowed access network for a port + * @allowed_access: Bitmask of allowed network actions for a port * (cf. `Network flags`_). */ __u64 allowed_access; /** * @port: Network port in host endianness. * - * It should be noted that port 0 passed to :manpage:`bind(2)` will - * bind to an available port from a specific port range. This can be - * configured thanks to the ``/proc/sys/net/ipv4/ip_local_port_range`` - * sysctl (also used for IPv6). A Landlock rule with port 0 and the - * ``LANDLOCK_ACCESS_NET_BIND_TCP`` right means that requesting to bind - * on port 0 is allowed and it will automatically translate to binding - * on the related port range. + * It should be noted that port 0 passed to :manpage:`bind(2)` will bind + * to an available port from the ephemeral port range. This can be + * configured with the ``/proc/sys/net/ipv4/ip_local_port_range`` sysctl + * (also used for IPv6). + * + * A Landlock rule with port 0 and the ``LANDLOCK_ACCESS_NET_BIND_TCP`` + * right means that requesting to bind on port 0 is allowed and it will + * automatically translate to binding on the related port range. */ __u64 port; }; @@ -138,10 +139,10 @@ struct landlock_net_port_attr { * The following access rights apply only to files: * * - %LANDLOCK_ACCESS_FS_EXECUTE: Execute a file. - * - %LANDLOCK_ACCESS_FS_WRITE_FILE: Open a file with write access. Note that - * you might additionally need the %LANDLOCK_ACCESS_FS_TRUNCATE right in order - * to overwrite files with :manpage:`open(2)` using ``O_TRUNC`` or - * :manpage:`creat(2)`. + * - %LANDLOCK_ACCESS_FS_WRITE_FILE: Open a file with write access. When + * opening files for writing, you will often additionally need the + * %LANDLOCK_ACCESS_FS_TRUNCATE right. In many cases, these system calls + * truncate existing files when overwriting them (e.g., :manpage:`creat(2)`). * - %LANDLOCK_ACCESS_FS_READ_FILE: Open a file with read access. * - %LANDLOCK_ACCESS_FS_TRUNCATE: Truncate a file with :manpage:`truncate(2)`, * :manpage:`ftruncate(2)`, :manpage:`creat(2)`, or :manpage:`open(2)` with @@ -263,7 +264,7 @@ struct landlock_net_port_attr { * These flags enable to restrict a sandboxed process to a set of network * actions. This is supported since the Landlock ABI version 4. * - * TCP sockets with allowed actions: + * The following access rights apply to TCP port numbers: * * - %LANDLOCK_ACCESS_NET_BIND_TCP: Bind a TCP socket to a local port. * - %LANDLOCK_ACCESS_NET_CONNECT_TCP: Connect an active TCP socket to -- cgit v1.2.3 From 338bb57e4c2a1c2c6fc92f9c0bd35be7587adca7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 15 Jul 2024 17:23:53 +0300 Subject: ipv4: Fix incorrect TOS in route get reply The TOS value that is returned to user space in the route get reply is the one with which the lookup was performed ('fl4->flowi4_tos'). This is fine when the matched route is configured with a TOS as it would not match if its TOS value did not match the one with which the lookup was performed. However, matching on TOS is only performed when the route's TOS is not zero. It is therefore possible to have the kernel incorrectly return a non-zero TOS: # ip link add name dummy1 up type dummy # ip address add 192.0.2.1/24 dev dummy1 # ip route get 192.0.2.2 tos 0xfc 192.0.2.2 tos 0x1c dev dummy1 src 192.0.2.1 uid 0 cache Fix by adding a DSCP field to the FIB result structure (inside an existing 4 bytes hole), populating it in the route lookup and using it when filling the route get reply. Output after the patch: # ip link add name dummy1 up type dummy # ip address add 192.0.2.1/24 dev dummy1 # ip route get 192.0.2.2 tos 0xfc 192.0.2.2 dev dummy1 src 192.0.2.1 uid 0 cache Fixes: 1a00fee4ffb2 ("ipv4: Remove rt_key_{src,dst,tos} from struct rtable.") Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Reviewed-by: Guillaume Nault Signed-off-by: Paolo Abeni --- include/net/ip_fib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 6e7984bfb986..72af2f223e59 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -173,6 +173,7 @@ struct fib_result { unsigned char type; unsigned char scope; u32 tclassid; + dscp_t dscp; struct fib_nh_common *nhc; struct fib_info *fi; struct fib_table *table; -- cgit v1.2.3 From b9fae9f06d84ffab0f3f9118f3a96bbcdc528bf6 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 11 Jul 2024 13:21:00 -0400 Subject: SUNRPC: Fixup gss_status tracepoint error output The GSS routine errors are values, not flags. Fixes: 0c77668ddb4e ("SUNRPC: Introduce trace points in rpc_auth_gss.ko") Signed-off-by: Benjamin Coddington Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcgss.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h index 7f0c1ceae726..b0b6300a0cab 100644 --- a/include/trace/events/rpcgss.h +++ b/include/trace/events/rpcgss.h @@ -54,7 +54,7 @@ TRACE_DEFINE_ENUM(GSS_S_UNSEQ_TOKEN); TRACE_DEFINE_ENUM(GSS_S_GAP_TOKEN); #define show_gss_status(x) \ - __print_flags(x, "|", \ + __print_symbolic(x, \ { GSS_S_BAD_MECH, "GSS_S_BAD_MECH" }, \ { GSS_S_BAD_NAME, "GSS_S_BAD_NAME" }, \ { GSS_S_BAD_NAMETYPE, "GSS_S_BAD_NAMETYPE" }, \ -- cgit v1.2.3 From f2f6a8e8871725035959b90bac048cde555aa0e9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 18 Jul 2024 13:06:47 +0100 Subject: init/Kconfig: remove CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND Several versions of GCC mis-compile asm goto with outputs. We try to workaround this, but our workaround is demonstrably incomplete and liable to result in subtle bugs, especially on arm64 where get_user() has recently been moved over to using asm goto with outputs. From discussion(s) with Linus at: https://lore.kernel.org/linux-arm-kernel/Zpfv2tnlQ-gOLGac@J2N7QTR9R3.cambridge.arm.com/ https://lore.kernel.org/linux-arm-kernel/ZpfxLrJAOF2YNqCk@J2N7QTR9R3.cambridge.arm.com/ ... it sounds like the best thing to do for now is to remove the workaround and make CC_HAS_ASM_GOTO_OUTPUT depend on working compiler versions. The issue was originally reported to GCC by Sean Christopherson: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 ... and Jakub Jelinek fixed this for GCC 14, with the fix backported to 13.3.0, 12.4.0, and 11.5.0. In the kernel, we tried to workaround broken compilers in commits: 4356e9f841f7 ("work around gcc bugs with 'asm goto' with outputs") 68fb3ca0e408 ("update workarounds for gcc "asm goto" issue") ... but the workaround of adding an empty asm("") after the asm volatile goto(...) demonstrably does not always avoid the problem, as can be seen in the following test case: | #define asm_goto_output(x...) \ | do { asm volatile goto(x); asm (""); } while (0) | | #define __good_or_bad(__val, __key) \ | do { \ | __label__ __failed; \ | unsigned long __tmp; \ | asm_goto_output( \ | " cbnz %[key], %l[__failed]\n" \ | " mov %[val], #0x900d\n" \ | : [val] "=r" (__tmp) \ | : [key] "r" (__key) \ | : \ | : __failed); \ | (__val) = __tmp; \ | break; \ | __failed: \ | (__val) = 0xbad; \ | } while (0) | | unsigned long get_val(unsigned long key); | unsigned long get_val(unsigned long key) | { | unsigned long val = 0xbad; | | __good_or_bad(val, key); | | return val; | } GCC 13.2.0 (at -O2) compiles this to: | cbnz x0, .Lfailed | mov x0, #0x900d | .Lfailed: | ret GCC 14.1.0 (at -O2) compiles this to: | cbnz x0, .Lfailed | mov x0, #0x900d | ret | .Lfailed: | mov x0, #0xbad | ret Note that GCC 13.2.0 erroneously omits the assignment to 'val' in the error path (even though this does not depend on an output of the asm goto). GCC 14.1.0 correctly retains the assignment. This problem can be seen within the kernel with the following test case: | #include | #include | | noinline unsigned long test_unsafe_get_user(unsigned long __user *ptr); | noinline unsigned long test_unsafe_get_user(unsigned long __user *ptr) | { | unsigned long val; | | unsafe_get_user(val, ptr, Efault); | return val; | | Efault: | val = 0x900d; | return val; | } GCC 13.2.0 (arm64 defconfig) compiles this to: | and x0, x0, #0xff7fffffffffffff | ldtr x0, [x0] | .Lextable_fixup: | ret GCC 13.2.0 (x86_64 defconfig + MITIGATION_RETPOLINE=n) compiles this to: | endbr64 | mov (%rdi),%rax | .Lextable_fixup: | ret ... omitting the assignment to 'val' in the error path, and leaving garbage in the result register returned by the function (which happens to contain the faulting address in the generated code). GCC 14.1.0 (arm64 defconfig) compiles this to: | and x0, x0, #0xff7fffffffffffff | ldtr x0, [x0] | ret | .Lextable_fixup: | mov x0, #0x900d // #36877 | ret GCC 14.1.0 (x86_64 defconfig + MITIGATION_RETPOLINE=n) compiles this to: | endbr64 | mov (%rdi),%rax | ret | .Lextable_fixup: | mov $0x900d,%eax | ret ... retaining the expected assignment to 'val' in the error path. We don't have a complete and reasonable workaround. While placing empty asm("") blocks after each goto label *might* be sufficient, we don't know for certain, this is tedious and error-prone, and there doesn't seem to be a neat way to wrap this up (which is especially painful for cases with multiple goto labels). Avoid this issue by disabling CONFIG_CC_HAS_ASM_GOTO_OUTPUT for known-broken compiler versions and removing the workaround (along with the CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND config option). For the moment I've left the default implementation of asm_goto_output() unchanged. This should now be redundant since any compiler with the fix for the clobbering issue whould also have a fix for the (earlier) volatile issue, but it's far less churny to leave it around, which makes it easier to backport this patch if necessary. Signed-off-by: Mark Rutland Cc: Alex Coplan Cc: Catalin Marinas Cc: Jakub Jelinek Cc: Peter Zijlstra Cc: Sean Christopherson Cc: Szabolcs Nagy Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index aff92b1d284f..f805adaa316e 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -64,26 +64,6 @@ __builtin_unreachable(); \ } while (0) -/* - * GCC 'asm goto' with outputs miscompiles certain code sequences: - * - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 - * - * Work around it via the same compiler barrier quirk that we used - * to use for the old 'asm goto' workaround. - * - * Also, always mark such 'asm goto' statements as volatile: all - * asm goto statements are supposed to be volatile as per the - * documentation, but some versions of gcc didn't actually do - * that for asms with outputs: - * - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 - */ -#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND -#define asm_goto_output(x...) \ - do { asm volatile goto(x); asm (""); } while (0) -#endif - #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ -- cgit v1.2.3 From 7d189c77106ed6df09829f7a419e35ada67b2bd0 Mon Sep 17 00:00:00 2001 From: Shivamurthy Shastri Date: Wed, 26 Jun 2024 21:05:12 +0200 Subject: PCI/MSI: Provide MSI_FLAG_PCI_MSI_MASK_PARENT Most ARM(64) PCI/MSI domains mask and unmask in the parent domain after or before the PCI mask/unmask operation takes place. So there are more than a dozen of the same wrapper implementation all over the place. Don't make the same mistake with the new per device PCI/MSI domains and provide a new MSI feature flag, which lets the domain implementation enable this sequence in the PCI/MSI code. Signed-off-by: Shivamurthy Shastri Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/87ed8j34pj.ffs@tglx --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index dc27cf3903d5..04f33e7f6f8b 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -556,6 +556,8 @@ enum { MSI_FLAG_USE_DEV_FWNODE = (1 << 7), /* Set parent->dev into domain->pm_dev on device domain creation */ MSI_FLAG_PARENT_PM_DEV = (1 << 8), + /* Support for parent mask/unmask */ + MSI_FLAG_PCI_MSI_MASK_PARENT = (1 << 9), /* Mask for the generic functionality */ MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), -- cgit v1.2.3 From f6a9886a9e55a1e6bd78c7e505205d05ef50a71e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Jun 2024 17:18:50 +0200 Subject: genirq/msi: Remove platform_msi_create_device_domain() No more users. Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Behnsen Signed-off-by: Shivamurthy Shastri Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240623142235.395577449@linutronix.de --- include/linux/msi.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 04f33e7f6f8b..4ae036d0c7db 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -660,8 +660,6 @@ __platform_msi_create_device_domain(struct device *dev, const struct irq_domain_ops *ops, void *host_data); -#define platform_msi_create_device_domain(dev, nvec, write, ops, data) \ - __platform_msi_create_device_domain(dev, nvec, false, write, ops, data) #define platform_msi_create_device_tree_domain(dev, nvec, write, ops, data) \ __platform_msi_create_device_domain(dev, nvec, true, write, ops, data) -- cgit v1.2.3 From e9894248994ca8291838baf063f045eab28e5a0e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Jun 2024 17:19:05 +0200 Subject: genirq/msi: Remove platform MSI leftovers No more users! Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Behnsen Signed-off-by: Shivamurthy Shastri Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240623142235.943295676@linutronix.de --- include/linux/msi.h | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 4ae036d0c7db..4c3462a6a97b 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -81,7 +81,6 @@ extern int pci_msi_ignore_mask; /* Helper functions */ struct msi_desc; struct pci_dev; -struct platform_msi_priv_data; struct device_attribute; struct irq_domain; struct irq_affinity_desc; @@ -231,14 +230,12 @@ struct msi_dev_domain { /** * msi_device_data - MSI per device data * @properties: MSI properties which are interesting to drivers - * @platform_data: Platform-MSI specific data * @mutex: Mutex protecting the MSI descriptor store * @__domains: Internal data for per device MSI domains * @__iter_idx: Index to search the next entry for iterators */ struct msi_device_data { unsigned long properties; - struct platform_msi_priv_data *platform_data; struct mutex mutex; struct msi_dev_domain __domains[MSI_MAX_DEVICE_IRQDOMAINS]; unsigned long __iter_idx; @@ -641,33 +638,6 @@ void msi_domain_free_irqs_all(struct device *dev, unsigned int domid); struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); -struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode, - struct msi_domain_info *info, - struct irq_domain *parent); - -/* When an MSI domain is used as an intermediate domain */ -int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, - int nvec, msi_alloc_info_t *args); -int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, - int virq, int nvec, msi_alloc_info_t *args); -void msi_domain_depopulate_descs(struct device *dev, int virq, int nvec); - -struct irq_domain * -__platform_msi_create_device_domain(struct device *dev, - unsigned int nvec, - bool is_tree, - irq_write_msi_msg_t write_msi_msg, - const struct irq_domain_ops *ops, - void *host_data); - -#define platform_msi_create_device_tree_domain(dev, nvec, write, ops, data) \ - __platform_msi_create_device_domain(dev, nvec, true, write, ops, data) - -int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs); -void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, - unsigned int nvec); -void *platform_msi_get_host_data(struct irq_domain *domain); /* Per device platform MSI */ int platform_device_msi_init_and_alloc_irqs(struct device *dev, unsigned int nvec, irq_write_msi_msg_t write_msi_msg); -- cgit v1.2.3 From 2fdda02a8749fdaff5621c96aaf24a61d2f8c5a2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Jun 2024 17:19:07 +0200 Subject: genirq/msi: Move msi_device_data to core Now that the platform MSI hack is gone, nothing needs to know about struct msi_device_data outside of the core code. Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Behnsen Signed-off-by: Shivamurthy Shastri Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240623142236.003295177@linutronix.de --- include/linux/msi.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/linux/msi.h b/include/linux/msi.h index 4c3462a6a97b..369367ecae5e 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -21,11 +21,7 @@ #include #include #include -#include -#include -#include #include -#include #include @@ -227,20 +223,6 @@ struct msi_dev_domain { struct irq_domain *domain; }; -/** - * msi_device_data - MSI per device data - * @properties: MSI properties which are interesting to drivers - * @mutex: Mutex protecting the MSI descriptor store - * @__domains: Internal data for per device MSI domains - * @__iter_idx: Index to search the next entry for iterators - */ -struct msi_device_data { - unsigned long properties; - struct mutex mutex; - struct msi_dev_domain __domains[MSI_MAX_DEVICE_IRQDOMAINS]; - unsigned long __iter_idx; -}; - int msi_setup_device_data(struct device *dev); void msi_lock_descs(struct device *dev); -- cgit v1.2.3 From a97b43fac5b9b3ffca71b8a917a249789902fce9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 18 Jul 2024 17:17:10 -0400 Subject: lockdep: Add comments for lockdep_set_no{validate,track}_class() Cc: Waiman Long Signed-off-by: Kent Overstreet --- include/linux/lockdep.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b76f1bcd2f7f..0759e30df392 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -178,9 +178,24 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name, (lock)->dep_map.wait_type_outer, \ (lock)->dep_map.lock_type) +/** + * lockdep_set_novalidate_class: disable checking of lock ordering on a given + * lock + * @lock: Lock to mark + * + * Lockdep will still record that this lock has been taken, and print held + * instances when dumping locks + */ #define lockdep_set_novalidate_class(lock) \ lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock) +/** + * lockdep_set_notrack_class: disable lockdep tracking of a given lock entirely + * @lock: Lock to mark + * + * Bigger hammer than lockdep_set_novalidate_class: so far just for bcachefs, + * which takes more locks than lockdep is able to track (48). + */ #define lockdep_set_notrack_class(lock) \ lockdep_set_class_and_name(lock, &__lockdep_no_track__, #lock) -- cgit v1.2.3 From a2d6d8aee4a4f694920bfaf6f6be3690e0f8d302 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 Jul 2024 11:56:34 +0200 Subject: ALSA: hda: tas2781: mark const variables as __maybe_unused An earlier patch changed the DECLARE_TLV_DB_SCALE declaration, but now there are additional static const variables that cause the same build warnings: In file included from sound/pci/hda/tas2781_hda_i2c.c:23: include/sound/tas2781-tlv.h:23:28: error: 'tas2563_dvc_table' defined but not used [-Werror=unused-const-variable=] 23 | static const unsigned char tas2563_dvc_table[][4] = { | ^~~~~~~~~~~~~~~~~ In file included from include/sound/tlv.h:10, from sound/pci/hda/tas2781_hda_i2c.c:22: include/sound/tas2781-tlv.h:20:35: error: 'tas2563_dvc_tlv' defined but not used [-Werror=unused-const-variable=] 20 | static const DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1); | ^~~~~~~~~~~~~~~ Mark them all as unused as well. Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20240719095640.3741247-1-arnd@kernel.org Signed-off-by: Takashi Iwai --- include/sound/tas2781-tlv.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sound/tas2781-tlv.h b/include/sound/tas2781-tlv.h index 99c41bfc7827..00fd4d449ff3 100644 --- a/include/sound/tas2781-tlv.h +++ b/include/sound/tas2781-tlv.h @@ -16,11 +16,11 @@ #define __TAS2781_TLV_H__ static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0); -static const DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); -static const DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1); +static const __maybe_unused DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0); +static const __maybe_unused DECLARE_TLV_DB_SCALE(tas2563_dvc_tlv, -12150, 50, 1); /* pow(10, db/20) * pow(2,30) */ -static const unsigned char tas2563_dvc_table[][4] = { +static const __maybe_unused unsigned char tas2563_dvc_table[][4] = { { 0X00, 0X00, 0X00, 0X00 }, /* -121.5db */ { 0X00, 0X00, 0X03, 0XBC }, /* -121.0db */ { 0X00, 0X00, 0X03, 0XF5 }, /* -120.5db */ -- cgit v1.2.3 From c8f51feee135f37f0d77b4616083c25524daa7b0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jul 2024 11:29:01 +0000 Subject: block: remove QUEUE_FLAG_STOPPED QUEUE_FLAG_STOPPED is entirely unused. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240719112912.3830443-5-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dce4a6bf7307..942ad4e0f231 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -588,7 +588,6 @@ struct request_queue { }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ -#define QUEUE_FLAG_STOPPED 0 /* queue is stopped */ #define QUEUE_FLAG_DYING 1 /* queue being torn down */ #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ @@ -608,7 +607,6 @@ struct request_queue { void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); -#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) -- cgit v1.2.3 From 3dff6155733f25872530ad358c6f5559800f4ccb Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 19 Jul 2024 11:29:02 +0000 Subject: block: Relocate BLK_MQ_CPU_WORK_BATCH BLK_MQ_CPU_WORK_BATCH is defined in include/linux/blk-mq.h, but only used in blk-mq.c, so relocate to block/blk-mq.h Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240719112912.3830443-6-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 89ba6b16fe8b..df775a203a4d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -672,8 +672,6 @@ enum { BLK_MQ_S_INACTIVE = 3, BLK_MQ_MAX_DEPTH = 10240, - - BLK_MQ_CPU_WORK_BATCH = 8, }; #define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \ ((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \ -- cgit v1.2.3 From 793356d23f8a817e164a917c792741a6d6d651ed Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 19 Jul 2024 11:29:03 +0000 Subject: block: Relocate BLK_MQ_MAX_DEPTH BLK_MQ_MAX_DEPTH is defined as an enumerated value, but has no real relation to the other members in its enum, so just use #define to provide the definition. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240719112912.3830443-7-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index df775a203a4d..8a84f49468d5 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -670,8 +670,6 @@ enum { /* hw queue is inactive after all its CPUs become offline */ BLK_MQ_S_INACTIVE = 3, - - BLK_MQ_MAX_DEPTH = 10240, }; #define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \ ((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \ @@ -680,6 +678,7 @@ enum { ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \ << BLK_MQ_F_ALLOC_POLICY_START_BIT) +#define BLK_MQ_MAX_DEPTH (10240) #define BLK_MQ_NO_HCTX_IDX (-1U) struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, -- cgit v1.2.3 From 55177adf1837bc56f878f7f6f7123947a2088148 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 19 Jul 2024 11:29:04 +0000 Subject: block: Make QUEUE_FLAG_x as an enum This will allow us better keep in sync with blk_queue_flag_name[]. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240719112912.3830443-8-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 942ad4e0f231..ded46fad67a0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -588,19 +588,22 @@ struct request_queue { }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ -#define QUEUE_FLAG_DYING 1 /* queue being torn down */ -#define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ -#define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ -#define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ -#define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ -#define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ -#define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ -#define QUEUE_FLAG_STATS 20 /* track IO start and completion times */ -#define QUEUE_FLAG_REGISTERED 22 /* queue has been registered to a disk */ -#define QUEUE_FLAG_QUIESCED 24 /* queue has been quiesced */ -#define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ -#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ -#define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ +enum { + QUEUE_FLAG_DYING, /* queue being torn down */ + QUEUE_FLAG_NOMERGES, /* disable merge attempts */ + QUEUE_FLAG_SAME_COMP, /* complete on same CPU-group */ + QUEUE_FLAG_FAIL_IO, /* fake timeout */ + QUEUE_FLAG_NOXMERGES, /* No extended merges */ + QUEUE_FLAG_SAME_FORCE, /* force complete on same CPU */ + QUEUE_FLAG_INIT_DONE, /* queue is initialized */ + QUEUE_FLAG_STATS, /* track IO start and completion times */ + QUEUE_FLAG_REGISTERED, /* queue has been registered to a disk */ + QUEUE_FLAG_QUIESCED, /* queue has been quiesced */ + QUEUE_FLAG_RQ_ALLOC_TIME, /* record rq->alloc_time_ns */ + QUEUE_FLAG_HCTX_ACTIVE, /* at least one blk-mq hctx is active */ + QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ + QUEUE_FLAG_MAX +}; #define QUEUE_FLAG_MQ_DEFAULT (1UL << QUEUE_FLAG_SAME_COMP) -- cgit v1.2.3 From 23827310cce7eff3477aeaeb59ea3718f5c9c633 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 19 Jul 2024 11:29:06 +0000 Subject: block: Catch possible entries missing from hctx_state_name[] Add a build-time assert that we are not missing entries from hctx_state_name[]. For this, create a separate enum for state flags and add a "max" entry for BLK_MQ_S_x flags. The numbering for those enum values is as default, so don't explicitly number. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240719112912.3830443-10-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8a84f49468d5..4905a1d67551 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -663,13 +663,6 @@ enum { BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 7, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_BITS = 1, - - BLK_MQ_S_STOPPED = 0, - BLK_MQ_S_TAG_ACTIVE = 1, - BLK_MQ_S_SCHED_RESTART = 2, - - /* hw queue is inactive after all its CPUs become offline */ - BLK_MQ_S_INACTIVE = 3, }; #define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \ ((flags >> BLK_MQ_F_ALLOC_POLICY_START_BIT) & \ @@ -681,6 +674,16 @@ enum { #define BLK_MQ_MAX_DEPTH (10240) #define BLK_MQ_NO_HCTX_IDX (-1U) +enum { + /* Keep hctx_state_name[] in sync with the definitions below */ + BLK_MQ_S_STOPPED, + BLK_MQ_S_TAG_ACTIVE, + BLK_MQ_S_SCHED_RESTART, + /* hw queue is inactive after all its CPUs become offline */ + BLK_MQ_S_INACTIVE, + BLK_MQ_S_MAX +}; + struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, struct queue_limits *lim, void *queuedata, struct lock_class_key *lkclass); -- cgit v1.2.3 From 226f0f6afc3e5c8903c6e57e1f6073ad8ad189b5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 19 Jul 2024 11:29:07 +0000 Subject: block: Catch possible entries missing from hctx_flag_name[] Refresh values in BLK_MQ_F_x enum, and then re-arrange members in hctx_flag_name[] to match that enum. Renumber BLK_MQ_F_ALLOC_POLICY_START_BIT to match the value refresh. Add a BUILD_BUG_ON() call to ensure that we are not missing entries in hctx_flag_name[]. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240719112912.3830443-11-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 4905a1d67551..27241009c8f9 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -644,6 +644,7 @@ struct blk_mq_ops { #endif }; +/* Keep hctx_flag_name[] in sync with the definitions below */ enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_QUEUE_SHARED = 1 << 1, @@ -653,15 +654,16 @@ enum { */ BLK_MQ_F_STACKING = 1 << 2, BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3, - BLK_MQ_F_BLOCKING = 1 << 5, + BLK_MQ_F_BLOCKING = 1 << 4, /* Do not allow an I/O scheduler to be configured. */ - BLK_MQ_F_NO_SCHED = 1 << 6, + BLK_MQ_F_NO_SCHED = 1 << 5, + /* * Select 'none' during queue registration in case of a single hwq * or shared hwqs instead of 'mq-deadline'. */ - BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 7, - BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, + BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 6, + BLK_MQ_F_ALLOC_POLICY_START_BIT = 7, BLK_MQ_F_ALLOC_POLICY_BITS = 1, }; #define BLK_MQ_FLAG_TO_ALLOC_POLICY(flags) \ -- cgit v1.2.3 From 26d3bdb57ec3fa56eaf8d2e74b5d488e55f43013 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 19 Jul 2024 11:29:08 +0000 Subject: block: Catch possible entries missing from alloc_policy_name[] Make BLK_TAG_ALLOC_x an enum and add a "max" entry. Add a BUILD_BUG_ON() call to ensure that we are not missing entries in hctx_flag_name[]. Reviewed-by: Bart Van Assche Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240719112912.3830443-12-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 27241009c8f9..a64a50a0edf7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -278,8 +278,12 @@ enum blk_eh_timer_return { BLK_EH_RESET_TIMER, }; -#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */ -#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */ +/* Keep alloc_policy_name[] in sync with the definitions below */ +enum { + BLK_TAG_ALLOC_FIFO, /* allocate starting from 0 */ + BLK_TAG_ALLOC_RR, /* allocate starting from last allocated tag */ + BLK_TAG_ALLOC_MAX +}; /** * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware -- cgit v1.2.3 From 6fa99325ec86bcd442363d77561a1babd8d9a427 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 19 Jul 2024 11:29:09 +0000 Subject: block: Catch possible entries missing from cmd_flag_name[] Add a BUILD_BUG_ON() call to ensure that we are not missing entries in cmd_flag_name[]. Reviewed-by: Bart Van Assche Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240719112912.3830443-13-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 632edd71f8c6..36ed96133217 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -354,6 +354,7 @@ enum req_op { REQ_OP_LAST = (__force blk_opf_t)36, }; +/* Keep cmd_flag_name[] in sync with the definitions below */ enum req_flag_bits { __REQ_FAILFAST_DEV = /* no driver retries of device errors */ REQ_OP_BITS, -- cgit v1.2.3 From 5f89154e8e9e3445f9b592e58a7045e06153b822 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 19 Jul 2024 11:29:10 +0000 Subject: block: Use enum to define RQF_x bit indexes Similar to what we do for enum req_flag_bits, divide the definition of RQF_x flags into an enum to declare the bits and an actual flag. Tweak some comments to not spill onto new lines. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240719112912.3830443-14-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 86 +++++++++++++++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a64a50a0edf7..af52ec6a1ed5 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -27,38 +27,60 @@ typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); * request flags */ typedef __u32 __bitwise req_flags_t; -/* drive already may have started this one */ -#define RQF_STARTED ((__force req_flags_t)(1 << 1)) -/* request for flush sequence */ -#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) -/* merge of different types, fail separately */ -#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5)) -/* don't call prep for this one */ -#define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) -/* use hctx->sched_tags */ -#define RQF_SCHED_TAGS ((__force req_flags_t)(1 << 8)) -/* use an I/O scheduler for this request */ -#define RQF_USE_SCHED ((__force req_flags_t)(1 << 9)) -/* vaguely specified driver internal error. Ignored by the block layer */ -#define RQF_FAILED ((__force req_flags_t)(1 << 10)) -/* don't warn about errors */ -#define RQF_QUIET ((__force req_flags_t)(1 << 11)) -/* account into disk and partition IO statistics */ -#define RQF_IO_STAT ((__force req_flags_t)(1 << 13)) -/* runtime pm request */ -#define RQF_PM ((__force req_flags_t)(1 << 15)) -/* on IO scheduler merge hash */ -#define RQF_HASHED ((__force req_flags_t)(1 << 16)) -/* track IO completion time */ -#define RQF_STATS ((__force req_flags_t)(1 << 17)) -/* Look at ->special_vec for the actual data payload instead of the - bio chain. */ -#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18)) -/* The request completion needs to be signaled to zone write pluging. */ -#define RQF_ZONE_WRITE_PLUGGING ((__force req_flags_t)(1 << 20)) -/* ->timeout has been called, don't expire again */ -#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) -#define RQF_RESV ((__force req_flags_t)(1 << 23)) +enum { + /* drive already may have started this one */ + __RQF_STARTED, + /* request for flush sequence */ + __RQF_FLUSH_SEQ, + /* merge of different types, fail separately */ + __RQF_MIXED_MERGE, + /* don't call prep for this one */ + __RQF_DONTPREP, + /* use hctx->sched_tags */ + __RQF_SCHED_TAGS, + /* use an I/O scheduler for this request */ + __RQF_USE_SCHED, + /* vaguely specified driver internal error. Ignored by block layer */ + __RQF_FAILED, + /* don't warn about errors */ + __RQF_QUIET, + /* account into disk and partition IO statistics */ + __RQF_IO_STAT, + /* runtime pm request */ + __RQF_PM, + /* on IO scheduler merge hash */ + __RQF_HASHED, + /* track IO completion time */ + __RQF_STATS, + /* Look at ->special_vec for the actual data payload instead of the + bio chain. */ + __RQF_SPECIAL_PAYLOAD, + /* request completion needs to be signaled to zone write plugging. */ + __RQF_ZONE_WRITE_PLUGGING, + /* ->timeout has been called, don't expire again */ + __RQF_TIMED_OUT, + __RQF_RESV, + __RQF_BITS +}; + +#define RQF_STARTED ((__force req_flags_t)(1 << __RQF_STARTED)) +#define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << __RQF_FLUSH_SEQ)) +#define RQF_MIXED_MERGE ((__force req_flags_t)(1 << __RQF_MIXED_MERGE)) +#define RQF_DONTPREP ((__force req_flags_t)(1 << __RQF_DONTPREP)) +#define RQF_SCHED_TAGS ((__force req_flags_t)(1 << __RQF_SCHED_TAGS)) +#define RQF_USE_SCHED ((__force req_flags_t)(1 << __RQF_USE_SCHED)) +#define RQF_FAILED ((__force req_flags_t)(1 << __RQF_FAILED)) +#define RQF_QUIET ((__force req_flags_t)(1 << __RQF_QUIET)) +#define RQF_IO_STAT ((__force req_flags_t)(1 << __RQF_IO_STAT)) +#define RQF_PM ((__force req_flags_t)(1 << __RQF_PM)) +#define RQF_HASHED ((__force req_flags_t)(1 << __RQF_HASHED)) +#define RQF_STATS ((__force req_flags_t)(1 << __RQF_STATS)) +#define RQF_SPECIAL_PAYLOAD \ + ((__force req_flags_t)(1 << __RQF_SPECIAL_PAYLOAD)) +#define RQF_ZONE_WRITE_PLUGGING \ + ((__force req_flags_t)(1 << __RQF_ZONE_WRITE_PLUGGING)) +#define RQF_TIMED_OUT ((__force req_flags_t)(1 << __RQF_TIMED_OUT)) +#define RQF_RESV ((__force req_flags_t)(1 << __RQF_RESV)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ -- cgit v1.2.3 From 8a47e33f50dd779f94bc277c6d3de81672463c5e Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 19 Jul 2024 11:29:12 +0000 Subject: block: Catch possible entries missing from rqf_name[] Add a BUILD_BUG_ON() call to ensure that we are not missing entries in rqf_name[]. Reviewed-by: Bart Van Assche Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240719112912.3830443-16-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index af52ec6a1ed5..8d304b1d16b1 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -27,6 +27,7 @@ typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); * request flags */ typedef __u32 __bitwise req_flags_t; +/* Keep rqf_name[] in sync with the definitions below */ enum { /* drive already may have started this one */ __RQF_STARTED, -- cgit v1.2.3 From 72d04bdcf3f7d7e07d82f9757946f68802a7270a Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Tue, 16 Jul 2024 16:26:27 +0800 Subject: sbitmap: fix io hung due to race on sbitmap_word::cleared Configuration for sbq: depth=64, wake_batch=6, shift=6, map_nr=1 1. There are 64 requests in progress: map->word = 0xFFFFFFFFFFFFFFFF 2. After all the 64 requests complete, and no more requests come: map->word = 0xFFFFFFFFFFFFFFFF, map->cleared = 0xFFFFFFFFFFFFFFFF 3. Now two tasks try to allocate requests: T1: T2: __blk_mq_get_tag . __sbitmap_queue_get . sbitmap_get . sbitmap_find_bit . sbitmap_find_bit_in_word . __sbitmap_get_word -> nr=-1 __blk_mq_get_tag sbitmap_deferred_clear __sbitmap_queue_get /* map->cleared=0xFFFFFFFFFFFFFFFF */ sbitmap_find_bit if (!READ_ONCE(map->cleared)) sbitmap_find_bit_in_word return false; __sbitmap_get_word -> nr=-1 mask = xchg(&map->cleared, 0) sbitmap_deferred_clear atomic_long_andnot() /* map->cleared=0 */ if (!(map->cleared)) return false; /* * map->cleared is cleared by T1 * T2 fail to acquire the tag */ 4. T2 is the sole tag waiter. When T1 puts the tag, T2 cannot be woken up due to the wake_batch being set at 6. If no more requests come, T1 will wait here indefinitely. This patch achieves two purposes: 1. Check on ->cleared and update on both ->cleared and ->word need to be done atomically, and using spinlock could be the simplest solution. 2. Add extra check in sbitmap_deferred_clear(), to identify whether ->word has free bits. Fixes: ea86ea2cdced ("sbitmap: ammortize cost of clearing bits") Signed-off-by: Yang Yang Reviewed-by: Ming Lei Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20240716082644.659566-1-yang.yang@vivo.com Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index d662cf136021..c09cdcc99471 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -36,6 +36,11 @@ struct sbitmap_word { * @cleared: word holding cleared bits */ unsigned long cleared ____cacheline_aligned_in_smp; + + /** + * @swap_lock: serializes simultaneous updates of ->word and ->cleared + */ + spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** -- cgit v1.2.3 From 89ed6c9ac69ec398ccb648f5f675b43e8ca679ca Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Tue, 16 Jul 2024 13:30:58 +0000 Subject: blk-cgroup: move congestion_count to struct blkcg The congestion_count was introduced into the struct cgroup by commit d09d8df3a294 ("blkcg: add generic throttling mechanism"), but since it is closely related to the blkio subsys, it is not appropriate to put it in the struct cgroup, so let's move it to struct blkcg. There should be no functional changes because blkcg is per cgroup. Signed-off-by: Xiu Jianfeng Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20240716133058.3491350-1-xiujianfeng@huawei.com Signed-off-by: Jens Axboe --- include/linux/cgroup-defs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ea48c861cd36..ec82c4b7ed4d 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -534,9 +534,6 @@ struct cgroup { /* used to store eBPF programs */ struct cgroup_bpf bpf; - /* If there is block congestion on this cgroup. */ - atomic_t congestion_count; - /* Used to store internal freezer state */ struct cgroup_freezer_state freezer; -- cgit v1.2.3 From 9651fcedf7b92d3f7f1ab179e8ab55b85ee10fc1 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 8 Dec 2022 17:55:04 +0100 Subject: mm: add MAP_DROPPABLE for designating always lazily freeable mappings The vDSO getrandom() implementation works with a buffer allocated with a new system call that has certain requirements: - It shouldn't be written to core dumps. * Easy: VM_DONTDUMP. - It should be zeroed on fork. * Easy: VM_WIPEONFORK. - It shouldn't be written to swap. * Uh-oh: mlock is rlimited. * Uh-oh: mlock isn't inherited by forks. - It shouldn't reserve actual memory, but it also shouldn't crash when page faulting in memory if none is available * Uh-oh: VM_NORESERVE means segfaults. It turns out that the vDSO getrandom() function has three really nice characteristics that we can exploit to solve this problem: 1) Due to being wiped during fork(), the vDSO code is already robust to having the contents of the pages it reads zeroed out midway through the function's execution. 2) In the absolute worst case of whatever contingency we're coding for, we have the option to fallback to the getrandom() syscall, and everything is fine. 3) The buffers the function uses are only ever useful for a maximum of 60 seconds -- a sort of cache, rather than a long term allocation. These characteristics mean that we can introduce VM_DROPPABLE, which has the following semantics: a) It never is written out to swap. b) Under memory pressure, mm can just drop the pages (so that they're zero when read back again). c) It is inherited by fork. d) It doesn't count against the mlock budget, since nothing is locked. e) If there's not enough memory to service a page fault, it's not fatal, and no signal is sent. This way, allocations used by vDSO getrandom() can use: VM_DROPPABLE | VM_DONTDUMP | VM_WIPEONFORK | VM_NORESERVE And there will be no problem with OOMing, crashing on overcommitment, using memory when not in use, not wiping on fork(), coredumps, or writing out to swap. In order to let vDSO getrandom() use this, expose these via mmap(2) as MAP_DROPPABLE. Note that this involves removing the MADV_FREE special case from sort_folio(), which according to Yu Zhao is unnecessary and will simply result in an extra call to shrink_folio_list() in the worst case. The chunk removed reenables the swapbacked flag, which we don't want for VM_DROPPABLE, and we can't conditionalize it here because there isn't a vma reference available. Finally, the provided self test ensures that this is working as desired. Cc: linux-mm@kvack.org Acked-by: David Hildenbrand Signed-off-by: Jason A. Donenfeld --- include/linux/mm.h | 7 +++++++ include/linux/userfaultfd_k.h | 3 +++ include/trace/events/mmflags.h | 7 +++++++ include/uapi/linux/mman.h | 1 + 4 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index eb7c96d24ac0..e078c2890bf8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -406,6 +406,13 @@ extern unsigned int kobjsize(const void *objp); #define VM_ALLOW_ANY_UNCACHED VM_NONE #endif +#ifdef CONFIG_64BIT +#define VM_DROPPABLE_BIT 40 +#define VM_DROPPABLE BIT(VM_DROPPABLE_BIT) +#else +#define VM_DROPPABLE VM_NONE +#endif + #ifdef CONFIG_64BIT /* VM is sealed, in vm_flags */ #define VM_SEALED _BITUL(63) diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 05d59f74fc88..a12bcf042551 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -218,6 +218,9 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, { vm_flags &= __VM_UFFD_FLAGS; + if (vm_flags & VM_DROPPABLE) + return false; + if ((vm_flags & VM_UFFD_MINOR) && (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) return false; diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index e46d6e82765e..b63d211bd141 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -165,6 +165,12 @@ IF_HAVE_PG_ARCH_X(arch_3) # define IF_HAVE_UFFD_MINOR(flag, name) #endif +#ifdef CONFIG_64BIT +# define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name}, +#else +# define IF_HAVE_VM_DROPPABLE(flag, name) +#endif + #define __def_vmaflag_names \ {VM_READ, "read" }, \ {VM_WRITE, "write" }, \ @@ -197,6 +203,7 @@ IF_HAVE_VM_SOFTDIRTY(VM_SOFTDIRTY, "softdirty" ) \ {VM_MIXEDMAP, "mixedmap" }, \ {VM_HUGEPAGE, "hugepage" }, \ {VM_NOHUGEPAGE, "nohugepage" }, \ +IF_HAVE_VM_DROPPABLE(VM_DROPPABLE, "droppable" ) \ {VM_MERGEABLE, "mergeable" } \ #define show_vma_flags(flags) \ diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index a246e11988d5..e89d00528f2f 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -17,6 +17,7 @@ #define MAP_SHARED 0x01 /* Share changes */ #define MAP_PRIVATE 0x02 /* Changes are private */ #define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ +#define MAP_DROPPABLE 0x08 /* Zero memory under memory pressure. */ /* * Huge page size encoding when MAP_HUGETLB is specified, and a huge page -- cgit v1.2.3 From 4ad10a5f5f78a5b3e525a63bd075a4eb1139dde1 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 18 Nov 2022 17:23:34 +0100 Subject: random: introduce generic vDSO getrandom() implementation Provide a generic C vDSO getrandom() implementation, which operates on an opaque state returned by vgetrandom_alloc() and produces random bytes the same way as getrandom(). This has the following API signature: ssize_t vgetrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); The return value and the first three arguments are the same as ordinary getrandom(), while the last two arguments are a pointer to the opaque allocated state and its size. Were all five arguments passed to the getrandom() syscall, nothing different would happen, and the functions would have the exact same behavior. The actual vDSO RNG algorithm implemented is the same one implemented by drivers/char/random.c, using the same fast-erasure techniques as that. Should the in-kernel implementation change, so too will the vDSO one. It requires an implementation of ChaCha20 that does not use any stack, in order to maintain forward secrecy if a multi-threaded program forks (though this does not account for a similar issue with SA_SIGINFO copying registers to the stack), so this is left as an architecture-specific fill-in. Stack-less ChaCha20 is an easy algorithm to implement on a variety of architectures, so this shouldn't be too onerous. Initially, the state is keyless, and so the first call makes a getrandom() syscall to generate that key, and then uses it for subsequent calls. By keeping track of a generation counter, it knows when its key is invalidated and it should fetch a new one using the syscall. Later, more than just a generation counter might be used. Since MADV_WIPEONFORK is set on the opaque state, the key and related state is wiped during a fork(), so secrets don't roll over into new processes, and the same state doesn't accidentally generate the same random stream. The generation counter, as well, is always >0, so that the 0 counter is a useful indication of a fork() or otherwise uninitialized state. If the kernel RNG is not yet initialized, then the vDSO always calls the syscall, because that behavior cannot be emulated in userspace, but fortunately that state is short lived and only during early boot. If it has been initialized, then there is no need to inspect the `flags` argument, because the behavior does not change post-initialization regardless of the `flags` value. Since the opaque state passed to it is mutated, vDSO getrandom() is not reentrant, when used with the same opaque state, which libc should be mindful of. The function works over an opaque per-thread state of a particular size, which must be marked VM_WIPEONFORK, VM_DONTDUMP, VM_NORESERVE, and VM_DROPPABLE for proper operation. Over time, the nuances of these allocations may change or grow or even differ based on architectural features. The opaque state passed to vDSO getrandom() must be allocated using the mmap_flags and mmap_prot parameters provided by the vgetrandom_opaque_params struct, which also contains the size of each state. That struct can be obtained with a call to vgetrandom(NULL, 0, 0, ¶ms, ~0UL). Then, libc can call mmap(2) and slice up the returned array into a state per each thread, while ensuring that no single state straddles a page boundary. Libc is expected to allocate a chunk of these on first use, and then dole them out to threads as they're created, allocating more when needed. vDSO getrandom() provides the ability for userspace to generate random bytes quickly and safely, and is intended to be integrated into libc's thread management. As an illustrative example, the introduced code in the vdso_test_getrandom self test later in this series might be used to do the same outside of libc. In a libc the various pthread-isms are expected to be elided into libc internals. Reviewed-by: Thomas Gleixner Signed-off-by: Jason A. Donenfeld --- include/uapi/linux/random.h | 15 +++++++++++++++ include/vdso/datapage.h | 11 +++++++++++ include/vdso/getrandom.h | 46 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+) create mode 100644 include/vdso/getrandom.h (limited to 'include') diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h index e744c23582eb..2a3fe4c2cdc9 100644 --- a/include/uapi/linux/random.h +++ b/include/uapi/linux/random.h @@ -55,4 +55,19 @@ struct rand_pool_info { #define GRND_RANDOM 0x0002 #define GRND_INSECURE 0x0004 +/** + * struct vgetrandom_opaque_params - arguments for allocating memory for vgetrandom + * + * @size_per_opaque_state: Size of each state that is to be passed to vgetrandom(). + * @mmap_prot: Value of the prot argument in mmap(2). + * @mmap_flags: Value of the flags argument in mmap(2). + * @reserved: Reserved for future use. + */ +struct vgetrandom_opaque_params { + __u32 size_of_opaque_state; + __u32 mmap_prot; + __u32 mmap_flags; + __u32 reserved[13]; +}; + #endif /* _UAPI_LINUX_RANDOM_H */ diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index d04d394db064..05e5787beb73 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -113,6 +113,16 @@ struct vdso_data { struct arch_vdso_data arch_data; }; +/** + * struct vdso_rng_data - vdso RNG state information + * @generation: counter representing the number of RNG reseeds + * @is_ready: boolean signaling whether the RNG is initialized + */ +struct vdso_rng_data { + u64 generation; + u8 is_ready; +}; + /* * We use the hidden visibility to prevent the compiler from generating a GOT * relocation. Not only is going through a GOT useless (the entry couldn't and @@ -124,6 +134,7 @@ struct vdso_data { */ extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden"))); extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden"))); +extern struct vdso_rng_data _vdso_rng_data __attribute__((visibility("hidden"))); /** * union vdso_data_store - Generic vDSO data page diff --git a/include/vdso/getrandom.h b/include/vdso/getrandom.h new file mode 100644 index 000000000000..a8b7c14b0ae0 --- /dev/null +++ b/include/vdso/getrandom.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022-2024 Jason A. Donenfeld . All Rights Reserved. + */ + +#ifndef _VDSO_GETRANDOM_H +#define _VDSO_GETRANDOM_H + +#include + +#define CHACHA_KEY_SIZE 32 +#define CHACHA_BLOCK_SIZE 64 + +/** + * struct vgetrandom_state - State used by vDSO getrandom(). + * + * @batch: One and a half ChaCha20 blocks of buffered RNG output. + * + * @key: Key to be used for generating next batch. + * + * @batch_key: Union of the prior two members, which is exactly two full + * ChaCha20 blocks in size, so that @batch and @key can be filled + * together. + * + * @generation: Snapshot of @rng_info->generation in the vDSO data page at + * the time @key was generated. + * + * @pos: Offset into @batch of the next available random byte. + * + * @in_use: Reentrancy guard for reusing a state within the same thread + * due to signal handlers. + */ +struct vgetrandom_state { + union { + struct { + u8 batch[CHACHA_BLOCK_SIZE * 3 / 2]; + u32 key[CHACHA_KEY_SIZE / sizeof(u32)]; + }; + u8 batch_key[CHACHA_BLOCK_SIZE * 2]; + }; + u64 generation; + u8 pos; + bool in_use; +}; + +#endif /* _VDSO_GETRANDOM_H */ -- cgit v1.2.3 From 13f75d9ecf3d8efcf597dfcd8f7be7460cdaa11a Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 15 Jul 2024 04:50:07 +0200 Subject: random: note that RNDGETPOOL was removed in 2.6.9-rc2 RNDGETPOOL was thankfully removed twenty years ago, but it's stuck around in headers. Probably removing it from uapi headers isn't great in case there are some weird users out there, but we should at least mark this as having been removed, to save future readers the same goose chase I just went on. Link: https://lore.kernel.org/all/E1By1St-0001TS-Qj@thunk.org/ Link: https://lore.kernel.org/all/Pine.LNX.4.58.0409130937050.4094@ppc970.osdl.org/ Signed-off-by: Jason A. Donenfeld --- include/uapi/linux/random.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/random.h b/include/uapi/linux/random.h index 2a3fe4c2cdc9..1dd047ec98a1 100644 --- a/include/uapi/linux/random.h +++ b/include/uapi/linux/random.h @@ -20,7 +20,7 @@ /* Add to (or subtract from) the entropy count. (Superuser only.) */ #define RNDADDTOENTCNT _IOW( 'R', 0x01, int ) -/* Get the contents of the entropy pool. (Superuser only.) */ +/* Get the contents of the entropy pool. (Superuser only.) (Removed in 2.6.9-rc2.) */ #define RNDGETPOOL _IOR( 'R', 0x02, int [2] ) /* -- cgit v1.2.3 From 45b8ee7182d5ef8df6959297046f86dc128d6a06 Mon Sep 17 00:00:00 2001 From: Bastien Curutchet Date: Mon, 17 Jun 2024 14:08:18 +0200 Subject: i2c: mux: gpio: Add support for the 'settle-time-us' property Some hardware need some time to switch from a bus to another. This can cause the first transfers following the selection of a bus to fail. There is no way to configure this kind of waiting time in the driver. Add support for the 'settle-time-us' device-tree property. When set, the i2c_mux_gpio_select() applies a delay before returning, leaving enough time to the hardware to switch to the new bus. Signed-off-by: Bastien Curutchet Reviewed-by: Andi Shyti Acked-by: Peter Rosin Signed-off-by: Andi Shyti --- include/linux/platform_data/i2c-mux-gpio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/i2c-mux-gpio.h b/include/linux/platform_data/i2c-mux-gpio.h index 816a4cd3ccb5..96843aab4d1e 100644 --- a/include/linux/platform_data/i2c-mux-gpio.h +++ b/include/linux/platform_data/i2c-mux-gpio.h @@ -19,6 +19,7 @@ * position * @n_values: Number of multiplexer positions (busses to instantiate) * @idle: Bitmask to write to MUX when idle or GPIO_I2CMUX_NO_IDLE if not used + * @settle_time: Delay to wait when a new bus is selected */ struct i2c_mux_gpio_platform_data { int parent; @@ -26,6 +27,7 @@ struct i2c_mux_gpio_platform_data { const unsigned *values; int n_values; unsigned idle; + u32 settle_time; }; #endif /* _LINUX_I2C_MUX_GPIO_H */ -- cgit v1.2.3 From d83763e44944d4d6dee38099c5d0a0984ac66385 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 16 Jul 2024 10:36:24 +0200 Subject: i2c: header: remove unneeded stuff regarding i2c_algorithm The forward declaration is not needed anymore. The sentence about "following structs" became obsolete when struct i2c_algorithm became a kdoc. The paragraph about return values can go because we have this information in kdoc already. Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index e9cc14b1f9a1..1e34b486f604 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -30,7 +30,6 @@ extern const struct device_type i2c_client_type; /* --- General options ------------------------------------------------ */ struct i2c_msg; -struct i2c_algorithm; struct i2c_adapter; struct i2c_client; struct i2c_driver; @@ -533,8 +532,6 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info, * @reg_slave: deprecated, use @reg_target * @unreg_slave: deprecated, use @unreg_target * - * - * The following structs are for those who like to implement new bus drivers: * i2c_algorithm is the interface to a class of hardware solutions which can * be addressed using the same bus algorithms - i.e. bit-banging or the PCF8584 * to name two of the most common. @@ -550,9 +547,6 @@ struct i2c_algorithm { * to NULL. If an adapter algorithm can do SMBus access, set * smbus_xfer. If set to NULL, the SMBus protocol is simulated * using common I2C messages. - * - * xfer should return the number of messages successfully - * processed, or a negative value on error */ union { int (*xfer)(struct i2c_adapter *adap, struct i2c_msg *msgs, -- cgit v1.2.3 From 385ac870bdd531348de123d6790626ccd7827f69 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 16 Jul 2024 10:36:25 +0200 Subject: i2c: header: improve kdoc for i2c_algorithm Reword the explanation of @xfer, the old one was confusing and mixing up terminology. Other than that, capitalize some words correctly and use full line length. Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 1e34b486f604..8caaa13834bf 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -511,16 +511,15 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info, #endif /* I2C_BOARDINFO */ /** - * struct i2c_algorithm - represent I2C transfer method - * @xfer: Issue a set of i2c transactions to the given I2C adapter - * defined by the msgs array, with num messages available to transfer via - * the adapter specified by adap. - * @xfer_atomic: same as @xfer. Yet, only using atomic context - * so e.g. PMICs can be accessed very late before shutdown. Optional. - * @smbus_xfer: Issue smbus transactions to the given I2C adapter. If this + * struct i2c_algorithm - represent I2C transfer methods + * @xfer: Transfer a given number of messages defined by the msgs array via + * the specified adapter. + * @xfer_atomic: Same as @xfer. Yet, only using atomic context so e.g. PMICs + * can be accessed very late before shutdown. Optional. + * @smbus_xfer: Issue SMBus transactions to the given I2C adapter. If this * is not present, then the bus layer will try and convert the SMBus calls * into I2C transfers instead. - * @smbus_xfer_atomic: same as @smbus_xfer. Yet, only using atomic context + * @smbus_xfer_atomic: Same as @smbus_xfer. Yet, only using atomic context * so e.g. PMICs can be accessed very late before shutdown. Optional. * @functionality: Return the flags that this algorithm/adapter pair supports * from the ``I2C_FUNC_*`` flags. -- cgit v1.2.3 From eabd9db64ea8ba64d2a0b1d70da38e1a95dcd08b Mon Sep 17 00:00:00 2001 From: Haibo Xu Date: Thu, 13 Jun 2024 16:54:33 +0800 Subject: ACPI: RISCV: Add NUMA support based on SRAT and SLIT Add acpi_numa.c file to enable parse NUMA information from ACPI SRAT and SLIT tables. SRAT table provide CPUs(Hart) and memory nodes to proximity domain mapping, while SLIT table provide the distance metrics between proximity domains. Signed-off-by: Haibo Xu Reviewed-by: Sunil V L Reviewed-by: Hanjun Guo Link: https://lore.kernel.org/r/65dbad1fda08a32922c44886e4581e49b4a2fecc.1718268003.git.haibo1.xu@intel.com Signed-off-by: Palmer Dabbelt --- include/linux/acpi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 28c3fb2bef0d..5a2b620ccd58 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -264,6 +264,12 @@ static inline void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { } #endif +#ifdef CONFIG_RISCV +void acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa); +#else +static inline void acpi_numa_rintc_affinity_init(struct acpi_srat_rintc_affinity *pa) { } +#endif + #ifndef PHYS_CPUID_INVALID typedef u32 phys_cpuid_t; #define PHYS_CPUID_INVALID (phys_cpuid_t)(-1) -- cgit v1.2.3 From 10a0e6f3d3db7dcfe36e578923e5f038f1d2b72a Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Wed, 17 Jul 2024 11:49:40 +0200 Subject: timers/migration: Move hierarchy setup into cpuhotplug prepare callback When a CPU comes online the first time, it is possible that a new top level group will be created. In general all propagation is done from the bottom to top. This minimizes complexity and prevents possible races. But when a new top level group is created, the formely top level group needs to be connected to the new level. This is the only time, when the direction to propagate changes is changed: the changes are propagated from top (new top level group) to bottom (formerly top level group). This introduces two races (see (A) and (B)) as reported by Frederic: (A) This race happens, when marking the formely top level group as active, but the last active CPU of the formerly top level group goes idle. Then it's likely that formerly group is no longer active, but marked nevertheless as active in new top level group: [GRP0:0] migrator = 0 active = 0 nextevt = KTIME_MAX / \ 0 1 .. 7 active idle 0) Hierarchy has for now only 8 CPUs and CPU 0 is the only active CPU. [GRP1:0] migrator = TMIGR_NONE active = NONE nextevt = KTIME_MAX \ [GRP0:0] [GRP0:1] migrator = 0 migrator = TMIGR_NONE active = 0 active = NONE nextevt = KTIME_MAX nextevt = KTIME_MAX / \ 0 1 .. 7 8 active idle !online 1) CPU 8 is booting and creates a new group in first level GRP0:1 and therefore also a new top group GRP1:0. For now the setup code proceeded only until the connected between GRP0:1 to the new top group. The connection between CPU8 and GRP0:1 is not yet established and CPU 8 is still !online. [GRP1:0] migrator = TMIGR_NONE active = NONE nextevt = KTIME_MAX / \ [GRP0:0] [GRP0:1] migrator = 0 migrator = TMIGR_NONE active = 0 active = NONE nextevt = KTIME_MAX nextevt = KTIME_MAX / \ 0 1 .. 7 8 active idle !online 2) Setup code now connects GRP0:0 to GRP1:0 and observes while in tmigr_connect_child_parent() that GRP0:0 is not TMIGR_NONE. So it prepares to call tmigr_active_up() on it. It hasn't done it yet. [GRP1:0] migrator = TMIGR_NONE active = NONE nextevt = KTIME_MAX / \ [GRP0:0] [GRP0:1] migrator = TMIGR_NONE migrator = TMIGR_NONE active = NONE active = NONE nextevt = KTIME_MAX nextevt = KTIME_MAX / \ 0 1 .. 7 8 idle idle !online 3) CPU 0 goes idle. Since GRP0:0->parent has been updated by CPU 8 with GRP0:0->lock held, CPU 0 observes GRP1:0 after calling tmigr_update_events() and it propagates the change to the top (no change there and no wakeup programmed since there is no timer). [GRP1:0] migrator = GRP0:0 active = GRP0:0 nextevt = KTIME_MAX / \ [GRP0:0] [GRP0:1] migrator = TMIGR_NONE migrator = TMIGR_NONE active = NONE active = NONE nextevt = KTIME_MAX nextevt = KTIME_MAX / \ 0 1 .. 7 8 idle idle !online 4) Now the setup code finally calls tmigr_active_up() to and sets GRP0:0 active in GRP1:0 [GRP1:0] migrator = GRP0:0 active = GRP0:0, GRP0:1 nextevt = KTIME_MAX / \ [GRP0:0] [GRP0:1] migrator = TMIGR_NONE migrator = 8 active = NONE active = 8 nextevt = KTIME_MAX nextevt = KTIME_MAX / \ | 0 1 .. 7 8 idle idle active 5) Now CPU 8 is connected with GRP0:1 and CPU 8 calls tmigr_active_up() out of tmigr_cpu_online(). [GRP1:0] migrator = GRP0:0 active = GRP0:0 nextevt = T8 / \ [GRP0:0] [GRP0:1] migrator = TMIGR_NONE migrator = TMIGR_NONE active = NONE active = NONE nextevt = KTIME_MAX nextevt = T8 / \ | 0 1 .. 7 8 idle idle idle 5) CPU 8 goes idle with a timer T8 and relies on GRP0:0 as the migrator. But it's not really active, so T8 gets ignored. --> The update which is done in third step is not noticed by setup code. So a wrong migrator is set to top level group and a timer could get ignored. (B) Reading group->parent and group->childmask when an hierarchy update is ongoing and reaches the formerly top level group is racy as those values could be inconsistent. (The notation of migrator and active now slightly changes in contrast to the above example, as now the childmasks are used.) [GRP1:0] migrator = TMIGR_NONE active = 0x00 nextevt = KTIME_MAX \ [GRP0:0] [GRP0:1] migrator = TMIGR_NONE migrator = TMIGR_NONE active = 0x00 active = 0x00 nextevt = KTIME_MAX nextevt = KTIME_MAX childmask= 0 childmask= 1 parent = NULL parent = GRP1:0 / \ 0 1 .. 7 8 idle idle !online childmask=1 1) Hierarchy has 8 CPUs. CPU 8 is at the moment in the process of onlining but did not yet connect GRP0:0 to GRP1:0. [GRP1:0] migrator = TMIGR_NONE active = 0x00 nextevt = KTIME_MAX / \ [GRP0:0] [GRP0:1] migrator = TMIGR_NONE migrator = TMIGR_NONE active = 0x00 active = 0x00 nextevt = KTIME_MAX nextevt = KTIME_MAX childmask= 0 childmask= 1 parent = GRP1:0 parent = GRP1:0 / \ 0 1 .. 7 8 idle idle !online childmask=1 2) Setup code (running on CPU 8) now connects GRP0:0 to GRP1:0, updates parent pointer of GRP0:0 and ... [GRP1:0] migrator = TMIGR_NONE active = 0x00 nextevt = KTIME_MAX / \ [GRP0:0] [GRP0:1] migrator = 0x01 migrator = TMIGR_NONE active = 0x01 active = 0x00 nextevt = KTIME_MAX nextevt = KTIME_MAX childmask= 0 childmask= 1 parent = GRP1:0 parent = GRP1:0 / \ 0 1 .. 7 8 active idle !online childmask=1 tmigr_walk.childmask = 0 3) ... CPU 0 comes active in the same time. As migrator in GRP0:0 was TMIGR_NONE, childmask of GRP0:0 is stored in update propagation data structure tmigr_walk (as update of childmask is not yet visible/updated). And now ... [GRP1:0] migrator = TMIGR_NONE active = 0x00 nextevt = KTIME_MAX / \ [GRP0:0] [GRP0:1] migrator = 0x01 migrator = TMIGR_NONE active = 0x01 active = 0x00 nextevt = KTIME_MAX nextevt = KTIME_MAX childmask= 2 childmask= 1 parent = GRP1:0 parent = GRP1:0 / \ 0 1 .. 7 8 active idle !online childmask=1 tmigr_walk.childmask = 0 4) ... childmask of GRP0:0 is updated by CPU 8 (still part of setup code). [GRP1:0] migrator = 0x00 active = 0x00 nextevt = KTIME_MAX / \ [GRP0:0] [GRP0:1] migrator = 0x01 migrator = TMIGR_NONE active = 0x01 active = 0x00 nextevt = KTIME_MAX nextevt = KTIME_MAX childmask= 2 childmask= 1 parent = GRP1:0 parent = GRP1:0 / \ 0 1 .. 7 8 active idle !online childmask=1 tmigr_walk.childmask = 0 5) CPU 0 sees the connection to GRP1:0 and now propagates active state to GRP1:0 but with childmask = 0 as stored in propagation data structure. --> Now GRP1:0 always has a migrator as 0x00 != TMIGR_NONE and for all CPUs it looks like GRP1:0 is always active. To prevent those races, the setup of the hierarchy is moved into the cpuhotplug prepare callback. The prepare callback is not executed by the CPU which will come online, it is executed by the CPU which prepares onlining of the other CPU. This CPU is active while it is connecting the formerly top level to the new one. This prevents from (A) to happen and it also prevents from any further walk above the formerly top level until that active CPU becomes inactive, releasing the new ->parent and ->childmask updates to be visible by any subsequent walk up above the formerly top level hierarchy. This prevents from (B) to happen. The direction for the updates is now forced to look like "from bottom to top". However if the active CPU prevents from tmigr_cpu_(in)active() to walk up with the update not-or-half visible, nothing prevents walking up to the new top with a 0 childmask in tmigr_handle_remote_up() or tmigr_requires_handle_remote_up() if the active CPU doing the prepare is not the migrator. But then it looks fine because: * tmigr_check_migrator() should just return false * The migrator is active and should eventually observe the new childmask at some point in a future tick. Split setup functionality of online callback into the cpuhotplug prepare callback and setup hotplug state. Change init call into early_initcall() to make sure an already active CPU prepares everything for newly upcoming CPUs. Reorder the code, that all prepare related functions are close to each other and online and offline callbacks are also close together. Fixes: 7ee988770326 ("timers: Implement the hierarchical pull model") Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20240717094940.18687-1-anna-maria@linutronix.de --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 7a5785f405b6..df59666a2a66 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -122,6 +122,7 @@ enum cpuhp_state { CPUHP_KVM_PPC_BOOK3S_PREPARE, CPUHP_ZCOMP_PREPARE, CPUHP_TIMERS_PREPARE, + CPUHP_TMIGR_PREPARE, CPUHP_MIPS_SOC_PREPARE, CPUHP_BP_PREPARE_DYN, CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, -- cgit v1.2.3 From 835a9a67f54f01033417a254e53a1391f99db708 Mon Sep 17 00:00:00 2001 From: Anna-Maria Behnsen Date: Tue, 16 Jul 2024 16:19:24 +0200 Subject: timers/migration: Rename childmask by groupmask to make naming more obvious childmask in the group reflects the mask that is required to 'reference' this group in the parent. When reading childmask, this might be confusing, as this suggests, that this is the mask of the child of the group. Clarify this by renaming childmask in the tmigr_group and tmc_group by groupmask. Signed-off-by: Anna-Maria Behnsen Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20240716-tmigr-fixes-v4-6-757baa7803fe@linutronix.de --- include/trace/events/timer_migration.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/timer_migration.h b/include/trace/events/timer_migration.h index 79f19e76a80b..47db5eaf2f9a 100644 --- a/include/trace/events/timer_migration.h +++ b/include/trace/events/timer_migration.h @@ -43,7 +43,7 @@ TRACE_EVENT(tmigr_connect_child_parent, __field( unsigned int, lvl ) __field( unsigned int, numa_node ) __field( unsigned int, num_children ) - __field( u32, childmask ) + __field( u32, groupmask ) ), TP_fast_assign( @@ -52,11 +52,11 @@ TRACE_EVENT(tmigr_connect_child_parent, __entry->lvl = child->parent->level; __entry->numa_node = child->parent->numa_node; __entry->num_children = child->parent->num_children; - __entry->childmask = child->childmask; + __entry->groupmask = child->groupmask; ), - TP_printk("group=%p childmask=%0x parent=%p lvl=%d numa=%d num_children=%d", - __entry->child, __entry->childmask, __entry->parent, + TP_printk("group=%p groupmask=%0x parent=%p lvl=%d numa=%d num_children=%d", + __entry->child, __entry->groupmask, __entry->parent, __entry->lvl, __entry->numa_node, __entry->num_children) ); @@ -72,7 +72,7 @@ TRACE_EVENT(tmigr_connect_cpu_parent, __field( unsigned int, lvl ) __field( unsigned int, numa_node ) __field( unsigned int, num_children ) - __field( u32, childmask ) + __field( u32, groupmask ) ), TP_fast_assign( @@ -81,11 +81,11 @@ TRACE_EVENT(tmigr_connect_cpu_parent, __entry->lvl = tmc->tmgroup->level; __entry->numa_node = tmc->tmgroup->numa_node; __entry->num_children = tmc->tmgroup->num_children; - __entry->childmask = tmc->childmask; + __entry->groupmask = tmc->groupmask; ), - TP_printk("cpu=%d childmask=%0x parent=%p lvl=%d numa=%d num_children=%d", - __entry->cpu, __entry->childmask, __entry->parent, + TP_printk("cpu=%d groupmask=%0x parent=%p lvl=%d numa=%d num_children=%d", + __entry->cpu, __entry->groupmask, __entry->parent, __entry->lvl, __entry->numa_node, __entry->num_children) ); -- cgit v1.2.3 From a2b72b81fb3ba18717fc000949ca9d45a3351130 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 24 Jul 2024 12:16:20 +0100 Subject: io_uring: kill REQ_F_CANCEL_SEQ We removed the reliance on the flag by the cancellation path and now it's unused. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/e57afe566bbe4fefeb44daffb08900f2a4756577.1721819383.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 3bb6198d1523..e62aa9f0629f 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -461,7 +461,6 @@ enum { REQ_F_SUPPORT_NOWAIT_BIT, REQ_F_ISREG_BIT, REQ_F_POLL_NO_LAZY_BIT, - REQ_F_CANCEL_SEQ_BIT, REQ_F_CAN_POLL_BIT, REQ_F_BL_EMPTY_BIT, REQ_F_BL_NO_RECYCLE_BIT, @@ -536,8 +535,6 @@ enum { REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT), /* don't use lazy poll wake for this request */ REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT), - /* cancel sequence is set and valid */ - REQ_F_CANCEL_SEQ = IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT), /* file is pollable */ REQ_F_CAN_POLL = IO_REQ_FLAG(REQ_F_CAN_POLL_BIT), /* buffer list was empty after selection of buffer */ -- cgit v1.2.3 From 78eb4ea25cd5fdbdae7eb9fdf87b99195ff67508 Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Wed, 24 Jul 2024 20:59:29 +0200 Subject: sysctl: treewide: constify the ctl_table argument of proc_handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit const qualify the struct ctl_table argument in the proc_handler function signatures. This is a prerequisite to moving the static ctl_table structs into .rodata data which will ensure that proc_handler function pointers cannot be modified. This patch has been generated by the following coccinelle script: ``` virtual patch @r1@ identifier ctl, write, buffer, lenp, ppos; identifier func !~ "appldata_(timer|interval)_handler|sched_(rt|rr)_handler|rds_tcp_skbuf_handler|proc_sctp_do_(hmac_alg|rto_min|rto_max|udp_port|alpha_beta|auth|probe_interval)"; @@ int func( - struct ctl_table *ctl + const struct ctl_table *ctl ,int write, void *buffer, size_t *lenp, loff_t *ppos); @r2@ identifier func, ctl, write, buffer, lenp, ppos; @@ int func( - struct ctl_table *ctl + const struct ctl_table *ctl ,int write, void *buffer, size_t *lenp, loff_t *ppos) { ... } @r3@ identifier func; @@ int func( - struct ctl_table * + const struct ctl_table * ,int , void *, size_t *, loff_t *); @r4@ identifier func, ctl; @@ int func( - struct ctl_table *ctl + const struct ctl_table *ctl ,int , void *, size_t *, loff_t *); @r5@ identifier func, write, buffer, lenp, ppos; @@ int func( - struct ctl_table * + const struct ctl_table * ,int write, void *buffer, size_t *lenp, loff_t *ppos); ``` * Code formatting was adjusted in xfs_sysctl.c to comply with code conventions. The xfs_stats_clear_proc_handler, xfs_panic_mask_proc_handler and xfs_deprecated_dointvec_minmax where adjusted. * The ctl_table argument in proc_watchdog_common was const qualified. This is called from a proc_handler itself and is calling back into another proc_handler, making it necessary to change it as part of the proc_handler migration. Co-developed-by: Thomas Weißschuh Signed-off-by: Thomas Weißschuh Co-developed-by: Joel Granados Signed-off-by: Joel Granados --- include/linux/ftrace.h | 4 ++-- include/linux/mm.h | 8 ++++---- include/linux/perf_event.h | 6 +++--- include/linux/security.h | 2 +- include/linux/sysctl.h | 34 +++++++++++++++---------------- include/linux/vmstat.h | 4 ++-- include/linux/writeback.h | 2 +- include/net/ndisc.h | 2 +- include/net/neighbour.h | 6 +++--- include/net/netfilter/nf_hooks_lwtunnel.h | 2 +- 10 files changed, 35 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 51575b76818e..fd5e84d0ec47 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -471,7 +471,7 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, extern int stack_tracer_enabled; -int stack_trace_sysctl(struct ctl_table *table, int write, void *buffer, +int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); /* DO NOT MODIFY THIS VARIABLE DIRECTLY! */ @@ -1175,7 +1175,7 @@ extern int tracepoint_printk; extern void disable_trace_on_warning(void); extern int __disable_trace_on_warning; -int tracepoint_printk_sysctl(struct ctl_table *table, int write, +int tracepoint_printk_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); #else /* CONFIG_TRACING */ diff --git a/include/linux/mm.h b/include/linux/mm.h index aa4fccb2a693..4563b560ced7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -204,11 +204,11 @@ extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern unsigned long sysctl_overcommit_kbytes; -int overcommit_ratio_handler(struct ctl_table *, int, void *, size_t *, +int overcommit_ratio_handler(const struct ctl_table *, int, void *, size_t *, loff_t *); -int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *, +int overcommit_kbytes_handler(const struct ctl_table *, int, void *, size_t *, loff_t *); -int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *, +int overcommit_policy_handler(const struct ctl_table *, int, void *, size_t *, loff_t *); #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) @@ -3854,7 +3854,7 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); #ifdef CONFIG_SYSCTL extern int sysctl_drop_caches; -int drop_caches_sysctl_handler(struct ctl_table *, int, void *, size_t *, +int drop_caches_sysctl_handler(const struct ctl_table *, int, void *, size_t *, loff_t *); #endif diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 65ece0d5b4b6..1a8942277dda 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1582,11 +1582,11 @@ extern int sysctl_perf_cpu_time_max_percent; extern void perf_sample_event_took(u64 sample_len_ns); -int perf_event_max_sample_rate_handler(struct ctl_table *table, int write, +int perf_event_max_sample_rate_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, +int perf_cpu_time_max_percent_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -int perf_event_max_stack_handler(struct ctl_table *table, int write, +int perf_event_max_stack_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); /* Access to perf_event_open(2) syscall. */ diff --git a/include/linux/security.h b/include/linux/security.h index de3af33e6ff5..1390f1efb4f0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -228,7 +228,7 @@ struct request_sock; #define LSM_UNSAFE_NO_NEW_PRIVS 4 #ifdef CONFIG_MMU -extern int mmap_min_addr_handler(struct ctl_table *table, int write, +extern int mmap_min_addr_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); #endif diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 54fbec062772..aa4c6d44aaa0 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -61,31 +61,31 @@ extern const int sysctl_vals[]; extern const unsigned long sysctl_long_vals[]; -typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer, +typedef int proc_handler(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); -int proc_dostring(struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_dobool(struct ctl_table *table, int write, void *buffer, +int proc_dostring(const struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dobool(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -int proc_dointvec(struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer, +int proc_dointvec(const struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_douintvec(const struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec_minmax(const struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_douintvec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -int proc_dou8vec_minmax(struct ctl_table *table, int write, void *buffer, +int proc_dou8vec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -int proc_dointvec_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write, +int proc_dointvec_jiffies(const struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec_ms_jiffies_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void *, size_t *, +int proc_dointvec_userhz_jiffies(const struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_dointvec_ms_jiffies(struct ctl_table *, int, void *, size_t *, +int proc_dointvec_ms_jiffies(const struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_doulongvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, void *, +int proc_doulongvec_minmax(const struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_doulongvec_ms_jiffies_minmax(const struct ctl_table *table, int, void *, size_t *, loff_t *); -int proc_do_large_bitmap(struct ctl_table *, int, void *, size_t *, loff_t *); -int proc_do_static_key(struct ctl_table *table, int write, void *buffer, +int proc_do_large_bitmap(const struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_do_static_key(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); /* @@ -287,7 +287,7 @@ static inline bool sysctl_is_alias(char *param) } #endif /* CONFIG_SYSCTL */ -int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, +int sysctl_max_threads(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); #endif /* _LINUX_SYSCTL_H */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 16b0cfa80502..23cd17942036 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -17,7 +17,7 @@ extern int sysctl_stat_interval; #define DISABLE_NUMA_STAT 0 extern int sysctl_vm_numa_stat; DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key); -int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write, +int sysctl_vm_numa_stat_handler(const struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); #endif @@ -301,7 +301,7 @@ void cpu_vm_stats_fold(int cpu); void refresh_zone_stat_thresholds(void); struct ctl_table; -int vmstat_refresh(struct ctl_table *, int write, void *buffer, size_t *lenp, +int vmstat_refresh(const struct ctl_table *, int write, void *buffer, size_t *lenp, loff_t *ppos); void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 112d806ddbe4..1a54676d843a 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -350,7 +350,7 @@ extern unsigned int dirty_expire_interval; extern unsigned int dirtytime_expire_interval; extern int laptop_mode; -int dirtytime_interval_handler(struct ctl_table *table, int write, +int dirtytime_interval_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 9bbdf6eaa942..7a533d5b1d59 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -486,7 +486,7 @@ void igmp6_event_report(struct sk_buff *skb); #ifdef CONFIG_SYSCTL -int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, +int ndisc_ifinfo_sysctl_change(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); #endif diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 0d28172193fa..a44f262a7384 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -412,12 +412,12 @@ void *neigh_seq_start(struct seq_file *, loff_t *, struct neigh_table *, void *neigh_seq_next(struct seq_file *, void *, loff_t *); void neigh_seq_stop(struct seq_file *, void *); -int neigh_proc_dointvec(struct ctl_table *ctl, int write, +int neigh_proc_dointvec(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); -int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, +int neigh_proc_dointvec_jiffies(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); -int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, +int neigh_proc_dointvec_ms_jiffies(const struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, diff --git a/include/net/netfilter/nf_hooks_lwtunnel.h b/include/net/netfilter/nf_hooks_lwtunnel.h index 52e27920f829..cef7a4eb8f97 100644 --- a/include/net/netfilter/nf_hooks_lwtunnel.h +++ b/include/net/netfilter/nf_hooks_lwtunnel.h @@ -2,6 +2,6 @@ #include #ifdef CONFIG_SYSCTL -int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write, +int nf_hooks_lwtunnel_sysctl_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); #endif -- cgit v1.2.3 From d5e726d9143c5624135f5dc9e4069799adeef734 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 12 Jul 2024 18:52:51 -0700 Subject: xsk: Require XDP_UMEM_TX_METADATA_LEN to actuate tx_metadata_len Julian reports that commit 341ac980eab9 ("xsk: Support tx_metadata_len") can break existing use cases which don't zero-initialize xdp_umem_reg padding. Introduce new XDP_UMEM_TX_METADATA_LEN to make sure we interpret the padding as tx_metadata_len only when being explicitly asked. Fixes: 341ac980eab9 ("xsk: Support tx_metadata_len") Reported-by: Julian Schindel Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Reviewed-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20240713015253.121248-2-sdf@fomichev.me --- include/uapi/linux/if_xdp.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index d31698410410..42ec5ddaab8d 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -41,6 +41,10 @@ */ #define XDP_UMEM_TX_SW_CSUM (1 << 1) +/* Request to reserve tx_metadata_len bytes of per-chunk metadata. + */ +#define XDP_UMEM_TX_METADATA_LEN (1 << 2) + struct sockaddr_xdp { __u16 sxdp_family; __u16 sxdp_flags; -- cgit v1.2.3 From 9722c3b66e21ff08aec570d02a97d331087fd70f Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Wed, 24 Jul 2024 18:33:06 +0200 Subject: of: remove internal arguments from of_property_for_each_u32() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The of_property_for_each_u32() macro needs five parameters, two of which are primarily meant as internal variables for the macro itself (in the for() clause). Yet these two parameters are used by a few drivers, and this can be considered misuse or at least bad practice. Now that the kernel uses C11 to build, these two parameters can be avoided by declaring them internally, thus changing this pattern: struct property *prop; const __be32 *p; u32 val; of_property_for_each_u32(np, "xyz", prop, p, val) { ... } to this: u32 val; of_property_for_each_u32(np, "xyz", val) { ... } However two variables cannot be declared in the for clause even with C11, so declare one struct that contain the two variables we actually need. As the variables inside this struct are not meant to be used by users of this macro, give the struct instance the noticeable name "_it" so it is visible during code reviews, helping to avoid new code to use it directly. Most usages are trivially converted as they do not use those two parameters, as expected. The non-trivial cases are: - drivers/clk/clk.c, of_clk_get_parent_name(): easily doable anyway - drivers/clk/clk-si5351.c, si5351_dt_parse(): this is more complex as the checks had to be replicated in a different way, making code more verbose and somewhat uglier, but I refrained from a full rework to keep as much of the original code untouched having no hardware to test my changes All the changes have been build tested. The few for which I have the hardware have been runtime-tested too. Reviewed-by: Andre Przywara # drivers/clk/sunxi/clk-simple-gates.c, drivers/clk/sunxi/clk-sun8i-bus-gates.c Acked-by: Bartosz Golaszewski # drivers/gpio/gpio-brcmstb.c Acked-by: Nicolas Ferre # drivers/irqchip/irq-atmel-aic-common.c Acked-by: Jonathan Cameron # drivers/iio/adc/ti_am335x_adc.c Acked-by: Uwe Kleine-König # drivers/pwm/pwm-samsung.c Acked-by: Richard Leitner # drivers/usb/misc/usb251xb.c Acked-by: Mark Brown # sound/soc/codecs/arizona.c Reviewed-by: Richard Fitzgerald # sound/soc/codecs/arizona.c Acked-by: Michael Ellerman # arch/powerpc/sysdev/xive/spapr.c Acked-by: Stephen Boyd # clk Signed-off-by: Luca Ceresoli Acked-by: Lee Jones Link: https://lore.kernel.org/r/20240724-of_property_for_each_u32-v3-1-bea82ce429e2@bootlin.com Signed-off-by: Rob Herring (Arm) --- include/linux/of.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/of.h b/include/linux/of.h index 13cf7a43b473..85b60ac9eec5 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -430,11 +430,9 @@ extern int of_detach_node(struct device_node *); #define of_match_ptr(_ptr) (_ptr) /* - * struct property *prop; - * const __be32 *p; * u32 u; * - * of_property_for_each_u32(np, "propname", prop, p, u) + * of_property_for_each_u32(np, "propname", u) * printk("U32 value: %x\n", u); */ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur, @@ -1431,11 +1429,12 @@ static inline int of_property_read_s32(const struct device_node *np, err == 0; \ err = of_phandle_iterator_next(it)) -#define of_property_for_each_u32(np, propname, prop, p, u) \ - for (prop = of_find_property(np, propname, NULL), \ - p = of_prop_next_u32(prop, NULL, &u); \ - p; \ - p = of_prop_next_u32(prop, p, &u)) +#define of_property_for_each_u32(np, propname, u) \ + for (struct {struct property *prop; const __be32 *item; } _it = \ + {of_find_property(np, propname, NULL), \ + of_prop_next_u32(_it.prop, NULL, &u)}; \ + _it.item; \ + _it.item = of_prop_next_u32(_it.prop, _it.item, &u)) #define of_property_for_each_string(np, propname, prop, s) \ for (prop = of_find_property(np, propname, NULL), \ -- cgit v1.2.3 From 63c33ca0969cf4d4574103b69fb46f58a19a182b Mon Sep 17 00:00:00 2001 From: Bhoomik Gupta Date: Mon, 8 Jul 2024 11:08:35 +0530 Subject: i3c: master: Enhance i3c_bus_type visibility for device searching & event monitoring Improve the visibility of i3c_bus_type to facilitate searching for i3c devices attached to the i3c bus. Enable other drivers to use bus_register_notifier to monitor i3c bus device events. Signed-off-by: Bhoomik Gupta Link: https://lore.kernel.org/r/20240708053835.3003986-1-bhoomik.gupta@nxp.com Signed-off-by: Alexandre Belloni --- include/linux/i3c/master.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h index 0ca27dd86956..074f632868d9 100644 --- a/include/linux/i3c/master.h +++ b/include/linux/i3c/master.h @@ -33,6 +33,7 @@ enum { struct i3c_master_controller; struct i3c_bus; struct i3c_device; +extern const struct bus_type i3c_bus_type; /** * struct i3c_i2c_dev_desc - Common part of the I3C/I2C device descriptor -- cgit v1.2.3 From 24168c5e6dfbdd5b414f048f47f75d64533296ca Mon Sep 17 00:00:00 2001 From: Carlos Song Date: Mon, 15 Jul 2024 18:53:51 -0400 Subject: dt-bindings: i3c: add header for generic I3C flags Add header file for generic I3C flags to avoid hard code in dts file. Signed-off-by: Carlos Song Reviewed-by: Frank Li Acked-by: Jason Liu Signed-off-by: Frank Li Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240715225351.3237284-1-Frank.Li@nxp.com Signed-off-by: Alexandre Belloni --- include/dt-bindings/i3c/i3c.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 include/dt-bindings/i3c/i3c.h (limited to 'include') diff --git a/include/dt-bindings/i3c/i3c.h b/include/dt-bindings/i3c/i3c.h new file mode 100644 index 000000000000..373439218bba --- /dev/null +++ b/include/dt-bindings/i3c/i3c.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +/* + * Copyright 2024 NXP + */ + +#ifndef _DT_BINDINGS_I3C_I3C_H +#define _DT_BINDINGS_I3C_I3C_H + +#define I2C_FM (1 << 4) +#define I2C_FM_PLUS (0 << 4) + +#define I2C_FILTER (0 << 5) +#define I2C_NO_FILTER_HIGH_FREQUENCY (1 << 5) +#define I2C_NO_FILTER_LOW_FREQUENCY (2 << 5) + +#endif -- cgit v1.2.3 From 342b2e395d5f34c9f111a818556e617939f83a8c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 26 Jul 2024 15:24:30 +0100 Subject: io_uring/napi: use ktime in busy polling It's more natural to use ktime/ns instead of keeping around usec, especially since we're comparing it against user provided timers, so convert napi busy poll internal handling to ktime. It's also nicer since the type (ktime_t vs unsigned long) now tells the unit of measure. Keep everything as ktime, which we convert to/from micro seconds for IORING_[UN]REGISTER_NAPI. The net/ busy polling works seems to work with usec, however it's not real usec as shift by 10 is used to get it from nsecs, see busy_loop_current_time(), so it's easy to get truncated nsec back and we get back better precision. Note, we can further improve it later by removing the truncation and maybe convincing net/ to use ktime/ns instead. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/95e7ec8d095069a3ed5d40a4bc6f8b586698bc7e.1722003776.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index e62aa9f0629f..3315005df117 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -404,7 +404,7 @@ struct io_ring_ctx { spinlock_t napi_lock; /* napi_list lock */ /* napi busy poll default timeout */ - unsigned int napi_busy_poll_to; + ktime_t napi_busy_poll_dt; bool napi_prefer_busy_poll; bool napi_enabled; -- cgit v1.2.3 From d659b715e94ac039803d7601505d3473393fc0be Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Mon, 15 Jul 2024 10:04:23 +1000 Subject: mm/huge_memory: avoid PMD-size page cache if needed xarray can't support arbitrary page cache size. the largest and supported page cache size is defined as MAX_PAGECACHE_ORDER by commit 099d90642a71 ("mm/filemap: make MAX_PAGECACHE_ORDER acceptable to xarray"). However, it's possible to have 512MB page cache in the huge memory's collapsing path on ARM64 system whose base page size is 64KB. 512MB page cache is breaking the limitation and a warning is raised when the xarray entry is split as shown in the following example. [root@dhcp-10-26-1-207 ~]# cat /proc/1/smaps | grep KernelPageSize KernelPageSize: 64 kB [root@dhcp-10-26-1-207 ~]# cat /tmp/test.c : int main(int argc, char **argv) { const char *filename = TEST_XFS_FILENAME; int fd = 0; void *buf = (void *)-1, *p; int pgsize = getpagesize(); int ret = 0; if (pgsize != 0x10000) { fprintf(stdout, "System with 64KB base page size is required!\n"); return -EPERM; } system("echo 0 > /sys/devices/virtual/bdi/253:0/read_ahead_kb"); system("echo 1 > /proc/sys/vm/drop_caches"); /* Open the xfs file */ fd = open(filename, O_RDONLY); assert(fd > 0); /* Create VMA */ buf = mmap(NULL, TEST_MEM_SIZE, PROT_READ, MAP_SHARED, fd, 0); assert(buf != (void *)-1); fprintf(stdout, "mapped buffer at 0x%p\n", buf); /* Populate VMA */ ret = madvise(buf, TEST_MEM_SIZE, MADV_NOHUGEPAGE); assert(ret == 0); ret = madvise(buf, TEST_MEM_SIZE, MADV_POPULATE_READ); assert(ret == 0); /* Collapse VMA */ ret = madvise(buf, TEST_MEM_SIZE, MADV_HUGEPAGE); assert(ret == 0); ret = madvise(buf, TEST_MEM_SIZE, MADV_COLLAPSE); if (ret) { fprintf(stdout, "Error %d to madvise(MADV_COLLAPSE)\n", errno); goto out; } /* Split xarray entry. Write permission is needed */ munmap(buf, TEST_MEM_SIZE); buf = (void *)-1; close(fd); fd = open(filename, O_RDWR); assert(fd > 0); fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, TEST_MEM_SIZE - pgsize, pgsize); out: if (buf != (void *)-1) munmap(buf, TEST_MEM_SIZE); if (fd > 0) close(fd); return ret; } [root@dhcp-10-26-1-207 ~]# gcc /tmp/test.c -o /tmp/test [root@dhcp-10-26-1-207 ~]# /tmp/test ------------[ cut here ]------------ WARNING: CPU: 25 PID: 7560 at lib/xarray.c:1025 xas_split_alloc+0xf8/0x128 Modules linked in: nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib \ nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct \ nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 \ ip_set rfkill nf_tables nfnetlink vfat fat virtio_balloon drm fuse \ xfs libcrc32c crct10dif_ce ghash_ce sha2_ce sha256_arm64 virtio_net \ sha1_ce net_failover virtio_blk virtio_console failover dimlib virtio_mmio CPU: 25 PID: 7560 Comm: test Kdump: loaded Not tainted 6.10.0-rc7-gavin+ #9 Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20240524-1.el9 05/24/2024 pstate: 83400005 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) pc : xas_split_alloc+0xf8/0x128 lr : split_huge_page_to_list_to_order+0x1c4/0x780 sp : ffff8000ac32f660 x29: ffff8000ac32f660 x28: ffff0000e0969eb0 x27: ffff8000ac32f6c0 x26: 0000000000000c40 x25: ffff0000e0969eb0 x24: 000000000000000d x23: ffff8000ac32f6c0 x22: ffffffdfc0700000 x21: 0000000000000000 x20: 0000000000000000 x19: ffffffdfc0700000 x18: 0000000000000000 x17: 0000000000000000 x16: ffffd5f3708ffc70 x15: 0000000000000000 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: ffffffffffffffc0 x10: 0000000000000040 x9 : ffffd5f3708e692c x8 : 0000000000000003 x7 : 0000000000000000 x6 : ffff0000e0969eb8 x5 : ffffd5f37289e378 x4 : 0000000000000000 x3 : 0000000000000c40 x2 : 000000000000000d x1 : 000000000000000c x0 : 0000000000000000 Call trace: xas_split_alloc+0xf8/0x128 split_huge_page_to_list_to_order+0x1c4/0x780 truncate_inode_partial_folio+0xdc/0x160 truncate_inode_pages_range+0x1b4/0x4a8 truncate_pagecache_range+0x84/0xa0 xfs_flush_unmap_range+0x70/0x90 [xfs] xfs_file_fallocate+0xfc/0x4d8 [xfs] vfs_fallocate+0x124/0x2f0 ksys_fallocate+0x4c/0xa0 __arm64_sys_fallocate+0x24/0x38 invoke_syscall.constprop.0+0x7c/0xd8 do_el0_svc+0xb4/0xd0 el0_svc+0x44/0x1d8 el0t_64_sync_handler+0x134/0x150 el0t_64_sync+0x17c/0x180 Fix it by correcting the supported page cache orders, different sets for DAX and other files. With it corrected, 512MB page cache becomes disallowed on all non-DAX files on ARM64 system where the base page size is 64KB. After this patch is applied, the test program fails with error -EINVAL returned from __thp_vma_allowable_orders() and the madvise() system call to collapse the page caches. Link: https://lkml.kernel.org/r/20240715000423.316491-1-gshan@redhat.com Fixes: 6b24ca4a1a8d ("mm: Use multi-index entries in the page cache") Signed-off-by: Gavin Shan Acked-by: David Hildenbrand Reviewed-by: Ryan Roberts Acked-by: Zi Yan Cc: Baolin Wang Cc: Barry Song Cc: Don Dutile Cc: Matthew Wilcox (Oracle) Cc: Peter Xu Cc: Ryan Roberts Cc: William Kucharski Cc: [5.17+] Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index cff002be83eb..e25d9ebfdf89 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -74,14 +74,20 @@ extern struct kobj_attribute thpsize_shmem_enabled_attr; #define THP_ORDERS_ALL_ANON ((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1))) /* - * Mask of all large folio orders supported for file THP. + * Mask of all large folio orders supported for file THP. Folios in a DAX + * file is never split and the MAX_PAGECACHE_ORDER limit does not apply to + * it. */ -#define THP_ORDERS_ALL_FILE (BIT(PMD_ORDER) | BIT(PUD_ORDER)) +#define THP_ORDERS_ALL_FILE_DAX \ + (BIT(PMD_ORDER) | BIT(PUD_ORDER)) +#define THP_ORDERS_ALL_FILE_DEFAULT \ + ((BIT(MAX_PAGECACHE_ORDER + 1) - 1) & ~BIT(0)) /* * Mask of all large folio orders supported for THP. */ -#define THP_ORDERS_ALL (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE) +#define THP_ORDERS_ALL \ + (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE_DAX | THP_ORDERS_ALL_FILE_DEFAULT) #define TVA_SMAPS (1 << 0) /* Will be used for procfs */ #define TVA_IN_PF (1 << 1) /* Page fault handler */ -- cgit v1.2.3 From b3bebe44306e23827397d0d774d206e3fa374041 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 17 Jul 2024 14:28:44 -0700 Subject: alloc_tag: outline and export free_reserved_page() Outline and export free_reserved_page() because modules use it and it in turn uses page_ext_{get|put} which should not be exported. The same result could be obtained by outlining {get|put}_page_tag_ref() but that would have higher performance impact as these functions are used in more performance critical paths. Link: https://lkml.kernel.org/r/20240717212844.2749975-1-surenb@google.com Fixes: dcfe378c81f7 ("lib: introduce support for page allocation tagging") Signed-off-by: Suren Baghdasaryan Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202407080044.DWMC9N9I-lkp@intel.com/ Suggested-by: Christoph Hellwig Suggested-by: Vlastimil Babka Acked-by: Vlastimil Babka Cc: Kees Cook Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Sourav Panda Cc: [6.10] Signed-off-by: Andrew Morton --- include/linux/mm.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 4563b560ced7..c4b238a20b76 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3137,21 +3137,7 @@ extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid); /* Free the reserved page into the buddy system, so it gets managed. */ -static inline void free_reserved_page(struct page *page) -{ - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } - ClearPageReserved(page); - init_page_count(page); - __free_page(page); - adjust_managed_page_count(page, 1); -} +void free_reserved_page(struct page *page); #define free_highmem_page(page) free_reserved_page(page) static inline void mark_page_reserved(struct page *page) -- cgit v1.2.3 From f59adcf5933271ab7247c4c8938c67be8905b725 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 23 Jul 2024 17:12:44 +0000 Subject: mm: memcg: add cacheline padding after lruvec in mem_cgroup_per_node Oliver Sand reported a performance regression caused by commit 98c9daf5ae6b ("mm: memcg: guard memcg1-specific members of struct mem_cgroup_per_node"), which puts some fields of the mem_cgroup_per_node structure under the CONFIG_MEMCG_V1 config option. Apparently it causes a false cache sharing between lruvec and lru_zone_size members of the structure. Fix it by adding an explicit padding after the lruvec member. Even though the padding is not required with CONFIG_MEMCG_V1 set, it seems like the introduced memory overhead is not significant enough to warrant another divergence in the mem_cgroup_per_node layout, so the padding is added unconditionally. Link: https://lkml.kernel.org/r/20240723171244.747521-1-roman.gushchin@linux.dev Fixes: 98c9daf5ae6b ("mm: memcg: guard memcg1-specific members of struct mem_cgroup_per_node") Signed-off-by: Roman Gushchin Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202407121335.31a10cb6-oliver.sang@intel.com Tested-by: Oliver Sang Acked-by: Shakeel Butt Cc: Johannes Weiner Cc: Michal Hocko Cc: Muchun Song Cc: Roman Gushchin Signed-off-by: Andrew Morton --- include/linux/memcontrol.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 7e2eb091049a..0e5bf25d324f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -109,6 +109,7 @@ struct mem_cgroup_per_node { /* Fields which get updated often at the end. */ struct lruvec lruvec; + CACHELINE_PADDING(_pad2_); unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; struct mem_cgroup_reclaim_iter iter; }; -- cgit v1.2.3 From 3a7e02c040b130b5545e4b115aada7bacd80a2b6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 26 Jul 2024 15:32:27 -0700 Subject: minmax: avoid overly complicated constant expressions in VM code The minmax infrastructure is overkill for simple constants, and can cause huge expansions because those simple constants are then used by other things. For example, 'pageblock_order' is a core VM constant, but because it was implemented using 'min_t()' and all the type-checking that involves, it actually expanded to something like 2.5kB of preprocessor noise. And when that simple constant was then used inside other expansions: #define pageblock_nr_pages (1UL << pageblock_order) #define pageblock_start_pfn(pfn) ALIGN_DOWN((pfn), pageblock_nr_pages) and we then use that inside a 'max()' macro: case ISOLATE_SUCCESS: update_cached = false; last_migrated_pfn = max(cc->zone->zone_start_pfn, pageblock_start_pfn(cc->migrate_pfn - 1)); the end result was that one statement expanding to 253kB in size. There are probably other cases of this, but this one case certainly stood out. I've added 'MIN_T()' and 'MAX_T()' macros for this kind of "core simple constant with specific type" use. These macros skip the type checking, and as such need to be very sparingly used only for obvious cases that have active issues like this. Reported-by: Lorenzo Stoakes Link: https://lore.kernel.org/all/36aa2cad-1db1-4abf-8dd2-fb20484aabc3@lucifer.local/ Cc: David Laight Signed-off-by: Linus Torvalds --- include/linux/minmax.h | 7 +++++++ include/linux/pageblock-flags.h | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 2ec559284a9f..a7ef65f78933 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -270,4 +270,11 @@ static inline bool in_range32(u32 val, u32 start, u32 len) #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) +/* + * Use these carefully: no type checking, and uses the arguments + * multiple times. Use for obvious constants only. + */ +#define MIN_T(type,a,b) __cmp(min,(type)(a),(type)(b)) +#define MAX_T(type,a,b) __cmp(max,(type)(a),(type)(b)) + #endif /* _LINUX_MINMAX_H */ diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 547e82cdc89a..fc6b9c87cb0a 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -41,13 +41,13 @@ extern unsigned int pageblock_order; * Huge pages are a constant size, but don't exceed the maximum allocation * granularity. */ -#define pageblock_order min_t(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER) +#define pageblock_order MIN_T(unsigned int, HUGETLB_PAGE_ORDER, MAX_PAGE_ORDER) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ #elif defined(CONFIG_TRANSPARENT_HUGEPAGE) -#define pageblock_order min_t(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER) +#define pageblock_order MIN_T(unsigned int, HPAGE_PMD_ORDER, MAX_PAGE_ORDER) #else /* CONFIG_TRANSPARENT_HUGEPAGE */ -- cgit v1.2.3 From 00e3913b0416fe69d28745c0a2a340e2f76c219c Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 26 Jul 2024 01:16:48 +0900 Subject: Revert "firewire: Annotate struct fw_iso_packet with __counted_by()" This reverts commit d3155742db89df3b3c96da383c400e6ff4d23c25. The header_length field is byte unit, thus it can not express the number of elements in header field. It seems that the argument for counted_by attribute can have no arithmetic expression, therefore this commit just reverts the issued commit. Suggested-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20240725161648.130404-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- include/linux/firewire.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 00abe0e5d602..1cca14cf5652 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -462,9 +462,8 @@ struct fw_iso_packet { /* rx: Sync bit, wait for matching sy */ u32 tag:2; /* tx: Tag in packet header */ u32 sy:4; /* tx: Sy in packet header */ - u32 header_length:8; /* Length of immediate header */ - /* tx: Top of 1394 isoch. data_block */ - u32 header[] __counted_by(header_length); + u32 header_length:8; /* Size of immediate header */ + u32 header[]; /* tx: Top of 1394 isoch. data_block */ }; #define FW_ISO_CONTEXT_TRANSMIT 0 -- cgit v1.2.3 From 017fa3e89187848fd056af757769c9e66ac3e93d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 28 Jul 2024 13:50:01 -0700 Subject: minmax: simplify and clarify min_t()/max_t() implementation This simplifies the min_t() and max_t() macros by no longer making them work in the context of a C constant expression. That means that you can no longer use them for static initializers or for array sizes in type definitions, but there were only a couple of such uses, and all of them were converted (famous last words) to use MIN_T/MAX_T instead. Cc: David Laight Cc: Lorenzo Stoakes Signed-off-by: Linus Torvalds --- include/linux/minmax.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/minmax.h b/include/linux/minmax.h index a7ef65f78933..9c2848abc804 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -45,17 +45,20 @@ #define __cmp(op, x, y) ((x) __cmp_op_##op (y) ? (x) : (y)) -#define __cmp_once(op, x, y, unique_x, unique_y) ({ \ - typeof(x) unique_x = (x); \ - typeof(y) unique_y = (y); \ +#define __cmp_once_unique(op, type, x, y, ux, uy) \ + ({ type ux = (x); type uy = (y); __cmp(op, ux, uy); }) + +#define __cmp_once(op, type, x, y) \ + __cmp_once_unique(op, type, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) + +#define __careful_cmp_once(op, x, y) ({ \ static_assert(__types_ok(x, y), \ #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \ - __cmp(op, unique_x, unique_y); }) + __cmp_once(op, __auto_type, x, y); }) #define __careful_cmp(op, x, y) \ __builtin_choose_expr(__is_constexpr((x) - (y)), \ - __cmp(op, x, y), \ - __cmp_once(op, x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y))) + __cmp(op, x, y), __careful_cmp_once(op, x, y)) #define __clamp(val, lo, hi) \ ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) @@ -158,7 +161,7 @@ * @x: first value * @y: second value */ -#define min_t(type, x, y) __careful_cmp(min, (type)(x), (type)(y)) +#define min_t(type, x, y) __cmp_once(min, type, x, y) /** * max_t - return maximum of two values, using the specified type @@ -166,7 +169,7 @@ * @x: first value * @y: second value */ -#define max_t(type, x, y) __careful_cmp(max, (type)(x), (type)(y)) +#define max_t(type, x, y) __cmp_once(max, type, x, y) /* * Do not check the array parameter using __must_be_array(). -- cgit v1.2.3